|
1 | 1 | module KWayMerges
|
2 | 2 |
|
3 |
| -export KWayMerger |
| 3 | +using Base.Order: Ordering, Forward, ord, lt |
| 4 | + |
| 5 | +export kway_merge |
| 6 | + |
| 7 | +@static if VERSION >= v"1.11.0" |
| 8 | + eval(Meta.parse("public KWayMerger")) |
| 9 | +end |
4 | 10 |
|
5 | 11 | include("heap.jl")
|
6 | 12 |
|
7 | 13 | """
|
8 |
| - KWayMerger{T, I, F}(f::F, iterators) |
9 |
| - KWayMerger{T, I}(iterators) |
10 |
| - KWayMerger(f, iterators) |
11 |
| - KWayMerger(iterators) |
| 14 | + KWayMerger{T, I, O, S} |
| 15 | +
|
| 16 | +Stateful iterator of a k-way merge of multiple iterators of the same type. |
| 17 | +Constructed using [`kway_merge`](@ref). |
| 18 | +
|
| 19 | +The type parameters are: |
| 20 | +* `T`: Element type of iterators |
| 21 | +* `I`: Iterator type |
| 22 | +* `O`: Ordering, subtype of `Base.Ordering` |
| 23 | +* `S`: Type of state of iterators |
| 24 | +""" |
| 25 | +struct KWayMerger{T, I, O <: Base.Ordering, S} |
| 26 | + ordering::O |
| 27 | + iterators::Vector{I} |
| 28 | + states::Vector{S} |
| 29 | + heap::Vector{@NamedTuple{from_iter::Int, value::T}} |
| 30 | +end |
| 31 | + |
| 32 | +""" |
| 33 | + kway_merge( |
| 34 | + iterators; |
| 35 | + lt=isless, |
| 36 | + by=identity, |
| 37 | + rev::Bool=false, |
| 38 | + order::Base.Order.Ordering=Base.Order.Forward |
| 39 | + ) |
| 40 | + kway_merge(::Type{T}, ::Type{I}, iterators; kwargs...) |
| 41 | + kway_merge(::Type{T}, ::Type{I}, ordering::Ordering, iterators) |
12 | 42 |
|
13 | 43 | Create a stateful iterator which does a k-way merge between multiple
|
14 | 44 | iterators of the same type.
|
15 | 45 |
|
16 |
| -This iterator yields `(index::Int, x::T)` elements, where `x` is the next element from |
17 |
| -one of the iterators, and `index` is the 1-based index of the iterator that yielded `x`. |
18 |
| -The elements `x` are chosen from among the iterators such that, among all elements which |
19 |
| -are the next element of the iterators, the element is chosen which is the smallest |
20 |
| -according to the predicate `f::F`, which defaults to `isless`. |
21 |
| -
|
22 |
| -This implies that if all iterators are sorted by `f`, the yielded will be in sorted |
23 |
| -order. |
| 46 | +This iterator yields `@NamedTuple{from_iter::Int, value::T}` elements, where `value` is the |
| 47 | +next element from one of the iterators, and `from_iter` is the 1-based index of the iterator |
| 48 | +that yielded `value`. |
| 49 | +The element `value` is chosen among the iterators such that, among all elements which |
| 50 | +are the next element of the iterators, the element is chosen which is the first |
| 51 | +according to the ordering. |
| 52 | +This implies that if all iterators are sorted according to the ordering, the yielded elements will be in sorted order. |
24 | 53 | Hence, a `KWayMerger` is typically used to combine multiple sorted arrays
|
25 | 54 | into one sorted array.
|
26 | 55 |
|
| 56 | +The ordering is given by the keywords `by`, `lt`, `rev` and `order` - these are the |
| 57 | +same as for `Base.sort!`. |
| 58 | +
|
| 59 | +
|
27 | 60 | # Examples
|
28 | 61 | ```jldoctest
|
29 | 62 | julia> arrs = [[1,6], [2], [5,7], [3,4,8]];
|
30 | 63 |
|
31 |
| -julia> it = KWayMerger(arrs); |
| 64 | +julia> it = kway_merge(arrs); |
| 65 | +
|
| 66 | +julia> first(it, 2) |
| 67 | +2-element Vector{@NamedTuple{from_iter::Int64, value::Int64}}: |
| 68 | + (from_iter = 1, value = 1) |
| 69 | + (from_iter = 2, value = 2) |
32 | 70 |
|
33 |
| -julia> print(collect(it)) |
34 |
| -[(1, 1), (2, 2), (4, 3), (4, 4), (3, 5), (1, 6), (3, 7), (4, 8)] |
| 71 | +julia> print(map(Tuple, it)) |
| 72 | +[(4, 3), (4, 4), (3, 5), (1, 6), (3, 7), (4, 8)] |
35 | 73 | ```
|
36 | 74 |
|
37 | 75 | # Extended help
|
38 |
| -The type parameters are: |
39 |
| -* `F`: Type of function used to compare the elements. It defaults |
40 |
| - to `typeof(Base.isless)` |
41 |
| -* `T`: Element type of iterators |
42 |
| -* `I`: Iterator type |
43 |
| -* `S`: Type of state of iterators |
44 |
| -
|
45 | 76 | All iterators must be of the same type. For the methods which don't pass
|
46 | 77 | in `T` and `I` explicitly, `Base.eltype` is used
|
47 | 78 | to determine them; since its default implementation
|
48 | 79 | returns `Any`, explicitly passing them may be needed for good performance for some
|
49 | 80 | iterators.
|
50 | 81 |
|
51 | 82 | `S` is derived automatically, but this must be a fixed type;
|
52 |
| -iterators that use states of multiple different types may |
53 |
| -not be supported by `KWayMerger`. |
| 83 | +iterators that use states of multiple different types during iteration may |
| 84 | +not be supported. |
54 | 85 | """
|
55 |
| -struct KWayMerger{T, I, F, S} |
56 |
| - f::F |
57 |
| - iterators::Vector{I} |
58 |
| - states::Vector{S} |
59 |
| - heap::Vector{Tuple{Int, T}} |
60 |
| -end |
61 |
| - |
62 |
| -function KWayMerger{T, I, F}(f::F, iterators) where {T, I, F} |
| 86 | +function kway_merge(::Type{T}, ::Type{I}, ordering::O, iterators) where {T, I, O} |
63 | 87 | iters = vec(collect(iterators))
|
64 | 88 | states = nothing
|
65 |
| - things = Tuple{Int, T}[] |
| 89 | + things = @NamedTuple{from_iter::Int, value::T}[] |
66 | 90 | for i in eachindex(iters)
|
67 | 91 | it = iterate(iters[i])
|
68 | 92 | isnothing(it) && continue
|
69 |
| - (thing::T, state) = it |
| 93 | + (value::T, state) = it |
70 | 94 | if isnothing(states)
|
71 | 95 | states = Vector{typeof(state)}(undef, length(iters))
|
72 | 96 | end
|
73 |
| - push!(things, (i, thing)) |
| 97 | + push!(things, (; from_iter = i, value)) |
74 | 98 | states[i] = state
|
75 | 99 | end
|
76 |
| - heapify!(f, things) |
| 100 | + heapify!(ordering, things) |
77 | 101 | states = if isnothing(states)
|
78 | 102 | Vector{Union{}}(undef, length(iters))
|
79 | 103 | else
|
80 | 104 | states
|
81 | 105 | end
|
82 |
| - return KWayMerger{T, I, F, eltype(states)}(f, iters, states, things) |
| 106 | + return KWayMerger{T, I, O, eltype(states)}(ordering, iters, states, things) |
83 | 107 | end
|
84 | 108 |
|
85 |
| -function KWayMerger{T, I}(iterators) where {T, I} |
86 |
| - return KWayMerger{T, I, typeof(isless)}(isless, iterators) |
| 109 | +function kway_merge( |
| 110 | + ::Type{T}, |
| 111 | + ::Type{I}, |
| 112 | + iterators; |
| 113 | + lt = isless, |
| 114 | + by = identity, |
| 115 | + rev::Bool = false, |
| 116 | + order::Base.Ordering = Forward, |
| 117 | + ) where {T, I} |
| 118 | + ordering = ord(lt, by, rev, order) |
| 119 | + return kway_merge(T, I, ordering, iterators) |
87 | 120 | end
|
88 | 121 |
|
89 |
| -KWayMerger(iterators) = KWayMerger(isless, iterators) |
90 |
| - |
91 |
| -function KWayMerger(f::F, iterators) where {F} |
| 122 | +function kway_merge(iterators; kwargs...) |
92 | 123 | I = eltype(typeof(iterators))
|
93 | 124 | T = eltype(I)
|
94 |
| - return KWayMerger{T, I, F}(f, iterators) |
| 125 | + return kway_merge(T, I, iterators; kwargs...) |
95 | 126 | end
|
96 | 127 |
|
97 | 128 | # We could technically know this, but KWayMerger is stateful, and
|
98 | 129 | # Julia's iterator length works badly with stateful iterators.
|
99 | 130 | Base.IteratorSize(::Type{<:KWayMerger}) = Base.SizeUnknown()
|
100 |
| -Base.eltype(::Type{<:KWayMerger{T}}) where {T} = Tuple{Int, T} |
| 131 | +Base.eltype(::Type{<:KWayMerger{T}}) where {T} = @NamedTuple{from_iter::Int, value::T} |
101 | 132 |
|
102 | 133 | function Base.iterate(x::KWayMerger, ::Nothing = nothing)
|
103 | 134 | isempty(x.heap) && return nothing
|
104 |
| - (i, item) = @inbounds x.heap[1] |
105 |
| - iterator = @inbounds x.iterators[i] |
106 |
| - state = @inbounds x.states[i] |
| 135 | + top = @inbounds x.heap[1] |
| 136 | + iterator = @inbounds x.iterators[top.from_iter] |
| 137 | + state = @inbounds x.states[top.from_iter] |
107 | 138 | it = iterate(iterator, state)
|
108 | 139 | if it === nothing
|
109 |
| - @inbounds heappop!(x.f, x.heap) |
| 140 | + @inbounds heappop!(x.ordering, x.heap) |
110 | 141 | else
|
111 | 142 | (new_item, new_state) = it
|
112 |
| - @inbounds x.states[i] = new_state |
113 |
| - @inbounds heapreplace!(x.f, x.heap, (i, new_item)) |
| 143 | + @inbounds x.states[top.from_iter] = new_state |
| 144 | + @inbounds heapreplace!( |
| 145 | + x.ordering, |
| 146 | + x.heap, |
| 147 | + (; from_iter = top.from_iter, value = new_item) |
| 148 | + ) |
114 | 149 | end
|
115 |
| - return ((i, item), nothing) |
| 150 | + return (top, nothing) |
116 | 151 | end
|
117 | 152 |
|
118 | 153 | Base.isempty(x::KWayMerger) = isempty(x.heap)
|
119 | 154 | Base.isdone(x::KWayMerger) = isempty(x.heap)
|
120 | 155 |
|
121 | 156 | """
|
122 |
| - peek(x::KWayMerger{T})::Union{Tuple{Int, T}, Nothing} |
| 157 | + peek(x::KWayMerger{T})::Union{@NamedTuple{from_iter::Int, value::T}, Nothing} |
123 | 158 |
|
124 | 159 | Get the first element of `x` without advancing the iterator, or `nothing` if the
|
125 | 160 | iterator is empty.
|
126 | 161 |
|
127 | 162 | # Examples
|
128 | 163 | ```jldoctest
|
129 |
| -julia> it = KWayMerger([[3, 4], [2, 7]]); |
| 164 | +julia> it = kway_merge([[3, 4], [2, 7]]); |
130 | 165 |
|
131 | 166 | julia> peek(it)
|
132 |
| -(2, 2) |
| 167 | +(from_iter = 2, value = 2) |
133 | 168 |
|
134 | 169 | julia> collect(it); # exhaust stateful iterator
|
135 | 170 |
|
|
0 commit comments