Skip to content

Commit dbe42b4

Browse files
committed
Add content
1 parent 7eb6d5d commit dbe42b4

File tree

9 files changed

+246
-92
lines changed

9 files changed

+246
-92
lines changed

Project.toml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
name = "KWayMerges"
22
uuid = "f29e91c7-719d-4dbc-8870-0ce36bf055b7"
3-
authors = ["Jakob Nybo Nissen <[email protected]>"]
43
version = "0.1.0"
4+
authors = ["Jakob Nybo Nissen <[email protected]>"]
55

6-
[deps]
6+
[compat]
7+
Test = "1.11"
8+
julia = "1.11"
79

810
[extras]
911
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1012

11-
[compat]
12-
Test = "1.10"
13-
julia = "1.10"
14-
1513
[targets]
1614
test = ["Test"]

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,17 @@
77

88
Implementation of k-way merge.
99

10-
This package implements `kway_merge(x::AbstractVector{<:AbstractVector})`, which returns a `KWayMerger`.
11-
This type is a lazy iterator of the elements in the inner vectors. If the inner vectors are sorted, the output of the `KWayMerger` is also guaranteed to be sorted.
10+
This package implements the `KWayMerger` type.
11+
It is a stateful, lazy iterator of the elements in an iterator of iterators, similar to `Iterators.flatten`. However, the elements of the inner iterators will be yielded in an order given by a predicate optionally passed to `KWayMerger` (default: `isless`).
12+
If the inner iterators are sorted by the predicate, the output of the `KWayMerger` is also guaranteed to be sorted.
1213

13-
The function `peek` can be used to check the next element without advancing the iterator.
14+
The primary purpose of `KWayMerger` is to efficiently merge N sorted iterables into one sorted stream.
15+
16+
The function `peek` can be used to check the next element without advancing the iterator.
17+
18+
## Documentation
19+
This package's public API consists of the `KWayMerger` constructor and its `Base.peek` method.
20+
See their docstrings for more details.
1421

1522
## Contributing
1623
We appreciate contributions from users including reporting bugs, fixing

docs/Project.toml

Lines changed: 0 additions & 9 deletions
This file was deleted.

docs/make.jl

Lines changed: 0 additions & 33 deletions
This file was deleted.

docs/src/index.md

Lines changed: 0 additions & 11 deletions
This file was deleted.

docs/src/reference.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

src/KWayMerges.jl

Lines changed: 139 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,161 @@
11
module KWayMerges
22

3-
export my_function, MyType
3+
export KWayMerger
4+
5+
include("heap.jl")
46

57
"""
6-
MyStruct{T <: Real}
8+
KWayMerger{F, T, I, S}
9+
10+
Stateful iterator which does a K-way merge between multiple
11+
iterators of the same type.
12+
This iterator will yield the elements in every contained
13+
iterator. At each iteration, it will choose the element from
14+
the iterator whose current element sorts first according to the order
15+
determined by `f::F` (default: `isless`).
716
8-
This struct represents a point in 2D-space.
17+
If all inner iterators are sorted by `f`, the yielded elements
18+
will be in sorted order.
19+
A `KWayMerger` is typically used to combine multiple sorted arrays
20+
into one sorted array.
921
1022
# Examples
1123
```jldoctest
12-
julia> x = MyType(1.0, 2.0)
13-
MyType{Float64}(1.0, 2.0)
24+
julia> arrs = [[1,4,10], [2,3], [6,8], [5,7,9]];
25+
26+
julia> it = KWayMerger(arrs);
1427
15-
julia> my_function(x)
16-
2.23606797749979
28+
julia> print(collect(it))
29+
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
1730
```
1831
19-
See also: [`my_function`](@ref)
32+
# Extended help
33+
The type parameters are:
34+
* `F`: Type of function used to compare the elements. It defaults
35+
to `typeof(Base.isless)`
36+
* `T`: Element type of iterators
37+
* `I`: Iterator type
38+
* `S`: Type of state of iterators
39+
40+
All iterators must be of the same type. `Base.eltype` is used
41+
to determine `T` and `I`; since its default implementation
42+
returns `Any`, these type parameters might need to be explicitly
43+
passed to get good performance for some iterators.
44+
45+
`S` is derived automatically, but this must be a fixed type;
46+
iterators that use states of multiple different types may
47+
not be supported by `KWayMerger`.
2048
"""
21-
struct MyType{T <: Real}
22-
x::T
23-
y::T
24-
end
49+
struct KWayMerger{F, T, I, S}
50+
f::F
51+
iterators::Memory{I}
52+
states::Memory{S}
53+
heap::Vector{Tuple{T, Int}}
54+
end
55+
56+
ord(x::KWayMerger) = ord(x.f)
57+
ord(f) = (i, j) -> f(first(i), first(j))
58+
59+
collect_to_memory(x) = collect_to_memory(Base.IteratorSize(typeof(x)), x)
60+
61+
function collect_to_memory(::Union{Base.HasShape, Base.HasLength}, x)
62+
mem = Memory{eltype(x)}(undef, length(x))
63+
i = 0
64+
for it in x
65+
i += 1
66+
mem[i] = it
67+
end
68+
i == length(mem) || error("Implementation error: More elements than reported")
69+
return mem
70+
end
71+
72+
function collect_to_memory(::Base.SizeUnknown, x)
73+
T = eltype(typeof(x))
74+
v = collect(T, x)
75+
return copy!(Memory{T}(undef, length(v)), v)
76+
end
77+
78+
function KWayMerger{F, T, I}(f::F, iterators) where {F, T, I}
79+
iters = collect_to_memory(iterators)
80+
states = nothing
81+
things = Tuple{T, Int}[]
82+
for i in eachindex(iters)
83+
it = iterate(iters[i])
84+
isnothing(it) && continue
85+
(thing::T, state) = it
86+
if isnothing(states)
87+
states = Memory{typeof(state)}(undef, length(iters))
88+
end
89+
push!(things, (thing, i))
90+
states[i] = state
91+
end
92+
heapify!(ord(f), things)
93+
states = if isnothing(states)
94+
Memory{Union{}}(undef, length(iters))
95+
else
96+
states
97+
end
98+
return KWayMerger{F, T, I, eltype(states)}(f, iters, states, things)
99+
end
100+
101+
function KWayMerger{T, I}(iterators) where {T, I}
102+
return KWayMerger{typeof(isless), T, I}(isless, iterators)
103+
end
104+
105+
KWayMerger(iterators) = KWayMerger(isless, iterators)
106+
107+
function KWayMerger(f::F, iterators) where {F}
108+
I = eltype(typeof(iterators))
109+
T = eltype(I)
110+
return KWayMerger{F, T, I}(f, iterators)
111+
end
112+
113+
# We could technically know this, but KWayMerger is stateful, and
114+
# Julia's iterator length works badly with stateful iterators.
115+
Base.IteratorSize(::Type{<:KWayMerger}) = Base.SizeUnknown()
116+
Base.eltype(::Type{<:KWayMerger{F, T}}) where {F, T} = T
117+
118+
function Base.iterate(x::KWayMerger, ::Nothing = nothing)
119+
isempty(x.heap) && return nothing
120+
(item, i) = @inbounds x.heap[1]
121+
iterator = @inbounds x.iterators[i]
122+
state = @inbounds x.states[i]
123+
it = iterate(iterator, state)
124+
if it === nothing
125+
@inbounds heappop!(ord(x.f), x.heap)
126+
else
127+
(new_item, new_state) = it
128+
@inbounds x.states[i] = new_state
129+
@inbounds heapreplace!(ord(x.f), x.heap, (new_item, i))
130+
end
131+
return (item, nothing)
132+
end
133+
134+
Base.isempty(x::KWayMerger) = isempty(x.heap)
135+
Base.isdone(x::KWayMerger) = isempty(x.heap)
25136

26137
"""
27-
my_function(x::MyType) -> Real
138+
peek(x::KWayMerger)::Union{Some, Nothing}
139+
140+
Get the first element of `x` (wrapped in `Some`) without advancing the iterator, or `nothing` if the
141+
iterator is empty.
28142
29-
Computes the L2 norm of `x`.
30143
# Examples
31144
```jldoctest
32-
julia> x = MyType(1.0, 2.0)
33-
MyType{Float64}(1.0, 2.0)
145+
julia> it = KWayMerger([[3, 4], [2, 7]]);
34146
35-
julia> my_function(x)
36-
2.23606797749979
37-
```
147+
julia> peek(it)
148+
Some(2)
38149
39-
See also: [`MyType`](@ref)
150+
julia> collect(it); # empty stateful iterator
151+
152+
julia> peek(it) === nothing
153+
true
154+
```
40155
"""
41-
my_function(x::MyType) = sqrt(x.x^2 + x.y^2)
156+
function Base.peek(x::KWayMerger{F, T})::Union{Some{T}, Nothing} where {F, T}
157+
isempty(x.heap) && return nothing
158+
return @inbounds Some(x.heap[1][1])
159+
end
42160

43161
end # module KWayMerges

src/heap.jl

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
@inline function heapify!(f, xs::Vector)
2+
for i in div(length(xs), 2):-1:1
3+
percolate_down!(f, xs, i, xs[i])
4+
end
5+
return xs
6+
end
7+
8+
@inline function percolate_down!(
9+
f::F,
10+
xs::Vector,
11+
i::Integer,
12+
x,
13+
) where {F}
14+
len = length(xs)
15+
@inbounds while (l = 2i) <= len
16+
r = 2i + 1
17+
j = if r > len || f(@inbounds(xs[l]), @inbounds(xs[r]))
18+
l
19+
else
20+
r
21+
end
22+
if f(@inbounds(xs[j]), x)
23+
@inbounds xs[i] = xs[j]
24+
i = j
25+
else
26+
break
27+
end
28+
end
29+
return @inbounds xs[i] = x
30+
end
31+
32+
@noinline function heappop!(f, xs::Vector)
33+
isempty(xs) && throw(BoundsError(xs, 1))
34+
x = @inbounds xs[1]
35+
y = @inbounds pop!(xs)
36+
if !isempty(xs)
37+
percolate_down!(f, xs, 1, y)
38+
end
39+
return x
40+
end
41+
42+
@inline function heapreplace!(f, xs::Vector, x)
43+
@boundscheck isempty(xs) && throw(BoundsError(xs, 1))
44+
res = @inbounds xs[1]
45+
@inbounds xs[1] = x
46+
percolate_down!(f, xs, 1, x)
47+
return res
48+
end

test/runtests.jl

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,53 @@
11
module KWayMergesTest
22

33
using KWayMerges
4-
54
using Test
65

7-
@testset "Approximation" begin
8-
x = MyType(3, 4)
9-
@test isapprox(my_function(x), 5)
6+
@testset "Construction" begin
7+
v = [1:3, 4:6, 9:11]
8+
9+
T = KWayMerger{typeof(isless), Int, UnitRange{Int}, Int}
10+
11+
@test KWayMerger{typeof(isless), Int, UnitRange{Int}}(isless, v) isa T
12+
@test KWayMerger{Int, UnitRange{Int}}(v) isa T
13+
@test KWayMerger(v) isa T
14+
@test KWayMerger(isless, v) isa T
15+
@test KWayMerger(<, v) isa KWayMerger{typeof(<), Int, UnitRange{Int}}
16+
end
17+
18+
@testset "Forward sorting" begin
19+
v = [[3, 5, 8], [1, 1], [10, 11], [1, 2, 7], Int[]]
20+
r = sort(collect(Iterators.flatten(v)))
21+
22+
@test collect(KWayMerger(v)) == r
23+
end
24+
25+
@testset "Using a predicate" begin
26+
v = [["de", "abc"], [""], ["xysa", "dsakljdwe"]]
27+
r = sort(collect(Iterators.flatten(v)); by = length)
28+
29+
@test collect(KWayMerger((i, j) -> isless(length(i), length(j)), v)) == r
30+
end
31+
32+
@testset "Reverse sorting" begin
33+
v = [[3, 5, 8], [1, 1], [10, 11], [1, 2, 7], Int[]]
34+
for i in v
35+
sort!(i; rev = true)
36+
end
37+
r = sort(collect(Iterators.flatten(v)); rev = true)
38+
39+
@test collect(KWayMerger((i, j) -> isless(j, i), v)) == r
40+
end
41+
42+
@testset "Some edge cases" begin
43+
it = KWayMerger([])
44+
@test it isa KWayMerger{typeof(isless)}
45+
@test collect(it) == Any[]
46+
47+
it = KWayMerger([1:0, 11:10])
48+
@test it isa KWayMerger{typeof(isless), Int}
49+
@test collect(it) == Int[]
50+
@test typeof(collect(it)) == Vector{Int}
1051
end
1152

1253
end # module KWayMergesTest

0 commit comments

Comments
 (0)