Skip to content

Commit c1cb977

Browse files
mbaumantimholy
authored andcommitted
Fix interval indexing with offset axes
This one is terrifying. We were only testing against axes of the form `step:step:last`. Requires PainterQubits/Unitful.jl#90.
1 parent f0743ce commit c1cb977

File tree

4 files changed

+133
-60
lines changed

4 files changed

+133
-60
lines changed

src/indexing.jl

+47-14
Original file line numberDiff line numberDiff line change
@@ -184,30 +184,63 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval)
184184

185185
# Or repeated intervals, which only work if the axis is a range since otherwise
186186
# there will be a non-constant number of indices in each repetition.
187-
# Two small tricks are used here:
188-
# * Compute the resulting interval axis with unsafe indexing without any offset
189-
# - Since it's a range, we can do this, and it makes the resulting axis useful
190-
# * Snap the offsets to the nearest datapoint to avoid fencepost problems
191-
# Adds a dimension to the result; rows represent the interval and columns are offsets.
187+
#
188+
# There are a number of challenges here:
189+
# * This operation adds a dimension to the result; rows represent the interval
190+
# (or subset) and columns are offsets (or repetition). A RepeatedRangeMatrix
191+
# represents the resulting matrix of indices very nicely.
192+
# * We also want the returned matrix to keep track of its axes; the axis
193+
# subset (ax_sub) is the relative location of the interval with respect to
194+
# each offset, and the repetitions (ax_rep) is the array of offsets.
195+
# * We are interested in the resulting *addition* of the interval against the
196+
# offsets. Either the offsets or the interval may independently be out of
197+
# bounds prior to this addition. Even worse: the interval may have different
198+
# units than the axis (e.g., `(Day(-1)..Day(1)) + dates` for a three-day
199+
# span around dates of interest over a Date axis).
200+
# * It is possible (and likely!) that neither the interval endpoints nor the
201+
# offsets fall exactly upon an axis value. Or even worse: the some offsets
202+
# when added to the interval could span more elements than others (the
203+
# fencepost problem). As such, we need to be careful about how and when we
204+
# snap the provided intervals and offsets to exact axis values (and indices).
205+
#
206+
# To avoid the fencepost problems and to define the axes, we convert the
207+
# interval to a UnitRange of relative indices and the array of offsets to an
208+
# array of absolute indices (independently of each other). Exactly how we do so
209+
# must be carefully considered.
210+
#
211+
# Note that this is fundamentally different than indexing by a single interval;
212+
# whereas those intervals are specified in the same units as the elements of the
213+
# axis itself, these intervals are specified in terms of _offsets_. At the same
214+
# time, we want `A[interval] == vec(A[interval + [0]])`. To make these
215+
# computations as similar as possible, we use a phony range of the form
216+
# `step(ax):step(ax):step(ax)` in order to search for the interval.
217+
phony_range(r::Range) = step(r):step(r):step(r)
218+
phony_range(r::AbstractUnitRange) = step(r):step(r)
219+
phony_range(r::StepRangeLen) = StepRangeLen(r.step, r.step, 1)
220+
function relativewindow(r::Range, x::ClosedInterval)
221+
pr = phony_range(r)
222+
idxs = Extrapolated.searchsorted(pr, x)
223+
vals = Extrapolated.getindex(pr, idxs)
224+
return (idxs, vals)
225+
end
226+
192227
axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead")
193228
function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval)
194-
n = length(idx.offsets)
195-
idxs = unsafe_searchsorted(ax, idx.window)
196-
offsets = [searchsortednearest(ax, idx.offsets[i]) for i=1:n]
197-
AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[offsets]))
229+
idxs, vals = relativewindow(ax, idx.window)
230+
offsets = [Extrapolated.searchsortednearest(ax, offset) for offset in idx.offsets]
231+
AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(Extrapolated.getindex(ax, offsets)))
198232
end
199233

200234
# We also have special datatypes to represent intervals about indices
201235
axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index])
202236
function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex)
203-
idxs = unsafe_searchsorted(ax, idx.window)
204-
AxisArray(idxs + idx.index, Axis{:sub}(inbounds_getindex(ax, idxs)))
237+
idxs, vals = relativewindow(ax, idx.window)
238+
AxisArray(idxs + idx.index, Axis{:sub}(vals))
205239
end
206240
axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead")
207241
function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes)
208-
n = length(idx.indexes)
209-
idxs = unsafe_searchsorted(ax, idx.window)
210-
AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[idx.indexes]))
242+
idxs, vals = relativewindow(ax, idx.window)
243+
AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes]))
211244
end
212245

213246
# Categorical axes may be indexed by their elements

src/search.jl

+44-26
Original file line numberDiff line numberDiff line change
@@ -15,65 +15,83 @@ function searchsortednearest(vec::AbstractVector, x)
1515
end
1616
return idx
1717
end
18+
# Base only specializes searching ranges by Numbers; so optimize for Intervals
19+
function Base.searchsorted(a::Range, I::ClosedInterval)
20+
searchsortedfirst(a, I.left):searchsortedlast(a, I.right)
21+
end
1822

19-
# We depend upon extrapolative behaviors in searching ranges to shift axes.
20-
# This can be done by stealing Base's implementations and removing the bounds-
21-
# correcting min/max.
23+
"""
24+
The internal `Extrapolated` module contains implementations for indexing and
25+
searching into ranges beyond their bounds. The `@inbounds` macro is not
26+
sufficient since it can be turned off by `--check-bounds=yes`.
27+
"""
28+
module Extrapolated
29+
using ..ClosedInterval
30+
31+
function searchsortednearest(vec::Range, x)
32+
idx = searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x
33+
if (getindex(vec, idx) - x) > (x - getindex(vec, idx-1))
34+
idx -= 1 # The previous element is closer
35+
end
36+
return idx
37+
end
2238

23-
# TODO: This could plug into the sorting system better, but it's fine for now
24-
# TODO: This needs to support Dates.
2539
"""
26-
unsafe_searchsorted(a::Range, I::ClosedInterval)
40+
searchsorted(a::Range, I::ClosedInterval)
2741
2842
Return the indices of the range that fall within an interval without checking
2943
bounds, possibly extrapolating outside the range if needed.
3044
"""
31-
function unsafe_searchsorted(a::Range, I::ClosedInterval)
32-
unsafe_searchsortedfirst(a, I.left):unsafe_searchsortedlast(a, I.right)
33-
end
34-
# Base only specializes searching ranges by Numbers; so optimize for Intervals
35-
function Base.searchsorted(a::Range, I::ClosedInterval)
45+
function searchsorted(a::Range, I::ClosedInterval)
3646
searchsortedfirst(a, I.left):searchsortedlast(a, I.right)
3747
end
3848

39-
# When running with "--check-bounds=yes" (like on Travis), the bounds-check isn't elided
40-
@inline function inbounds_getindex{T}(v::Range{T}, i::Integer)
49+
# When running with "--check-bounds=yes`(like on Travis), the bounds-check isn't elided
50+
@inline function getindex{T}(v::Range{T}, i::Integer)
4151
convert(T, first(v) + (i-1)*step(v))
4252
end
43-
@inline function inbounds_getindex{T<:Integer}(r::Range, s::Range{T})
53+
@inline function getindex{T<:Integer}(r::Range, s::Range{T})
4454
f = first(r)
4555
st = oftype(f, f + (first(s)-1)*step(r))
4656
range(st, step(r)*step(s), length(s))
4757
end
48-
@inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i)
49-
@inline function inbounds_getindex(r::StepRangeLen, s::OrdinalRange)
50-
vfirst = Base.unsafe_getindex(r, first(s))
51-
StepRangeLen(vfirst, step(r)*step(s), length(s))
58+
getindex(r::Range, I::Array) = [getindex(r, i) for i in I]
59+
@inline getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i)
60+
@inline function getindex(r::StepRangeLen, s::AbstractUnitRange)
61+
soffset = 1 + (r.offset - first(s))
62+
soffset = clamp(soffset, 1, length(s))
63+
ioffset = first(s) + (soffset-1)
64+
if ioffset == r.offset
65+
StepRangeLen(r.ref, r.step, length(s), max(1,soffset))
66+
else
67+
StepRangeLen(r.ref + (ioffset-r.offset)*r.step, r.step, length(s), max(1,soffset))
68+
end
5269
end
5370

54-
function unsafe_searchsortedlast{T<:Number}(a::Range{T}, x::Number)
71+
function searchsortedlast(a::Range, x)
5572
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
5673
n = round(Integer,(x-first(a))/step(a))+1
57-
isless(x, inbounds_getindex(a, n)) ? n-1 : n
74+
isless(x, getindex(a, n)) ? n-1 : n
5875
end
59-
function unsafe_searchsortedfirst{T<:Number}(a::Range{T}, x::Number)
76+
function searchsortedfirst(a::Range, x)
6077
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
6178
n = round(Integer,(x-first(a))/step(a))+1
62-
isless(inbounds_getindex(a, n), x) ? n+1 : n
79+
isless(getindex(a, n), x) ? n+1 : n
6380
end
64-
function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Number)
81+
function searchsortedlast{T<:Integer}(a::Range{T}, x)
6582
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
6683
fld(floor(Integer,x)-first(a),step(a))+1
6784
end
68-
function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Number)
85+
function searchsortedfirst{T<:Integer}(a::Range{T}, x)
6986
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
7087
-fld(floor(Integer,-x)+first(a),step(a))+1
7188
end
72-
function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned)
89+
function searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned)
7390
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
7491
-fld(first(a)-signed(x),step(a))+1
7592
end
76-
function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned)
93+
function searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned)
7794
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
7895
fld(signed(x)-first(a),step(a))+1
7996
end
97+
end

test/indexing.jl

+22
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,19 @@ B = AxisArray(reshape(1:15, 5,3), .1:.1:0.5, [:a, :b, :c])
7070

7171
@test B[Axis{:row}(ClosedInterval(0.15, 0.3))] == @view(B[Axis{:row}(ClosedInterval(0.15, 0.3))]) == B[2:3,:]
7272

73+
# Test indexing by Intervals that aren't of the form step:step:last
74+
B = AxisArray(reshape(1:15, 5,3), 1.1:0.1:1.5, [:a, :b, :c])
75+
@test B[ClosedInterval(1.0, 1.5), :] == B[ClosedInterval(1.0, 1.5)] == B[:,:]
76+
@test B[ClosedInterval(1.0, 1.3), :] == B[ClosedInterval(1.0, 1.3)] == B[1:3,:]
77+
@test B[ClosedInterval(1.15, 1.3), :] == B[ClosedInterval(1.15, 1.3)] == B[2:3,:]
78+
@test B[ClosedInterval(1.2, 1.5), :] == B[ClosedInterval(1.2, 1.5)] == B[2:end,:]
79+
@test B[ClosedInterval(1.2, 1.6), :] == B[ClosedInterval(1.2, 1.6)] == B[2:end,:]
80+
@test @view(B[ClosedInterval(1.0, 1.5), :]) == @view(B[ClosedInterval(1.0, 1.5)]) == B[:,:]
81+
@test @view(B[ClosedInterval(1.0, 1.3), :]) == @view(B[ClosedInterval(1.0, 1.3)]) == B[1:3,:]
82+
@test @view(B[ClosedInterval(1.15, 1.3), :]) == @view(B[ClosedInterval(1.15, 1.3)]) == B[2:3,:]
83+
@test @view(B[ClosedInterval(1.2, 1.5), :]) == @view(B[ClosedInterval(1.2, 1.5)]) == B[2:end,:]
84+
@test @view(B[ClosedInterval(1.2, 1.6), :]) == @view(B[ClosedInterval(1.2, 1.6)]) == B[2:end,:]
85+
7386
A = AxisArray(reshape(1:256, 4,4,4,4), Axis{:d1}(.1:.1:.4), Axis{:d2}(1//10:1//10:4//10), Axis{:d3}(["1","2","3","4"]), Axis{:d4}([:a, :b, :c, :d]))
7487
ax1 = axes(A)[1]
7588
@test A[Axis{:d1}(2)] == A[ax1(2)]
@@ -115,6 +128,7 @@ AxisArrays.axistrait(::AbstractVector{IntLike}) = AxisArrays.Dimensional
115128
end
116129

117130
for (r, Irel) in ((0.1:0.1:10.0, -0.5..0.5), # FloatRange
131+
(22.1:0.1:32.0, -0.5..0.5),
118132
(linspace(0.1, 10.0, 100), -0.51..0.51), # LinSpace
119133
(IL.IntLike(1):IL.IntLike(1):IL.IntLike(100),
120134
IL.IntLike(-5)..IL.IntLike(5))) # StepRange
@@ -194,3 +208,11 @@ A = AxisArray(OffsetArrays.OffsetArray([1 2; 3 4], 0:1, 1:2),
194208
@test_throws ArgumentError A[BigFloat(1.0)]
195209
@test_throws ArgumentError A[1.0f0]
196210
@test_throws ArgumentError A[:,6.1]
211+
212+
# Test using dates
213+
using Base.Dates: Day, Month
214+
A = AxisArray(1:365, Date(2017,1,1):Date(2017,12,31))
215+
@test A[Date(2017,2,1) .. Date(2017,2,28)] == collect(31 + (1:28)) # February
216+
@test A[(-Day(13)..Day(14)) + Date(2017,2,14)] == collect(31 + (1:28))
217+
@test A[(-Day(14)..Day(14)) + DateTime(2017,2,14,12)] == collect(31 + (1:28))
218+
@test A[(Day(0)..Day(6)) + (Date(2017,1,1):Month(1):Date(2017,4,12))] == [1:7 32:38 60:66 91:97]

test/search.jl

+20-20
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,25 @@ import AxisArrays: searchsortednearest
1010
@test searchsortednearest([1,1,2,2,3,3], Inf) === 6
1111
@test searchsortednearest([1,1,2,2,3,3], -Inf) === 1
1212

13-
# unsafe searchsorted for ranges
14-
import AxisArrays: unsafe_searchsorted
15-
@test unsafe_searchsorted(1:10, -1 .. 1) === -1:1
16-
@test unsafe_searchsorted(1:10, 12 .. 15) === 12:15
17-
@test unsafe_searchsorted(0:2:10, -3 .. -1) === 0:0
18-
@test unsafe_searchsorted(0:2:10, -5 .. 3) === -1:2
13+
# Extrapolated searching for ranges
14+
import AxisArrays: Extrapolated
15+
@test Extrapolated.searchsorted(1:10, -1 .. 1) === -1:1
16+
@test Extrapolated.searchsorted(1:10, 12 .. 15) === 12:15
17+
@test Extrapolated.searchsorted(0:2:10, -3 .. -1) === 0:0
18+
@test Extrapolated.searchsorted(0:2:10, -5 .. 3) === -1:2
1919

20-
@test unsafe_searchsorted(1:2, 4.5 .. 4.5) === 5:4
21-
@test unsafe_searchsorted(1:2, 3.5 .. 3.5) === 4:3
22-
@test unsafe_searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5)
23-
@test unsafe_searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5)
24-
@test unsafe_searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5)
25-
@test unsafe_searchsorted(1:2, -0.5 .. -0.5) === 0:-1
26-
@test unsafe_searchsorted(1:2, -1.5 .. -1.5) === -1:-2
20+
@test Extrapolated.searchsorted(1:2, 4.5 .. 4.5) === 5:4
21+
@test Extrapolated.searchsorted(1:2, 3.5 .. 3.5) === 4:3
22+
@test Extrapolated.searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5)
23+
@test Extrapolated.searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5)
24+
@test Extrapolated.searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5)
25+
@test Extrapolated.searchsorted(1:2, -0.5 .. -0.5) === 0:-1
26+
@test Extrapolated.searchsorted(1:2, -1.5 .. -1.5) === -1:-2
2727

28-
@test unsafe_searchsorted(2:2:4, 0x6 .. 0x6) === 3:3
29-
@test unsafe_searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2
30-
@test unsafe_searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2
31-
@test unsafe_searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1
32-
@test unsafe_searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1
33-
@test unsafe_searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0
34-
@test unsafe_searchsorted(2:2:4, 0x0 .. 0x0) === 0:0
28+
@test Extrapolated.searchsorted(2:2:4, 0x6 .. 0x6) === 3:3
29+
@test Extrapolated.searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2
30+
@test Extrapolated.searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2
31+
@test Extrapolated.searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1
32+
@test Extrapolated.searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1
33+
@test Extrapolated.searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0
34+
@test Extrapolated.searchsorted(2:2:4, 0x0 .. 0x0) === 0:0

0 commit comments

Comments
 (0)