Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up search and find API #24673

Merged
merged 4 commits into from
Jan 4, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Deprecate (r)searchindex(...) in favor of first(findnext/findprev(...))
nalimilan committed Jan 4, 2018

Unverified

This user has not yet uploaded their public signing key.
commit d5f74cddc3d367baafa49077f931281fc61e3c3f
10 changes: 10 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
@@ -3850,6 +3850,16 @@ end
@deprecate rsearch(a::String, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(Char(b)), a, i)
@deprecate rsearch(a::ByteArray, b::Char, i::Integer = endof(a)) findprev(equalto(UInt8(b)), a, i)

# `searchindex`/`rsearchindex` are forwarded to the matching `find*` method; `first`
# is applied to the result so that callers still receive a single start index.

# Substring needles
@deprecate searchindex(s::AbstractString, t::AbstractString) first(findfirst(t, s))
@deprecate searchindex(s::AbstractString, t::AbstractString, i::Integer) first(findnext(t, s, i))
@deprecate rsearchindex(s::AbstractString, t::AbstractString) first(findlast(t, s))
@deprecate rsearchindex(s::AbstractString, t::AbstractString, i::Integer) first(findprev(t, s, i))

# Character needles, matched through an `equalto` predicate
@deprecate searchindex(s::AbstractString, c::Char) first(findfirst(equalto(c), s))
@deprecate searchindex(s::AbstractString, c::Char, i::Integer) first(findnext(equalto(c), s, i))
@deprecate rsearchindex(s::AbstractString, c::Char) first(findlast(equalto(c), s))
@deprecate rsearchindex(s::AbstractString, c::Char, i::Integer) first(findprev(equalto(c), s, i))

# END 0.7 deprecations
# BEGIN 1.0 deprecations

2 changes: 0 additions & 2 deletions base/exports.jl
Original file line number Diff line number Diff line change
@@ -507,8 +507,6 @@ export
occursin,
match,
matchall,
rsearchindex,
searchindex,
searchsorted,
searchsortedfirst,
searchsortedlast,
2 changes: 0 additions & 2 deletions base/precompile.jl
Original file line number Diff line number Diff line change
@@ -621,7 +621,6 @@ precompile(Tuple{typeof(Base.unique), Array{String, 1}})
precompile(Tuple{typeof(Base.REPL.beforecursor), Base.GenericIOBuffer{Array{UInt8, 1}}})
precompile(Tuple{typeof(Base.REPLCompletions.completions), String, Int64})
precompile(Tuple{typeof(Base.incomplete_tag), Symbol})
precompile(Tuple{typeof(Base.rsearchindex), String, String, Int64})
precompile(Tuple{typeof(Base._rsearch), String, String, Int64})
precompile(Tuple{typeof(Base.pushfirst!), Array{Base.SubString{String}, 1}, Base.SubString{String}})
precompile(Tuple{typeof(Base.startswith), String, Base.SubString{String}})
@@ -918,7 +917,6 @@ precompile(Tuple{typeof(Base.Markdown.parseinline), Base.GenericIOBuffer{Array{U
precompile(Tuple{typeof(Base.Markdown.config), Base.Markdown.MD})
precompile(Tuple{typeof(Base.Markdown.parseinline), Base.GenericIOBuffer{Array{UInt8, 1}}, Base.Markdown.MD, Base.Markdown.Config})
precompile(Tuple{typeof(Base.Markdown.list), Base.GenericIOBuffer{Array{UInt8, 1}}, Base.Markdown.MD})
precompile(Tuple{typeof(Base.searchindex), String, String})
precompile(Tuple{typeof(Base._searchindex), Base.SubString{String}, String, Int64})
precompile(Tuple{getfield(Base.Markdown, Symbol("#kw##skipwhitespace")), Array{Any, 1}, typeof(Base.Markdown.skipwhitespace), Base.GenericIOBuffer{Array{UInt8, 1}}})
precompile(Tuple{typeof(Base.ht_keyindex), Base.Dict{Symbol, Base.Markdown.Config}, Symbol})
122 changes: 29 additions & 93 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
@@ -156,8 +156,11 @@ end
# Fetch the `i`-th byte: the code unit at `i` for a `String`, the element at `i`
# for a vector of `UInt8`/`Int8`.
_nthbyte(s::String, i) = codeunit(s, i)
_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i]

_searchindex(s::String, t::String, i::Integer) =
# Forward search of `t` in `s` starting at index `i`, for two `String`s.
# A needle whose last index is 1 (a single character) is handed to the character
# search; every other needle is searched for on the raw bytes of both strings.
function _searchindex(s::String, t::String, i::Integer)
    if endof(t) == 1
        # Fast path: single-character needle
        return findnext(equalto(t[1]), s, i)
    end
    haystack = unsafe_wrap(Vector{UInt8}, s)
    needle = unsafe_wrap(Vector{UInt8}, t)
    return _searchindex(haystack, needle, i)
end

function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
n = sizeof(t)
@@ -220,43 +223,10 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
0
end

searchindex(s::ByteArray, t::ByteArray, i::Integer) = _searchindex(s,t,i)

"""
searchindex(s::AbstractString, substring, [start::Integer])

Similar to `search`, but return only the start index at which
the substring is found, or `0` if it is not.

# Examples
```jldoctest
julia> searchindex("Hello to the world", "z")
0

julia> searchindex("JuliaLang","Julia")
1

julia> searchindex("JuliaLang","Lang")
6
```
"""
searchindex(s::AbstractString, t::AbstractString, i::Integer) = _searchindex(s,t,i)
searchindex(s::AbstractString, t::AbstractString) = searchindex(s,t,start(s))
searchindex(s::AbstractString, c::Char, i::Integer) = _searchindex(s,c,i)
searchindex(s::AbstractString, c::Char) = searchindex(s,c,start(s))

# `String` specialisation of `searchindex`; the start index `i` defaults to 1.
function searchindex(s::String, t::String, i::Integer=1)
    # A needle whose last index is 1 holds a single character: search for it directly
    endof(t) == 1 && return findnext(equalto(t[1]), s, i)
    # Every other needle goes through the internal `_searchindex` implementation
    return _searchindex(s, t, i)
end

function _search(s, t, i::Integer)
idx = searchindex(s,t,i)
function _search(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,Char,Int8,UInt8},
i::Integer)
idx = _searchindex(s,t,i)
if isempty(t)
idx:idx-1
else
@@ -291,8 +261,6 @@ julia> findnext("Julia", "JuliaLang", 2)
```
"""
findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, i)
# TODO: remove?
findnext(t::ByteArray, s::ByteArray, i::Integer) = _search(s, t, i)

"""
findlast(pattern::AbstractString, string::AbstractString)
@@ -363,8 +331,21 @@ function _rsearchindex(s::AbstractString,
end
end

_rsearchindex(s::String, t::String, i::Integer) =
_rsearchindex(unsafe_wrap(Vector{UInt8}, s), unsafe_wrap(Vector{UInt8}, t), i)
# Backward search of `t` in `s` starting from index `i`, for two `String`s.
#   * single-character needle -> character search via `findprev`
#   * other non-empty needle  -> byte-array `_rsearchindex` on the raw bytes
#   * empty needle            -> 0 if `i > sizeof(s)`, 1 if `i == 0`, otherwise `i`
function _rsearchindex(s::String, t::String, i::Integer)
    tlast = endof(t)
    if tlast == 1
        return findprev(equalto(t[1]), s, i)
    end
    if tlast != 0
        # Within the string, start from the code unit just before the next
        # character index; past the end, keep `i` unchanged.
        bytepos = if i ≤ ncodeunits(s)
            nextind(s, i) - 1
        else
            i
        end
        return _rsearchindex(unsafe_wrap(Vector{UInt8}, s),
                             unsafe_wrap(Vector{UInt8}, t), bytepos)
    end
    # From here on the needle is empty
    i > sizeof(s) && return 0
    return i == 0 ? 1 : i
end

function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
n = sizeof(t)
@@ -427,54 +408,10 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
0
end

rsearchindex(s::ByteArray, t::ByteArray, i::Integer) = _rsearchindex(s,t,i)

"""
rsearchindex(s::AbstractString, substring, [start::Integer])

Similar to `rsearch`, but return only the start index at which the substring is found, or `0` if it is not.

# Examples
```jldoctest
julia> rsearchindex("aaabbb","b")
6

julia> rsearchindex("aaabbb","a")
3
```
"""
rsearchindex(s::AbstractString, t::AbstractString, i::Integer) = _rsearchindex(s,t,i)
rsearchindex(s::AbstractString, t::AbstractString) = (isempty(s) && isempty(t)) ? 1 : rsearchindex(s,t,endof(s))

# `String` specialisation of the two-argument `rsearchindex`: search backwards
# over the whole of `s` and return the start index of the last match of `t`.
function rsearchindex(s::String, t::String)
    # Check for the fast case of a single-character needle
    # (all other needles are handed to `_rsearchindex`, starting from the last byte)
    if endof(t) == 1
        # `findprev` takes an explicit start position; begin at the last index of
        # `s`, exactly as the generic two-argument method does.
        return findprev(equalto(t[1]), s, endof(s))
    else
        return _rsearchindex(s, t, sizeof(s))
    end
end

# `String` specialisation of the three-argument `rsearchindex`.
function rsearchindex(s::String, t::String, i::Integer)
    needle_end = endof(t)
    # Single-character needle: plain backward character search
    needle_end == 1 && return findprev(equalto(t[1]), s, i)
    if needle_end == 0
        # Empty needle: nothing past the end, index 1 for a start of 0, else `i`
        if i > sizeof(s)
            return 0
        elseif i == 0
            return 1
        end
        return i
    end
    # Longer needle: inside the string, start from the code unit just before the
    # next character index; otherwise keep `i` as given.
    from = i ≤ ncodeunits(s) ? nextind(s, i) - 1 : i
    return _rsearchindex(s, t, from)
end

function _rsearch(s, t, i::Integer)
idx = rsearchindex(s,t,i)
function _rsearch(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,Char,Int8,UInt8},
i::Integer)
idx = _rsearchindex(s,t,i)
if isempty(t)
idx:idx-1
else
@@ -509,8 +446,6 @@ julia> findprev("Julia", "JuliaLang", 6)
```
"""
findprev(t::AbstractString, s::AbstractString, i::Integer) = _rsearch(s, t, i)
# TODO: remove?
findprev(t::ByteArray, s::ByteArray, i::Integer) = _rsearch(s, t, i)

"""
contains(haystack::AbstractString, needle::Union{AbstractString,Char})
@@ -523,6 +458,7 @@ julia> contains("JuliaLang is pretty cool!", "Julia")
true
```
"""
contains(haystack::AbstractString, needle::Union{AbstractString,Char}) = searchindex(haystack,needle)!=0
contains(haystack::AbstractString, needle::Union{AbstractString,Char}) =
_searchindex(haystack, needle, start(haystack)) != 0

# `in`/`∈` between two strings is deliberately an error; the message points callers
# to `contains` for substring tests.
in(::AbstractString, ::AbstractString) = error("use contains(x,y) for string containment")
2 changes: 0 additions & 2 deletions doc/src/stdlib/strings.md
Original file line number Diff line number Diff line change
@@ -37,8 +37,6 @@ Base.findfirst(::AbstractString, ::AbstractString)
Base.findnext(::AbstractString, ::AbstractString, ::Integer)
Base.findlast(::AbstractString, ::AbstractString)
Base.findprev(::AbstractString, ::AbstractString, ::Integer)
Base.searchindex
Base.rsearchindex
Base.contains(::AbstractString, ::AbstractString)
Base.reverse(::Union{String,SubString{String}})
Base.replace(s::AbstractString, ::Pair)
60 changes: 2 additions & 58 deletions test/strings/search.jl
Original file line number Diff line number Diff line change
@@ -9,27 +9,18 @@ u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
for ind in (0, 5)
@test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
@test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
@test_throws BoundsError searchindex("foo", SubString("",1,1), ind)
@test_throws BoundsError rsearchindex("foo", SubString("",1,1), ind)
end

# Note: the commented out tests will be enabled after fixes to make
# sure that findnext/findprev/searchindex/rsearchindex are consistent
# Note: the commented out test will be enabled after fixes to make
# sure that findnext/findprev are consistent
# no matter what type of AbstractString the second argument is
@test_throws BoundsError findnext(equalto('a'), "foo", 0)
@test_throws BoundsError findnext(occursin(Char[]), "foo", 5)
# @test_throws BoundsError findprev(occursin(Char[]), "foo", 0)
@test_throws BoundsError findprev(occursin(Char[]), "foo", 5)

# @test_throws BoundsError searchindex("foo", Char[], 0)
# @test_throws BoundsError searchindex("foo", Char[], 5)
# @test_throws BoundsError rsearchindex("foo", Char[], 0)
# @test_throws BoundsError rsearchindex("foo", Char[], 5)

# @test_throws ErrorException in("foobar","bar")
@test_throws BoundsError findnext(equalto(0x1),b"\x1\x2",0)
@test rsearchindex(b"foo",b"o",0) == 0
@test rsearchindex(SubString("",1,0),SubString("",1,0)) == 1

# ascii forward search
for str in [astr, GenericString(astr)]
@@ -308,10 +299,6 @@ end
@test findlast("az", "foo,bar,baz") == 10:11
@test findprev("az", "foo,bar,baz", 10) == 0:-1

# array backward search
@test findprev(UInt8[2,3],UInt8[1,2,3],3) == 2:3
@test findprev(UInt8[2,3],UInt8[1,2,3],1) == 0:-1

# string search with a two-char regex
@test findfirst(r"xx", "foo,bar,baz") == 0:-1
@test findfirst(r"fo", "foo,bar,baz") == 1:2
@@ -326,53 +313,10 @@ end
@test findfirst(r"az", "foo,bar,baz") == 10:11
@test findnext(r"az", "foo,bar,baz", 12) == 0:-1

@test searchindex("foo", 'o') == 2
@test searchindex("foo", 'o', 3) == 3

# string searchindex with a two-char UTF-8 (2 byte) string literal
@test searchindex("ééé", "éé") == 1
@test searchindex("ééé", "éé", 1) == 1
# string searchindex with a two-char UTF-8 (3 byte) string literal
@test searchindex("€€€", "€€") == 1
@test searchindex("€€€", "€€", 1) == 1
# string searchindex with a two-char UTF-8 (4 byte) string literal
@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1
@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1

# string searchindex with a two-char UTF-8 (2 byte) string literal
@test searchindex("éé", "éé") == 1
@test searchindex("éé", "éé", 1) == 1
# string searchindex with a two-char UTF-8 (3 byte) string literal
@test searchindex("€€", "€€") == 1
@test searchindex("€€", "€€", 1) == 1
# string searchindex with a two-char UTF-8 (4 byte) string literal
@test searchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
@test searchindex("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1

# contains with a String and Char needle
@test contains("foo", "o")
@test contains("foo", 'o')

# string rsearchindex with a two-char UTF-8 (2 byte) string literal
@test rsearchindex("ééé", "éé") == 3
@test rsearchindex("ééé", "éé", endof("ééé")) == 3
# string rsearchindex with a two-char UTF-8 (3 byte) string literal
@test rsearchindex("€€€", "€€") == 4
@test rsearchindex("€€€", "€€", endof("€€€")) == 4
# string rsearchindex with a two-char UTF-8 (4 byte) string literal
@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5
@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5

# string rsearchindex with a two-char UTF-8 (2 byte) string literal
@test rsearchindex("éé", "éé") == 1
@test rsearchindex("éé", "éé", endof("ééé")) == 1
# string rsearchindex with a two-char UTF-8 (3 byte) string literal
@test rsearchindex("€€", "€€") == 1
@test rsearchindex("€€", "€€", endof("€€€")) == 1
# string rsearchindex with a two-char UTF-8 (4 byte) string literal
@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1

@test_throws ErrorException "ab" ∈ "abc"

# issue #15723