Skip to content

Commit c5178ef

Browse files
remove RevString; efficient generic reverseind
These seem unrelated, but they're actually linked: * If you reverse generic strings by wrapping them in `RevString` then then this generic `reverseind` is incorrect. * In order to have a correct generic `reverseind` one needs to assume that `reverse(s)` returns a string of the same type and encoding as `s` with code points in reverse order; one also needs to assume that the code units encoding each character remain the same when reversed. This is a valid assumption for UTF-8, UTF-16 and (trivially) UTF-32. Reverse string search functions are pretty messed up by this and I've fixed them well enough to work but they may be quite inefficient for long strings now. I'm not going to spend too much time on this since there's other work going on to generalize and unify searching APIs. Close #22611 Close #24613 See also: #10593 #23612 #24103
1 parent 49a564a commit c5178ef

12 files changed

+78
-107
lines changed

NEWS.md

+11
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,12 @@ This section lists changes that do not have deprecation warnings.
290290
Its return value has been removed. Use the `process_running` function
291291
to determine if a process has already exited.
292292

293+
* The `RevString` type has been removed from the language; `reverse(::String)` returns
294+
a `String` with code points (or fragments thereof) in reverse order. In general,
295+
`reverse(s)` should return a string of the same type and encoding as `s` with code
296+
points in reverse order; any string type that does so much override the `reverseind`
297+
function to correctly compute reversed indices into the string type it returns.
298+
293299
Library improvements
294300
--------------------
295301

@@ -392,6 +398,11 @@ Library improvements
392398
The generic definition is constant time but calls `endof(s)` which may be inefficient.
393399
Therefore custom string types may want to define direct `ncodeunits` methods.
394400

401+
* `reverseind(s::AbstractString, i::Integer)` now has an efficient generic fallback, so
402+
custom string types do not need to provide their own efficient defintions. The generic
403+
definition relies on `ncodeunits` however, so for optimal performance you may need to
404+
define a custom method for that function.
405+
395406
Compiler/Runtime improvements
396407
-----------------------------
397408

base/exports.jl

-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ export
8888
Rational,
8989
Regex,
9090
RegexMatch,
91-
RevString,
9291
RoundFromZero,
9392
RoundDown,
9493
RoundingMode,

base/precompile.jl

-3
Original file line numberDiff line numberDiff line change
@@ -581,9 +581,6 @@ precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.LineEdit.PromptState,
581581
precompile(Tuple{typeof(Base.LineEdit.input_string_newlines_aftercursor), Base.LineEdit.PromptState})
582582
precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.REPL.REPLCompletionProvider, Base.LineEdit.PromptState})
583583
precompile(Tuple{getfield(Base, Symbol("#kw##parse")), Array{Any, 1}, typeof(Base.parse), String})
584-
precompile(Tuple{typeof(Base.isvalid), Base.RevString{String}, Int64})
585-
precompile(Tuple{typeof(Base.nextind), Base.RevString{String}, Int64})
586-
precompile(Tuple{typeof(Base.search), Base.RevString{String}, Array{Char, 1}, Int64})
587584
precompile(Tuple{typeof(Base.rsearch), String, Array{Char, 1}, Int64})
588585
precompile(Tuple{getfield(Base.REPLCompletions, Symbol("#kw##find_start_brace")), Array{Any, 1}, typeof(Base.REPLCompletions.find_start_brace), String})
589586
precompile(Tuple{typeof(Core.Inference.isbits), Tuple{Void, Void, Void}})

base/repl/REPLCompletions.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ end
225225
# closed start brace from the end of the string.
226226
function find_start_brace(s::AbstractString; c_start='(', c_end=')')
227227
braces = 0
228-
r = RevString(s)
228+
r = reverse(s)
229229
i = start(r)
230230
in_single_quotes = false
231231
in_double_quotes = false
@@ -259,7 +259,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')')
259259
braces == 1 && break
260260
end
261261
braces != 1 && return 0:-1, -1
262-
method_name_end = reverseind(r, i)
262+
method_name_end = reverseind(s, i)
263263
startind = nextind(s, rsearch(s, non_identifier_chars, method_name_end))
264264
return (startind:endof(s), method_name_end)
265265
end

base/shell.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
1414
special::AbstractString="")
1515
s = lstrip(str)
1616
# strips the end but respects the space when the string ends with "\\ "
17-
r = RevString(s)
17+
r = reverse(s)
1818
i = start(r)
1919
c_old = nothing
2020
while !done(r,i)

base/strings/search.jl

+26-26
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,6 @@ end
194194
search(s::AbstractString, t::AbstractString, i::Integer=start(s)) = _search(s, t, i)
195195
search(s::ByteArray, t::ByteArray, i::Integer=start(s)) = _search(s, t, i)
196196

197-
function rsearch(s::AbstractString, c::Chars)
198-
j = search(RevString(s), c)
199-
j == 0 && return 0
200-
endof(s)-j+1
201-
end
202-
203197
"""
204198
rsearch(s::AbstractString, chars::Chars, [start::Integer])
205199
@@ -212,44 +206,50 @@ julia> rsearch("aaabbb","b")
212206
6:6
213207
```
214208
"""
215-
function rsearch(s::AbstractString, c::Chars, i::Integer)
216-
e = endof(s)
217-
j = search(RevString(s), c, e-i+1)
218-
j == 0 && return 0
219-
e-j+1
209+
function rsearch(s::AbstractString, c::Chars, i::Integer=start(s))
210+
if i < 1
211+
return i == 0 ? 0 : throw(BoundsError(s, i))
212+
end
213+
n = ncodeunits(s)
214+
if i > n
215+
return i == n+1 ? 0 : throw(BoundsError(s, i))
216+
end
217+
# r[reverseind(r,i)] == reverse(r)[i] == s[i]
218+
# s[reverseind(s,j)] == reverse(s)[j] == r[j]
219+
r = reverse(s)
220+
j = search(r, c, reverseind(r, i))
221+
j == 0 ? 0 : reverseind(s, j)
220222
end
221223

222224
function _rsearchindex(s, t, i)
223225
if isempty(t)
224-
return 1 <= i <= nextind(s,endof(s)) ? i :
226+
return 1 <= i <= nextind(s, endof(s)) ? i :
225227
throw(BoundsError(s, i))
226228
end
227-
t = RevString(t)
228-
rs = RevString(s)
229+
t = reverse(t)
230+
rs = reverse(s)
229231
l = endof(s)
230-
t1, j2 = next(t,start(t))
232+
t1, j2 = next(t, start(t))
231233
while true
232-
i = rsearch(s,t1,i)
233-
if i == 0 return 0 end
234-
c, ii = next(rs,l-i+1)
234+
i = rsearch(s, t1, i)
235+
i == 0 && return 0
236+
c, ii = next(rs, reverseind(rs, i))
235237
j = j2; k = ii
236238
matched = true
237-
while !done(t,j)
238-
if done(rs,k)
239+
while !done(t, j)
240+
if done(rs, k)
239241
matched = false
240242
break
241243
end
242-
c, k = next(rs,k)
243-
d, j = next(t,j)
244+
c, k = next(rs, k)
245+
d, j = next(t, j)
244246
if c != d
245247
matched = false
246248
break
247249
end
248250
end
249-
if matched
250-
return nextind(s,l-k+1)
251-
end
252-
i = l-ii+1
251+
matched && return nextind(s, reverseind(s, k))
252+
i = reverseind(s, ii)
253253
end
254254
end
255255

base/strings/string.jl

-8
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,6 @@ function first_utf8_byte(ch::Char)
295295
return b
296296
end
297297

298-
function reverseind(s::String, i::Integer)
299-
j = sizeof(s) + 1 - i
300-
@inbounds while is_valid_continuation(codeunit(s, j))
301-
j -= 1
302-
end
303-
return j
304-
end
305-
306298
## overload methods for efficiency ##
307299

308300
isvalid(s::String, i::Integer) =

base/strings/strings.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
include("strings/errors.jl")
4-
include("strings/types.jl")
4+
include("strings/substring.jl")
55
include("strings/basic.jl")
66
include("strings/search.jl")
77
include("strings/util.jl")

base/strings/types.jl base/strings/substring.jl

+15-36
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
# SubString and RevString types
4-
5-
## substrings reference original strings ##
6-
73
"""
84
SubString(s::AbstractString, i::Integer, j::Integer=endof(s))
95
SubString(s::AbstractString, r::UnitRange{<:Integer})
@@ -123,32 +119,13 @@ function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int
123119
convert(Ptr{R}, pointer(s.string)) + s.offset
124120
end
125121

126-
## reversed strings without data movement ##
127-
128-
struct RevString{T<:AbstractString} <: AbstractString
129-
string::T
130-
end
131-
132-
endof(s::RevString) = endof(s.string)
133-
length(s::RevString) = length(s.string)
134-
sizeof(s::RevString) = sizeof(s.string)
135-
136-
function next(s::RevString, i::Int)
137-
n = endof(s); j = n-i+1
138-
(s.string[j], n-prevind(s.string,j)+1)
139-
end
140-
141122
"""
142-
reverse(s::AbstractString) -> AbstractString
143-
144-
Reverses a string.
123+
reverse(s::AbstractString) -> String
145124
146-
Technically, this function reverses the codepoints in a string, and its
147-
main utility is for reversed-order string processing, especially for reversed
148-
regular-expression searches. See also [`reverseind`](@ref) to convert indices
149-
in `s` to indices in `reverse(s)` and vice-versa, and [`graphemes`](@ref)
150-
to operate on user-visible "characters" (graphemes) rather than codepoints.
151-
See also [`Iterators.reverse`](@ref) for reverse-order iteration without making a copy.
125+
Reverses a string. Technically, this function reverses the codepoints in a string. See also
126+
[`reverseind`](@ref) to convert indices in `s` to indices in `reverse(s)` and vice-versa, and
127+
[`graphemes`](@ref) to operate on user-visible "characters" (graphemes) rather than codepoints. See
128+
also [`Iterators.reverse`](@ref) for reverse-order iteration without making a copy.
152129
153130
# Examples
154131
```jldoctest
@@ -162,10 +139,15 @@ julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes
162139
"ex̂a"
163140
```
164141
"""
165-
reverse(s::AbstractString) = RevString(s)
166-
reverse(s::RevString) = s.string
167-
168-
## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)]
142+
function reverse(s::AbstractString)::String
143+
sprint() do io
144+
i, j = start(s), endof(s)
145+
while i j
146+
c, j = s[j], prevind(s, j)
147+
write(io, c)
148+
end
149+
end
150+
end
169151

170152
"""
171153
reverseind(v, i)
@@ -185,10 +167,7 @@ julia> for i in 1:length(r)
185167
Julia
186168
```
187169
"""
188-
reverseind(s::AbstractString, i) = chr2ind(s, length(s) + 1 - ind2chr(reverse(s), i))
189-
reverseind(s::RevString, i::Integer) = endof(s) - i + 1
190-
reverseind(s::SubString{String}, i::Integer) =
191-
reverseind(s.string, nextind(s.string, endof(s.string))-s.offset-s.endof+i-1) - s.offset
170+
reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1)
192171

193172
"""
194173
repeat(s::AbstractString, r::Integer)

base/strings/util.jl

+7-8
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,15 @@ julia> rstrip(a)
190190
```
191191
"""
192192
function rstrip(s::AbstractString, chars::Chars=_default_delims)
193-
r = RevString(s)
194-
i = start(r)
195-
while !done(r,i)
196-
c, j = next(r,i)
197-
if !(c in chars)
198-
return SubString(s, 1, endof(s)-i+1)
199-
end
193+
a = start(s)
194+
i = endof(s)
195+
while a  i
196+
c = s[i]
197+
j = prevind(s, i)
198+
c in chars || return SubString(s, 1:i)
200199
i = j
201200
end
202-
SubString(s, 1, 0)
201+
SubString(s, a, a-1)
203202
end
204203

205204
"""

test/replcompletions.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
using Base.REPLCompletions
44

55
let ex = quote
6-
module CompletionFoo
6+
module CompletionFoo
77
mutable struct Test_y
88
yy
99
end

test/strings/types.jl

+14-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
## SubString, RevString and Cstring tests ##
3+
## SubString and Cstring tests ##
44

55
## SubString tests ##
66
u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
@@ -207,31 +207,13 @@ let s = "|η(α)-ϕ(κ)| < ε"
207207
@test length(SubString(s,4,11))==length(s[4:11])
208208
end
209209

210-
## Reverse strings ##
211-
212-
let rs = RevString("foobar")
213-
@test length(rs) == 6
214-
@test sizeof(rs) == 6
215-
@test isascii(rs)
216-
end
217-
218-
# issue #4586
219-
@test rsplit(RevString("ailuj"),'l') == ["ju","ia"]
220-
@test parse(Float64,RevString("64")) === 46.0
221-
222-
# reverseind
223-
for T in (String, GenericString)
210+
@testset "reverseind" for T in (String, SubString, GenericString)
224211
for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
225212
for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
226213
for c in ('X', 'δ', '\U0001d6a5')
227214
s = convert(T, string(prefix, c, suffix))
228215
r = reverse(s)
229216
ri = search(r, c)
230-
@test r == RevString(s)
231-
@test c == s[reverseind(s, ri)] == r[ri]
232-
s = RevString(s)
233-
r = reverse(s)
234-
ri = search(r, c)
235217
@test c == s[reverseind(s, ri)] == r[ri]
236218
s = convert(T, string(prefix, prefix, c, suffix, suffix))
237219
pre = convert(T, prefix)
@@ -244,6 +226,18 @@ for T in (String, GenericString)
244226
end
245227
end
246228

229+
@testset "reverseind of empty strings" begin
230+
for s in ("",
231+
SubString("", 1, 0),
232+
SubString("ab", 1, 0),
233+
SubString("ab", 2, 1),
234+
SubString("ab", 3, 2),
235+
GenericString(""))
236+
@test reverseind(s, 0) == 1
237+
@test reverseind(s, 1) == 0
238+
end
239+
end
240+
247241
## Cstring tests ##
248242

249243
# issue #13974: comparison against pointers

0 commit comments

Comments
 (0)