Skip to content

Commit c67c828

Browse files
committed
Support backcapture references in `replace`'s replacement string
1 parent a4a75dc commit c67c828

File tree

5 files changed

+108
-16
lines changed

5 files changed

+108
-16
lines changed

base/exports.jl

+1
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ export
13371337
# notation for certain types
13381338
@b_str, # byte vector
13391339
@r_str, # regex
1340+
@s_str, # regex substitution string
13401341
@v_str, # version number
13411342

13421343
# documentation

base/pcre.jl

+17
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,23 @@ function substring_number_from_name(re, name)
140140
(Ptr{Void}, Cstring), re, name)
141141
end
142142

143+
# Return, as an `Int`, the length that `pcre2_substring_length_bynumber_8`
# reports for capture group `number` of `match_data`.
# Raises an error carrying the PCRE message when the call returns a negative
# status code.
function substring_length_bynumber(match_data, number)
    len = Ref{Csize_t}()
    status = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
                   (Ptr{Void}, UInt32, Ref{Csize_t}),
                   match_data, number, len)
    if status < 0
        error("PCRE error: $(err_message(status))")
    end
    return convert(Int, len[])
end
150+
151+
# Copy capture group `number` of `match_data` into `buf` via
# `pcre2_substring_copy_bynumber_8`. `buf_size` is handed to PCRE as the
# in/out size argument; the value PCRE leaves there is returned as an `Int`.
# Raises an error carrying the PCRE message when the call returns a negative
# status code.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    size_ref = Ref{Csize_t}(buf_size)
    status = ccall((:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
                   (Ptr{Void}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
                   match_data, number, buf, size_ref)
    if status < 0
        error("PCRE error: $(err_message(status))")
    end
    return convert(Int, size_ref[])
end
159+
143160
function capture_names(re)
144161
name_count = info(re, INFO_NAMECOUNT, UInt32)
145162
name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)

base/regex.jl

+78-6
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,86 @@ end
208208
# Regex search on a general AbstractString is rejected with an ArgumentError
# whose message directs the caller to convert with bytestring(s).
function search(s::AbstractString, r::Regex, idx::Integer)
    throw(ArgumentError("regex search is only available for bytestrings; use bytestring(s) to convert"))
end

# Two-argument form: start searching at the first index of `s`.
function search(s::AbstractString, r::Regex)
    return search(s, r, start(s))
end
211-
_search(s::AbstractString, r::Regex, idx::Integer) = match(r, s, idx)
212211

213-
first(m::RegexMatch) = m.offset
214-
last(m::RegexMatch) = m.offset + length(m.match) - 1
212+
# Wrapper around a string that marks it as a regex substitution template
# (the value produced by the `s"..."` literal).
immutable SubstitutionString{T<:AbstractString} <: AbstractString
    string::T
end

# Indexing bounds and iteration are delegated to the wrapped string.
function endof(s::SubstitutionString)
    return endof(s.string)
end

function next(s::SubstitutionString, idx::Int)
    return next(s.string, idx)
end

# Display as an `s` prefix followed by the shown form of the wrapped string.
function show(io::IO, s::SubstitutionString)
    print(io, "s")
    show(io, s.string)
end
222+
223+
# `s"..."` string literal: wrap the literal's text in a SubstitutionString.
macro s_str(raw)
    SubstitutionString(raw)
end
224+
225+
# Raise an error that names the offending replacement string `repl`.
function replace_err(repl)
    error("Bad replacement string: $repl")
end
226+
227+
# Append the text of capture group `group`, taken from `re.match_data`,
# to `io` by having PCRE copy it straight into the buffer's storage.
#
# `io` must expose the `data`, `ptr` and `size` fields used below.
# Returns the updated `io.ptr`.
function _write_capture(io, re, group)
    len = PCRE.substring_length_bynumber(re.match_data, group)
    # Reserve one byte more than the capture: the copy is told the buffer
    # holds len+1 bytes (presumably room for a terminating NUL written by
    # PCRE -- confirm against the PCRE2 documentation).
    ensureroom(io, len+1)
    PCRE.substring_copy_bynumber(re.match_data, group,
                                 pointer(io.data, io.ptr), len+1)
    io.ptr += len
    # The bytes were written behind the buffer's back, so record the new
    # extent explicitly; without this a capture written last is not counted
    # as buffer content and is missing from the output.
    io.size = max(io.size, io.ptr - 1)
    return io.ptr
end
215234

216-
function _replace(io, repl::AbstractString, str, m::RegexMatch)
217-
write(io, "repl")
218-
write(io, repl)
235+
# Expand the substitution template `repl_s` and append the result to `io`.
#
# Recognised sequences inside the template:
#   \\           a single literal backslash
#   \N           capture group number N (one or more decimal digits)
#   \g<N>        capture group number N
#   \g<name>     the named capture group `name`
# Every other character is copied through unchanged. Capture text is read
# from the match data held by `re` (see `_write_capture`). `str` and `r` are
# accepted for signature compatibility with the other `_replace` methods and
# are not used here.
#
# A malformed template (trailing backslash, unknown escape, unterminated or
# empty `\g<...>`, unknown group name) is reported through `replace_err`.
function _replace(io, repl_s::SubstitutionString, str, r, re)
    SUB_CHAR = '\\'
    GROUP_CHAR = 'g'
    LBRACKET = '<'
    RBRACKET = '>'
    repl = repl_s.string
    i = start(repl)
    e = endof(repl)
    while i <= e
        if repl[i] == SUB_CHAR
            next_i = nextind(repl, i)
            next_i > e && replace_err(repl)
            if repl[next_i] == SUB_CHAR
                # An escaped backslash stands for exactly one backslash.
                write(io, SUB_CHAR)
                i = nextind(repl, next_i)
            elseif isdigit(repl[next_i])
                # Only decimal digits are accepted: they are the only
                # characters parse(Int, ...) can convert.
                group = parse(Int, repl[next_i])
                i = nextind(repl, next_i)
                while i <= e && isdigit(repl[i])
                    group = 10group + parse(Int, repl[i])
                    i = nextind(repl, i)
                end
                _write_capture(io, re, group)
            elseif repl[next_i] == GROUP_CHAR
                i = nextind(repl, next_i)
                if i > e || repl[i] != LBRACKET
                    replace_err(repl)
                end
                i = nextind(repl, i)
                i > e && replace_err(repl)
                groupstart = i
                # Scan forward to the closing bracket; running off the end
                # means the reference was never terminated.
                while repl[i] != RBRACKET
                    i = nextind(repl, i)
                    i > e && replace_err(repl)
                end
                groupname = SubString(repl, groupstart, prevind(repl, i))
                isempty(groupname) && replace_err(repl)
                if all(isdigit, groupname)
                    _write_capture(io, re, parse(Int, groupname))
                else
                    group = PCRE.substring_number_from_name(re.regex, groupname)
                    # NOTE(review): assumes a negative result signals an
                    # unknown name (PCRE2 error code) -- confirm against
                    # pcre2_substring_number_from_name.
                    group < 0 && replace_err(repl)
                    _write_capture(io, re, group)
                end
                i = nextind(repl, i)
            else
                replace_err(repl)
            end
        else
            write(io, repl[i])
            i = nextind(repl, i)
        end
    end
end
220292

221293
immutable RegexMatchIterator

base/string.jl

+5-7
Original file line numberDiff line numberDiff line change
@@ -1327,11 +1327,9 @@ function _rsplit{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, k
13271327
end
13281328
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
13291329

1330-
_replace(io, repl, str, r) = write(io, repl)
1331-
_replace(io, repl::Function, str, r) =
1330+
# Default replacement: write `repl` itself to `io`. `str`, `r` and `pattern`
# are not used by this method.
function _replace(io, repl, str, r, pattern)
    return write(io, repl)
end

# Function replacement: call `repl` on the part of `str` spanning
# first(r) to last(r) and write whatever it returns.
function _replace(io, repl::Function, str, r, pattern)
    matched = SubString(str, first(r), last(r))
    return write(io, repl(matched))
end
1333-
_search(str, pattern, offset) = search(str, pattern, offset)
1334-
13351333

13361334
function replace(str::ByteString, pattern, repl, limit::Integer)
13371335
n = 1
@@ -1340,10 +1338,11 @@ function replace(str::ByteString, pattern, repl, limit::Integer)
13401338
r = search(str,pattern,i)
13411339
j, k = first(r), last(r)
13421340
out = IOBuffer()
1341+
ensureroom(out, floor(Int, 1.2sizeof(str)))
13431342
while j != 0
13441343
if i == a || i <= k
13451344
write_sub(out, str.data, i, j-i)
1346-
_replace(out, repl, str, r)
1345+
_replace(out, repl, str, r, pattern)
13471346
end
13481347
if k<j
13491348
i = j
@@ -1354,8 +1353,7 @@ function replace(str::ByteString, pattern, repl, limit::Integer)
13541353
if j > e
13551354
break
13561355
end
1357-
r = _search(str,pattern,k)
1358-
r == nothing && break
1356+
r = search(str,pattern,k)
13591357
j, k = first(r), last(r)
13601358
n == limit && break
13611359
n += 1

test/regex.jl

+7-3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ show(buf, r"")
3939
@test_throws ArgumentError search(utf32("this is a test"), r"test")
4040

4141
# Named subpatterns
42-
m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
43-
@test (m[:a], m[2], m["b"]) == ("x", "y", "z")
44-
@test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
42+
# Named and numbered groups are reachable by symbol, index and string key,
# and all of them appear in the shown form of the match.
let m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
    captured = (m[:a], m[2], m["b"])
    @test captured == ("x", "y", "z")
    shown = sprint(show, m)
    @test shown == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
end

# Backcapture reference in substitution string
let replaced = replace("abcde", r"(..)(?P<byname>d)", s"\g<byname>xy\1")
    @test replaced == "adxybce"
end

0 commit comments

Comments
 (0)