Skip to content

Commit b8fc98a

Browse files
committed
Support backcapture references in replacement strings
1 parent 4ebdea0 commit b8fc98a

File tree

7 files changed

+133
-9
lines changed

7 files changed

+133
-9
lines changed

base/exports.jl

+1
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ export
13371337
# notation for certain types
13381338
@b_str, # byte vector
13391339
@r_str, # regex
1340+
@s_str, # regex substitution string
13401341
@v_str, # version number
13411342

13421343
# documentation

base/pcre.jl

+17
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,23 @@ function substring_number_from_name(re, name)
140140
(Ptr{Void}, Cstring), re, name)
141141
end
142142

143+
# Return, as an `Int`, the length PCRE reports for capture group `number` of
# `match_data` (via `pcre2_substring_length_bynumber`). A negative return code
# from PCRE raises an error carrying PCRE's own message.
function substring_length_bynumber(match_data, number)
    len = Ref{Csize_t}()
    status = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
                   (Ptr{Void}, UInt32, Ref{Csize_t}), match_data, number, len)
    if status < 0
        error("PCRE error: $(err_message(status))")
    end
    return convert(Int, len[])
end
150+
151+
# Copy capture group `number` of `match_data` into `buf` (via
# `pcre2_substring_copy_bynumber`). `buf_size` is handed to PCRE as the size of
# `buf`; the value PCRE writes back through that same argument is returned as
# an `Int`. A negative return code from PCRE raises an error.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    len = Ref{Csize_t}(buf_size)
    status = ccall((:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
                   (Ptr{Void}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
                   match_data, number, buf, len)
    if status < 0
        error("PCRE error: $(err_message(status))")
    end
    return convert(Int, len[])
end
159+
143160
function capture_names(re)
144161
name_count = info(re, INFO_NAMECOUNT, UInt32)
145162
name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)

base/regex.jl

+83
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,89 @@ search(s::AbstractString, r::Regex, idx::Integer) =
209209
throw(ArgumentError("regex search is only available for bytestrings; use bytestring(s) to convert"))
210210
# Without a start index, search from the first index of `s`.
function search(s::AbstractString, r::Regex)
    return search(s, r, start(s))
end
211211

212+
# A replacement string whose backslash escapes (`\1`, `\g<name>`, ...) are
# expanded as capture group references by `_replace`. It wraps an underlying
# string and forwards string indexing/iteration to it.
immutable SubstitutionString{T<:AbstractString} <: AbstractString
    string::T
end

# Last valid index: delegated to the wrapped string.
function endof(s::SubstitutionString)
    return endof(s.string)
end

# Iteration step: delegated to the wrapped string.
function next(s::SubstitutionString, idx::Int)
    return next(s.string, idx)
end

# Display as an `s` prefix followed by the wrapped string as `show` renders it.
function show(io::IO, s::SubstitutionString)
    print(io, 's')
    show(io, s.string)
end

# `s"..."` literal: wrap the literal's text in a SubstitutionString.
macro s_str(text)
    SubstitutionString(text)
end
224+
225+
# Raise an error reporting `repl` as a bad replacement string.
function replace_err(repl)
    error("Bad replacement string: $repl")
end
226+
227+
# Append the text of capture group `group`, taken from `re.match_data`,
# directly into the data buffer of `io` at its current position.
function _write_capture(io, re, group)
    nbytes = PCRE.substring_length_bynumber(re.match_data, group)
    # One byte more than the capture length is reserved, and the same
    # `nbytes + 1` is given to the copy as its buffer size.
    # NOTE(review): presumably room for a trailing NUL written by PCRE — confirm.
    room = nbytes + 1
    ensureroom(io, room)
    # Take the destination pointer only after `ensureroom` has run.
    dest = pointer(io.data, io.ptr)
    PCRE.substring_copy_bynumber(re.match_data, group, dest, room)
    # Advance past the capture itself (not the extra byte) and extend the
    # buffer's size if the write went beyond it.
    io.ptr += nbytes
    io.size = max(io.size, io.ptr - 1)
end
235+
236+
# Write the expansion of the substitution string `repl_s` to `io`.
#
# Escapes recognized in the replacement text:
#   \\          a single literal backslash
#   \N          capture group number N (all consecutive digits are consumed)
#   \g<N>       capture group number N
#   \g<name>    the capture group called `name`
# Every other character is copied through unchanged. A malformed escape (a
# trailing backslash, an unknown escape character, an unterminated or empty
# `\g<...>`) or an unknown group name raises an error via `replace_err`.
#
# `str` and `r` are not used by this method; captures are read from
# `re.match_data` through `_write_capture`.
function _replace(io, repl_s::SubstitutionString, str, r, re)
    SUB_CHAR = '\\'
    GROUP_CHAR = 'g'
    LBRACKET = '<'
    RBRACKET = '>'
    repl = repl_s.string
    i = start(repl)
    e = endof(repl)
    while i <= e
        if repl[i] == SUB_CHAR
            next_i = nextind(repl, i)
            next_i > e && replace_err(repl)
            if repl[next_i] == SUB_CHAR
                # An escaped backslash stands for exactly one backslash.
                write(io, SUB_CHAR)
                i = nextind(repl, next_i)
            elseif isdigit(repl[next_i])
                # `isdigit` (ASCII 0-9) rather than `isnumber`: only these
                # characters are accepted by `parse(Int, ...)` below.
                group = parse(Int, repl[next_i])
                i = nextind(repl, next_i)
                # Greedily consume the remaining digits of the group number.
                while i <= e && isdigit(repl[i])
                    group = 10group + parse(Int, repl[i])
                    i = nextind(repl, i)
                end
                _write_capture(io, re, group)
            elseif repl[next_i] == GROUP_CHAR
                i = nextind(repl, next_i)
                if i > e || repl[i] != LBRACKET
                    replace_err(repl)
                end
                i = nextind(repl, i)
                i > e && replace_err(repl)
                groupstart = i
                # Scan to the closing bracket; running off the end is an error.
                while repl[i] != RBRACKET
                    i = nextind(repl, i)
                    i > e && replace_err(repl)
                end
                # `\g<>` names no group at all.
                i == groupstart && replace_err(repl)
                # TODO: avoid this allocation
                groupname = SubString(repl, groupstart, prevind(repl, i))
                if all(isdigit, groupname)
                    _write_capture(io, re, parse(Int, groupname))
                else
                    group = PCRE.substring_number_from_name(re.regex, groupname)
                    group < 0 && replace_err("Group $groupname not found in regex $re")
                    _write_capture(io, re, group)
                end
                # Step past the closing bracket.
                i = nextind(repl, i)
            else
                replace_err(repl)
            end
        else
            write(io, repl[i])
            i = nextind(repl, i)
        end
    end
end
294+
212295
immutable RegexMatchIterator
213296
regex::Regex
214297
string::UTF8String

base/strings/util.jl

+9-5
Original file line numberDiff line numberDiff line change
@@ -173,17 +173,22 @@ function _rsplit{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, k
173173
end
174174
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
175175

176-
function replace(str::ByteString, pattern, repl::Function, limit::Integer)
176+
# Write the replacement for one match to `io`.
#
# A non-function replacement is emitted as text with `print`, so characters,
# numbers and other non-string values appear as `string` would render them
# instead of as their raw in-memory bytes (which is what `write` produces).
_replace(io, repl, str, r, pattern) = print(io, repl)
# A function replacement is called with the matched substring of `str`
# (indices `first(r)` through `last(r)`); its result is emitted as text too.
_replace(io, repl::Function, str, r, pattern) =
    print(io, repl(SubString(str, first(r), last(r))))
179+
180+
function replace(str::ByteString, pattern, repl, limit::Integer)
177181
n = 1
178182
e = endof(str)
179183
i = a = start(str)
180184
r = search(str,pattern,i)
181185
j, k = first(r), last(r)
182186
out = IOBuffer()
187+
ensureroom(out, floor(Int, 1.2sizeof(str)))
183188
while j != 0
184189
if i == a || i <= k
185-
write(out, SubString(str,i,prevind(str,j)))
186-
write(out, string(repl(SubString(str,j,k))))
190+
write_sub(out, str.data, i, j-i)
191+
_replace(out, repl, str, r, pattern)
187192
end
188193
if k<j
189194
i = j
@@ -202,8 +207,7 @@ function replace(str::ByteString, pattern, repl::Function, limit::Integer)
202207
write(out, SubString(str,i))
203208
takebuf_string(out)
204209
end
205-
replace(s::AbstractString, pat, f::Function, n::Integer) = replace(bytestring(s), pat, f, n)
206-
replace(s::AbstractString, pat, r, n::Integer) = replace(s, pat, x->r, n)
210+
# Strings that are not byte strings are converted with `bytestring` first and
# then handled by the ByteString method.
function replace(s::AbstractString, pat, f, n::Integer)
    return replace(bytestring(s), pat, f, n)
end

# Without an explicit count, 0 is passed as the limit.
# NOTE(review): 0 presumably means "no limit" — the check is outside this view.
function replace(s::AbstractString, pat, r)
    return replace(s, pat, r, 0)
end
208212

209213
# hex <-> bytes conversion

doc/manual/strings.rst

+14
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,20 @@ with the number or name of the capture group::
707707
julia> m[2]
708708
"45"
709709

710+
Captures can be referenced in a substitution string when using :func:`replace`
711+
by using ``\n`` to refer to the nth capture group and prefixing the
712+
substitution string with ``s``. Capture group 0 refers to the entire match object.
713+
Named capture groups can be referenced in the substitution with ``\g<groupname>``.
714+
For example::
715+
716+
julia> replace("first second", r"(\w+) (?P<agroup>\w+)", s"\g<agroup> \1")
717+
"second first"
718+
719+
Numbered capture groups can also be referenced as ``\g<n>`` for disambiguation,
720+
as in::
721+
julia> replace("a", r".", s"\g<0>1")
722+
"a1"
723+
710724
You can modify the behavior of regular expressions by some combination
711725
of the flags ``i``, ``m``, ``s``, and ``x`` after the closing double
712726
quote mark. These flags have the same meaning as they do in Perl, as

doc/stdlib/strings.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@
177177

178178
.. function:: replace(string, pat, r[, n])
179179

180-
Search for the given pattern ``pat``, and replace each occurrence with ``r``. If ``n`` is provided, replace at most ``n`` occurrences. As with search, the second argument may be a single character, a vector or a set of characters, a string, or a regular expression. If ``r`` is a function, each occurrence is replaced with ``r(s)`` where ``s`` is the matched substring.
180+
Search for the given pattern ``pat``, and replace each occurrence with ``r``. If ``n`` is provided, replace at most ``n`` occurrences. As with search, the second argument may be a single character, a vector or a set of characters, a string, or a regular expression. If ``r`` is a function, each occurrence is replaced with ``r(s)`` where ``s`` is the matched substring. If ``pat`` is a regular expression and ``r`` is a ``SubstitutionString``, then capture group references in ``r`` are replaced with the corresponding matched text.
181181

182182
.. function:: split(string, [chars]; limit=0, keep=true)
183183

test/regex.jl

+8-3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ show(buf, r"")
3939
@test_throws ArgumentError search(utf32("this is a test"), r"test")
4040

4141
# Named subpatterns
42-
m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
43-
@test (m[:a], m[2], m["b"]) == ("x", "y", "z")
44-
@test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
42+
# Captures are reachable by symbol, by position, and by string name.
let m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
    captured = (m[:a], m[2], m["b"])
    @test captured == ("x", "y", "z")
    shown = sprint(show, m)
    @test shown == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
end

# Backcapture reference in substitution string: a named and a numbered group
# are swapped around literal text; an unknown group name must raise.
@test "adxybce" == replace("abcde", r"(..)(?P<byname>d)", s"\g<byname>xy\1")
@test_throws ErrorException replace("a", r"(?P<x>)", s"\g<y>")

0 commit comments

Comments
 (0)