@@ -152,12 +152,16 @@ function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32},
152
152
end
153
153
154
154
"""
155
- isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false)
155
+ isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity)
156
156
157
157
Return whether `s1` and `s2` are canonically equivalent Unicode strings. If `casefold=true`,
158
158
ignores case (performs Unicode case-folding); if `stripmark=true`, strips diacritical marks
159
159
and other combining characters.
160
160
161
+ As with [`Unicode.normalize`](@ref), you can also pass an arbitrary
162
+ function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints)
163
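+ to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref).
+ The function is applied to each codepoint of both `s1` and `s2` before it is decomposed and compared.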
164
+
161
165
# Examples
162
166
163
167
For example, the string `"noël"` can be constructed in two canonically equivalent ways
@@ -184,7 +188,7 @@ julia> isequal_normalized(s1, "NOËL", casefold=true)
184
188
true
185
189
```
186
190
"""
187
- function isequal_normalized (s1:: AbstractString , s2:: AbstractString ; casefold:: Bool = false , stripmark:: Bool = false )
191
+ function isequal_normalized (s1:: AbstractString , s2:: AbstractString ; casefold:: Bool = false , stripmark:: Bool = false , chartransform = identity )
188
192
function decompose_next_char! (c, state, d, options, s)
189
193
n = _decompose_char! (c, d, options)
190
194
if n > length (d) # may be possible in future Unicode versions?
@@ -202,11 +206,11 @@ function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bo
202
206
while true
203
207
if j1 > n1
204
208
i1 === nothing && return i2 === nothing && j2 > n2
205
- j1, n1, i1 = decompose_next_char! (UInt32 (i1[1 ]), i1[2 ], d1, options, s1)
209
+ j1, n1, i1 = decompose_next_char! (chartransform ( UInt32 (i1[1 ]) ), i1[2 ], d1, options, s1)
206
210
end
207
211
if j2 > n2
208
212
i2 === nothing && return false
209
- j2, n2, i2 = decompose_next_char! (UInt32 (i2[1 ]), i2[2 ], d2, options, s2)
213
+ j2, n2, i2 = decompose_next_char! (chartransform ( UInt32 (i2[1 ]) ), i2[2 ], d2, options, s2)
210
214
end
211
215
d1[j1] == d2[j2] || return false
212
216
j1 += 1 ; j2 += 1
0 commit comments