Skip to content

Commit c96c8dd

Browse files
committed
Add isvalid(Type, value) methods, to replace is_valid_*
1 parent ca2ca31 commit c96c8dd

13 files changed

+150
-69
lines changed

NEWS.md

+12
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,13 @@ Deprecated or removed
367367

368368
* Instead of `linrange`, use `linspace` ([#9666]).
369369

370+
* The functions `is_valid_char`, `is_valid_ascii`, `is_valid_utf8`, `is_valid_utf16`, and
371+
`is_valid_utf32` have been replaced by generic `isvalid` methods.
372+
The single-argument form `isvalid(value)` can now be used for values of type `Char`, `ASCIIString`,
373+
`UTF8String`, `UTF16String` and `UTF32String`.
374+
The two-argument form `isvalid(type, value)` can be used with the above types, with values
375+
of type `Vector{UInt8}`, `Vector{UInt16}`, `Vector{UInt32}`, and `Vector{Char}` ([#11241]).
376+
370377
Julia v0.3.0 Release Notes
371378
==========================
372379

@@ -1379,6 +1386,7 @@ Too numerous to mention.
13791386
[#9779]: https://github.com/JuliaLang/julia/issues/9779
13801387
[#9862]: https://github.com/JuliaLang/julia/issues/9862
13811388
[#9957]: https://github.com/JuliaLang/julia/issues/9957
1389+
[#10008]: https://github.com/JuliaLang/julia/issues/10008
13821390
[#10024]: https://github.com/JuliaLang/julia/issues/10024
13831391
[#10031]: https://github.com/JuliaLang/julia/issues/10031
13841392
[#10075]: https://github.com/JuliaLang/julia/issues/10075
@@ -1406,5 +1414,9 @@ Too numerous to mention.
14061414
[#10888]: https://github.com/JuliaLang/julia/issues/10888
14071415
[#10893]: https://github.com/JuliaLang/julia/issues/10893
14081416
[#10914]: https://github.com/JuliaLang/julia/issues/10914
1417+
[#10955]: https://github.com/JuliaLang/julia/issues/10955
14091418
[#10994]: https://github.com/JuliaLang/julia/issues/10994
1419+
[#11105]: https://github.com/JuliaLang/julia/issues/11105
14101420
[#11145]: https://github.com/JuliaLang/julia/issues/11145
1421+
[#11171]: https://github.com/JuliaLang/julia/issues/11171
1422+
[#11241]: https://github.com/JuliaLang/julia/issues/11241

base/ascii.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ ascii(x) = convert(ASCIIString, x)
100100
convert(::Type{ASCIIString}, s::ASCIIString) = s
101101
convert(::Type{ASCIIString}, s::UTF8String) = ascii(s.data)
102102
convert(::Type{ASCIIString}, a::Vector{UInt8}) = begin
103-
is_valid_ascii(a) || throw(ArgumentError("invalid ASCII sequence"))
103+
isvalid(ASCIIString,a) || throw(ArgumentError("invalid ASCII sequence"))
104104
return ASCIIString(a)
105105
end
106106

base/deprecated.jl

+13
Original file line numberDiff line numberDiff line change
@@ -443,3 +443,16 @@ export float32_isvalid, float64_isvalid
443443
@deprecate (&)(x::Char, y::Char) Char(UInt32(x) & UInt32(y))
444444
@deprecate (|)(x::Char, y::Char) Char(UInt32(x) | UInt32(y))
445445
@deprecate ($)(x::Char, y::Char) Char(UInt32(x) $ UInt32(y))
446+
447+
# 11241
448+
449+
@deprecate is_valid_char(ch::Char) isvalid(ch)
450+
@deprecate is_valid_char(ch::Union(Unsigned, Integer)) isvalid(Char, ch)
451+
@deprecate is_valid_ascii(str::ASCIIString) isvalid(str)
452+
@deprecate is_valid_ascii(str::Union(AbstractArray{UInt8}, UTF8String)) isvalid(ASCIIString, str)
453+
@deprecate is_valid_utf8(str::UTF8String) isvalid(str)
454+
@deprecate is_valid_utf8(str::Union(AbstractArray{UInt8}, ASCIIString)) isvalid(UTF8String, str)
455+
@deprecate is_valid_utf16(str::UTF16String) isvalid(str)
456+
@deprecate is_valid_utf16(str::AbstractArray{UInt16}) isvalid(UTF16String, str)
457+
@deprecate is_valid_utf32(str::UTF32String) isvalid(str)
458+
@deprecate is_valid_utf32(str::AbstractArray{UInt32}) isvalid(UTF32String, str)

base/exports.jl

-5
Original file line numberDiff line numberDiff line change
@@ -820,11 +820,6 @@ export
820820
ind2chr,
821821
info,
822822
is_assigned_char,
823-
is_valid_ascii,
824-
is_valid_char,
825-
is_valid_utf8,
826-
is_valid_utf16,
827-
is_valid_utf32,
828823
isalnum,
829824
isalpha,
830825
isascii,

base/io.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ end
246246

247247
function readall(s::IO)
248248
b = readbytes(s)
249-
return is_valid_ascii(b) ? ASCIIString(b) : UTF8String(b)
249+
return isvalid(ASCIIString, b) ? ASCIIString(b) : UTF8String(b)
250250
end
251251
readall(filename::AbstractString) = open(readall, filename)
252252

base/string.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -968,8 +968,8 @@ byte_string_classify(s::ByteString) = byte_string_classify(s.data)
968968
# 1: valid ASCII
969969
# 2: valid UTF-8
970970

971-
is_valid_ascii(s::Union(Array{UInt8,1},ByteString)) = byte_string_classify(s) == 1
972-
is_valid_utf8(s::Union(Array{UInt8,1},ByteString)) = byte_string_classify(s) != 0
971+
isvalid(::Type{ASCIIString}, s::Union(Array{UInt8,1},ByteString)) = byte_string_classify(s) == 1
972+
isvalid(::Type{UTF8String}, s::Union(Array{UInt8,1},ByteString)) = byte_string_classify(s) != 0
973973

974974
## multiline strings ##
975975

base/utf16.jl

+3-5
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ sizeof(s::UTF16String) = sizeof(s.data) - sizeof(UInt16)
9595
unsafe_convert{T<:Union(Int16,UInt16)}(::Type{Ptr{T}}, s::UTF16String) =
9696
convert(Ptr{T}, pointer(s))
9797

98-
function is_valid_utf16(data::AbstractArray{UInt16})
98+
function isvalid(::Type{UTF16String}, data::AbstractArray{UInt16})
9999
i = 1
100100
n = length(data) # this may include NULL termination; that's okay
101101
while i < n # check for unpaired surrogates
@@ -110,10 +110,8 @@ function is_valid_utf16(data::AbstractArray{UInt16})
110110
return i > n || !utf16_is_surrogate(data[i])
111111
end
112112

113-
is_valid_utf16(s::UTF16String) = is_valid_utf16(s.data)
114-
115113
function convert(::Type{UTF16String}, data::AbstractVector{UInt16})
116-
!is_valid_utf16(data) && throw(ArgumentError("invalid UTF16 data"))
114+
!isvalid(UTF16String, data) && throw(ArgumentError("invalid UTF16 data"))
117115
len = length(data)
118116
d = Array(UInt16, len + 1)
119117
d[end] = 0 # NULL terminate
@@ -144,7 +142,7 @@ function convert(T::Type{UTF16String}, bytes::AbstractArray{UInt8})
144142
copy!(d,1, data,1, length(data)) # assume native byte order
145143
end
146144
d[end] = 0 # NULL terminate
147-
!is_valid_utf16(d) && throw(ArgumentError("invalid UTF16 data"))
145+
!isvalid(UTF16String, d) && throw(ArgumentError("invalid UTF16 data"))
148146
UTF16String(d)
149147
end
150148

base/utf32.jl

+5-4
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,14 @@ function convert(T::Type{UTF32String}, bytes::AbstractArray{UInt8})
9292
UTF32String(d)
9393
end
9494

95-
function is_valid_utf32(s::Union(Vector{Char}, Vector{UInt32}))
96-
for i=1:length(s)
97-
@inbounds if !is_valid_char(reinterpret(UInt32, s[i])) ; return false ; end
95+
function isvalid(::Type{UTF32String}, str::Union(Vector{Char}, Vector{UInt32}))
96+
for i=1:length(str)
97+
@inbounds if !isvalid(Char, reinterpret(UInt32, str[i])) ; return false ; end
9898
end
9999
return true
100100
end
101-
is_valid_utf32(s::UTF32String) = is_valid_utf32(s.data)
101+
isvalid(str::Vector{Char}) = isvalid(UTF32String, str)
102+
isvalid{T<:Union(ASCIIString,UTF8String,UTF16String,UTF32String)}(str::T) = isvalid(T, str.data)
102103

103104
utf32(p::Ptr{Char}, len::Integer) = utf32(pointer_to_array(p, len))
104105
utf32(p::Union(Ptr{UInt32}, Ptr{Int32}), len::Integer) = utf32(convert(Ptr{Char}, p), len)

base/utf8.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ write(io::IO, s::UTF8String) = write(io, s.data)
212212
utf8(x) = convert(UTF8String, x)
213213
convert(::Type{UTF8String}, s::UTF8String) = s
214214
convert(::Type{UTF8String}, s::ASCIIString) = UTF8String(s.data)
215-
convert(::Type{UTF8String}, a::Array{UInt8,1}) = is_valid_utf8(a) ? UTF8String(a) : throw(ArgumentError("invalid UTF-8 sequence"))
215+
convert(::Type{UTF8String}, a::Array{UInt8,1}) = isvalid(UTF8String, a) ? UTF8String(a) : throw(ArgumentError("invalid UTF-8 sequence"))
216216
function convert(::Type{UTF8String}, a::Array{UInt8,1}, invalids_as::AbstractString)
217217
l = length(a)
218218
idx = 1

base/utf8proc.jl

+7-5
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,21 @@
33
# Various Unicode functionality from the utf8proc library
44
module UTF8proc
55

6-
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert
6+
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid
77

88
export isgraphemebreak
99

1010
# also exported by Base:
11-
export normalize_string, graphemes, is_valid_char, is_assigned_char, charwidth,
11+
export normalize_string, graphemes, is_assigned_char, charwidth, isvalid,
1212
islower, isupper, isalpha, isdigit, isnumber, isalnum,
1313
iscntrl, ispunct, isspace, isprint, isgraph, isblank
1414

1515
# whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
16-
is_valid_char(ch::Unsigned) = !Bool((ch-0xd800<0x800)|(ch>0x10ffff))
17-
is_valid_char(ch::Integer) = is_valid_char(Unsigned(ch))
18-
is_valid_char(ch::Char) = is_valid_char(UInt32(ch))
16+
isvalid(::Type{Char}, ch::Unsigned) = !((ch - 0xd800 < 0x800) | (ch > 0x10ffff))
17+
isvalid(::Type{Char}, ch::Integer) = isvalid(Char, Unsigned(ch))
18+
isvalid(::Type{Char}, ch::Char) = isvalid(Char, UInt32(ch))
19+
20+
isvalid(ch::Char) = isvalid(Char, ch)
1921

2022
# utf8 category constants
2123
const UTF8PROC_CATEGORY_CN = 0

doc/manual/strings.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,14 @@ convert an integer value back to a :obj:`Char` just as easily:
9999
Not all integer values are valid Unicode code points, but for
100100
performance, the :func:`Char` conversion does not check that every character
101101
value is valid. If you want to check that each converted value is a
102-
valid code point, use the :func:`is_valid_char` function:
102+
valid code point, use the :func:`isvalid` function:
103103

104104
.. doctest::
105105

106106
julia> Char(0x110000)
107107
'\U110000'
108108

109-
julia> is_valid_char(0x110000)
109+
julia> isvalid(Char, 0x110000)
110110
false
111111

112112
As of this writing, the valid Unicode code points are ``U+00`` through

doc/stdlib/strings.rst

+10-12
Original file line numberDiff line numberDiff line change
@@ -109,17 +109,19 @@
109109
even though they may contain more than one codepoint; for example
110110
a letter combined with an accent mark is a single grapheme.)
111111

112-
.. function:: is_valid_ascii(s) -> Bool
112+
.. function:: isvalid(value) -> Bool
113113

114-
Returns true if the argument (``ASCIIString``, ``UTF8String``, or byte vector) is valid ASCII, false otherwise.
114+
Returns true if the given value is valid for its type,
115+
which currently can be one of ``Char``, ``ASCIIString``, ``UTF8String``, ``UTF16String``, or ``UTF32String``.
115116

116-
.. function:: is_valid_utf8(s) -> Bool
117+
.. function:: isvalid(T, value) -> Bool
117118

118-
Returns true if the argument (``ASCIIString``, ``UTF8String``, or byte vector) is valid UTF-8, false otherwise.
119-
120-
.. function:: is_valid_char(c) -> Bool
121-
122-
Returns true if the given char or integer is a valid Unicode code point.
119+
Returns true if the given value is valid for that type.
120+
Types currently can be ``Char``, ``ASCIIString``, ``UTF8String``, ``UTF16String``, or ``UTF32String``.
121+
Values for ``Char`` can be of type ``Char`` or ``UInt32``.
122+
Values for ``ASCIIString`` and ``UTF8String`` can be of that type, or ``Vector{UInt8}``.
123+
Values for ``UTF16String`` can be ``UTF16String`` or ``Vector{UInt16}``.
124+
Values for ``UTF32String`` can be ``UTF32String``, ``Vector{Char}`` or ``Vector{UInt32}``.
123125

124126
.. function:: is_assigned_char(c) -> Bool
125127

@@ -379,10 +381,6 @@
379381

380382
Create a string from the address of a NUL-terminated UTF-16 string. A copy is made; the pointer can be safely freed. If ``length`` is specified, the string does not have to be NUL-terminated.
381383

382-
.. function:: is_valid_utf16(s) -> Bool
383-
384-
Returns true if the argument (``UTF16String`` or ``UInt16`` array) is valid UTF-16.
385-
386384
.. function:: utf32(s)
387385

388386
Create a UTF-32 string from a byte array, array of ``UInt32``, or

0 commit comments

Comments
 (0)