From 940ed1d678ce324120cc502eaf836d237e915ec0 Mon Sep 17 00:00:00 2001 From: Sukera Date: Wed, 17 Feb 2021 19:04:37 +0100 Subject: [PATCH 1/6] Widen type signature of bytes2hex --- base/strings/util.jl | 22 ++++++++++++---------- test/strings/util.jl | 4 ++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 140c5a31194f1..1ed15a69bbfc3 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -666,12 +666,12 @@ end throw(ArgumentError("byte is not an ASCII hexadecimal digit")) """ - bytes2hex(a::AbstractArray{UInt8}) -> String - bytes2hex(io::IO, a::AbstractArray{UInt8}) + bytes2hex(itr) -> String + bytes2hex(io::IO, itr) -Convert an array `a` of bytes to its hexadecimal string representation, either -returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream -via `bytes2hex(io, a)`. The hexadecimal characters are all lowercase. +Convert an iterator `itr` of bytes to its hexadecimal string representation, either +returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream +via `bytes2hex(io, itr)`. The hexadecimal characters are all lowercase. # Examples ```jldoctest @@ -689,17 +689,19 @@ julia> bytes2hex(b) """ function bytes2hex end -function bytes2hex(a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}}) - b = Base.StringVector(2*length(a)) - @inbounds for (i, x) in enumerate(a) +function bytes2hex(itr) + eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8")) + b = Base.StringVector(2*length(itr)) + @inbounds for (i, x) in enumerate(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end return String(b) end -function bytes2hex(io::IO, a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}}) - for x in a +function bytes2hex(io::IO, itr) + eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8")) + for x in itr print(io, Char(hex_chars[1 + x >> 4]), Char(hex_chars[1 + x & 0xf])) end end diff --git a/test/strings/util.jl b/test/strings/util.jl index 617ff31106634..7431ce333f21d 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -376,6 +376,10 @@ end #non-hex characters @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH") end + + @testset "Issue 39284" begin + @test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF"))) + end end # b"" should be immutable From 70df7d76032ad28ca5a6a0956693e53641703c0a Mon Sep 17 00:00:00 2001 From: Sukera Date: Sun, 21 Feb 2021 12:00:07 +0100 Subject: [PATCH 2/6] Widen type signature of hex2bytes and make it slightly faster --- base/strings/util.jl | 44 +++++++++++++++++++++++++------------------- test/strings/util.jl | 1 + 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 1ed15a69bbfc3..32b8ed0e91d16 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -633,30 +633,36 @@ julia> hex2bytes(a) function hex2bytes end hex2bytes(s::AbstractString) = hex2bytes(String(s)) -hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) - -_firstbyteidx(s::String) = 1 -_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s)) -_lastbyteidx(s::String) = sizeof(s) -_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s) +hex2bytes(s::String) = hex2bytes(transcode(UInt8, s)) +hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) """ - hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}}) + hex2bytes!(dest::AbstractVector{UInt8}, itr) -Convert an array `s` of bytes representing a hexadecimal string to its binary +Convert an iterable `itr` of bytes representing a hexadecimal string to its binary representation, similar to [`hex2bytes`](@ref) except that the output is written in-place -in `d`. The length of `s` must be exactly twice the length of `d`. -""" -function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}}) - if 2length(d) != sizeof(s) - isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length")) - throw(ArgumentError("output array must be half length of input array")) +to `dest`. The length of `dest` must be at least half the length of `itr`. +""" +function hex2bytes!(dest::AbstractArray{UInt8}, itr) + isodd(length(itr)) && throw(ArgumentError("length of iterable must be even")) + @boundscheck 2*length(dest) < length(itr) && throw(ArgumentError("length of output array must be at least half of the length of input array")) + iszero(length(itr)) && return dest + + # we know these iterations always work because of the checks above + # as a bonus, we don't have to check the result of `iterate` for `nothing`! + # unfortunately, we have to do them explicitly here because the first `iterate` has a different signature + (x,state) = iterate(itr) + (y,state) = iterate(itr, state) + dest[firstindex(dest)] = number_from_hex(x) << 4 + number_from_hex(y) + + # incorporating the iterations into this loop via Iterators.partition was slower than the original function + @inbounds for i in Iterators.drop(eachindex(dest),1) + (x,state) = iterate(itr, state) + (y,state) = iterate(itr, state) + dest[i] = number_from_hex(x) << 4 + number_from_hex(y) end - j = first(eachindex(d)) - 1 - for i = _firstbyteidx(s):2:_lastbyteidx(s) - @inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1)) - end - return d + + return dest end @inline number_from_hex(c) = diff --git a/test/strings/util.jl b/test/strings/util.jl index 7431ce333f21d..2ca6df529ba51 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -379,6 +379,7 @@ end @testset "Issue 39284" begin @test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF"))) + @test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec] end end From 163d86ebf442cf63e80709dad603babd80ab3296 Mon Sep 17 00:00:00 2001 From: Sukera Date: Mon, 22 Feb 2021 19:43:02 +0100 Subject: [PATCH 3/6] Incorporate feedback about nfh and iteration in hex2bytes --- base/strings/util.jl | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 32b8ed0e91d16..dccdbefca43c9 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -632,44 +632,42 @@ julia> hex2bytes(a) """ function hex2bytes end -hex2bytes(s::AbstractString) = hex2bytes(String(s)) hex2bytes(s::String) = hex2bytes(transcode(UInt8, s)) hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) +# special case - valid bytes are checked in the generic implementation +hex2bytes!(dest::AbstractArray{UInt8}, s::String) = hex2bytes!(dest, transcode(UInt8, s)) + """ hex2bytes!(dest::AbstractVector{UInt8}, itr) Convert an iterable `itr` of bytes representing a hexadecimal string to its binary representation, similar to [`hex2bytes`](@ref) except that the output is written in-place -to `dest`. The length of `dest` must be at least half the length of `itr`. +to `dest`. The length of `dest` must be half the length of `itr`. """ function hex2bytes!(dest::AbstractArray{UInt8}, itr) isodd(length(itr)) && throw(ArgumentError("length of iterable must be even")) - @boundscheck 2*length(dest) < length(itr) && throw(ArgumentError("length of output array must be at least half of the length of input array")) + @boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable")) iszero(length(itr)) && return dest - # we know these iterations always work because of the checks above - # as a bonus, we don't have to check the result of `iterate` for `nothing`! - # unfortunately, we have to do them explicitly here because the first `iterate` has a different signature - (x,state) = iterate(itr) - (y,state) = iterate(itr, state) - dest[firstindex(dest)] = number_from_hex(x) << 4 + number_from_hex(y) - - # incorporating the iterations into this loop via Iterators.partition was slower than the original function - @inbounds for i in Iterators.drop(eachindex(dest),1) - (x,state) = iterate(itr, state) - (y,state) = iterate(itr, state) - dest[i] = number_from_hex(x) << 4 + number_from_hex(y) + next = iterate(itr) + @inbounds for i in eachindex(dest) + x,state = next + y,state = iterate(itr, state) + next = iterate(itr, state) + dest[i] = nfh(x) << 4 + nfh(y) end return dest end -@inline number_from_hex(c) = - (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') : - (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) : - (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) : +@inline nfh(c::Char) = nfh(UInt8(c)) +@inline function nfh(c::UInt8) + UInt8('0') <= c <= UInt8('9') && return c - UInt8('0') + c |= 0b0100000 + UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a throw(ArgumentError("byte is not an ASCII hexadecimal digit")) +end """ bytes2hex(itr) -> String From 529878f43e17b3c8856bc0a16b259f9aef2a8bd9 Mon Sep 17 00:00:00 2001 From: Sukera Date: Tue, 23 Feb 2021 09:02:37 +0100 Subject: [PATCH 4/6] Improve error message on hex2bytes! when passing a non-ASCII string --- base/strings/util.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index dccdbefca43c9..cce4959420ad5 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -636,7 +636,11 @@ hex2bytes(s::String) = hex2bytes(transcode(UInt8, s)) hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) # special case - valid bytes are checked in the generic implementation -hex2bytes!(dest::AbstractArray{UInt8}, s::String) = hex2bytes!(dest, transcode(UInt8, s)) +function hex2bytes!(dest::AbstractArray{UInt8}, s::String) + sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only")) + + hex2bytes!(dest, transcode(UInt8, s)) +end """ hex2bytes!(dest::AbstractVector{UInt8}, itr) From 3d5bdbc990dbb0ae35e3f9091df73fb9600bf4cd Mon Sep 17 00:00:00 2001 From: Sukera Date: Tue, 2 Mar 2021 19:47:52 +0100 Subject: [PATCH 5/6] ADD compat notice, fix implementation to be more generic in regards to AbstractString --- base/strings/util.jl | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index cce4959420ad5..5cea8d280f07c 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -595,15 +595,20 @@ replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) = # hex <-> bytes conversion """ - hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}}) + hex2bytes(itr) -Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a +Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a `Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair -of hexadecimal digits in `s` gives the value of one byte in the return vector. +of hexadecimal digits in `itr` gives the value of one byte in the return vector. -The length of `s` must be even, and the returned array has half of the length of `s`. +The length of `itr` must be even, and the returned array has half of the length of `itr`. See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse. +!!! compat "Julia 1.7" + Calling hex2bytes with iterables producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. + # Examples ```jldoctest julia> s = string(12345, base = 16) @@ -632,13 +637,12 @@ julia> hex2bytes(a) """ function hex2bytes end -hex2bytes(s::String) = hex2bytes(transcode(UInt8, s)) hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) # special case - valid bytes are checked in the generic implementation function hex2bytes!(dest::AbstractArray{UInt8}, s::String) sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only")) - + hex2bytes!(dest, transcode(UInt8, s)) end @@ -648,6 +652,11 @@ end Convert an iterable `itr` of bytes representing a hexadecimal string to its binary representation, similar to [`hex2bytes`](@ref) except that the output is written in-place to `dest`. The length of `dest` must be half the length of `itr`. + +!!! compat "Julia 1.7" + Calling hex2bytes! with iterators producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. """ function hex2bytes!(dest::AbstractArray{UInt8}, itr) isodd(length(itr)) && throw(ArgumentError("length of iterable must be even")) @@ -659,14 +668,15 @@ function hex2bytes!(dest::AbstractArray{UInt8}, itr) x,state = next y,state = iterate(itr, state) next = iterate(itr, state) - dest[i] = nfh(x) << 4 + nfh(y) + dest[i] = number_from_hex(x) << 4 + number_from_hex(y) end return dest end -@inline nfh(c::Char) = nfh(UInt8(c)) -@inline function nfh(c::UInt8) +@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c)) +@inline number_from_hex(c::Char) = number_from_hex(UInt8(c)) +@inline function number_from_hex(c::UInt8) UInt8('0') <= c <= UInt8('9') && return c - UInt8('0') c |= 0b0100000 UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a @@ -681,6 +691,11 @@ Convert an iterator `itr` of bytes to its hexadecimal string representation, eit returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream via `bytes2hex(io, itr)`. The hexadecimal characters are all lowercase. +!!! compat "Julia 1.7" + Calling bytes2hex with iterators producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. + # Examples ```jldoctest julia> a = string(12345, base = 16) From 1dbf84380f05ac4202a8c8574129e6ab5004f5bf Mon Sep 17 00:00:00 2001 From: Sukera Date: Fri, 2 Apr 2021 15:32:17 +0200 Subject: [PATCH 6/6] Add NEWS.md entry for #39710 --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 4831de4a33064..ffb30b81496d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,7 @@ New library features -------------------- * The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value` pairs to specify multiple attributes. ([#39381]) +* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}` and now only require that they're iterable and have a length. ([#39710]) Standard library changes ------------------------