Skip to content

Commit 84300a6

Browse files
authored
clean up and export crc32c function (#22274)
* clean up and export crc32c function * added PR to NEWS * restore crc32 of String, add crc32c(io) to read all of a stream, add optimized open(crc32c, filename), make IOBuffer checksums consistent with other streams * use crc32c block size of 8192*3, matching the underlying C library * optimized IOBuffer crc32c
1 parent 70be8ab commit 84300a6

File tree

8 files changed

+93
-19
lines changed

8 files changed

+93
-19
lines changed

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ Library improvements
5959
* `resize!` and `sizehint!` methods no longer over-reserve memory when the
6060
requested array size is more than double of its current size ([#22038]).
6161

62+
* The `crc32c` function for CRC-32c checksums is now exported ([#22274]).
63+
6264
* The output of `versioninfo()` is now controlled with keyword arguments ([#21974]).
6365

6466
* The function `LibGit2.set_remote_url` now always sets both the fetch and push URLs for a

base/exports.jl

+1
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ export
10471047
atexit,
10481048
atreplinit,
10491049
clipboard,
1050+
crc32c,
10501051
exit,
10511052
ntuple,
10521053
quit,

base/iobuffer.jl

+12
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,15 @@ function readuntil(io::AbstractIOBuffer, delim::UInt8)
412412
end
413413
A
414414
end
415+
416+
# copy-free crc32c of IOBuffer:
417+
function crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000)
418+
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
419+
io.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
420+
n = min(nb, nb_available(io))
421+
n == 0 && return crc
422+
crc = unsafe_crc32c(pointer(io.data, io.ptr), n, crc)
423+
io.ptr += n
424+
return crc
425+
end
426+
crc32c(io::IOBuffer, crc::UInt32=0x00000000) = crc32c(io, nb_available(io), crc)

base/loading.jl

+3-10
Original file line numberDiff line numberDiff line change
@@ -683,10 +683,7 @@ function compilecache(name::String)
683683
if success(create_expr_cache(path, cachefile, concrete_deps))
684684
# append checksum to the end of the .ji file:
685685
open(cachefile, "a+") do f
686-
data = Mmap.mmap(f, Vector{UInt8}, filesize(f), 0)
687-
checksum = crc32c(data)
688-
finalize(data)
689-
write(f, hton(checksum))
686+
write(f, hton(crc32c(seekstart(f))))
690687
end
691688
else
692689
error("Failed to precompile $name to $cachefile.")
@@ -809,12 +806,8 @@ function stale_cachefile(modpath::String, cachefile::String)
809806
end
810807

811808
# finally, verify that the cache file has a valid checksum
812-
data = Mmap.mmap(io, Vector{UInt8}, filesize(io), 0)
813-
# checksum = UInt32 read in bigendian format from the last 4 bytes:
814-
checksum = UInt32(data[end]) + UInt32(data[end-1])<<8 + UInt32(data[end-2])<<16 + UInt32(data[end-3])<<24
815-
crc = crc32c(@view(data[1:end-4]))
816-
finalize(data)
817-
if checksum != crc
809+
crc = crc32c(seekstart(io), filesize(io)-4)
810+
if crc != ntoh(read(io, UInt32))
818811
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it has an invalid checksum.")
819812
return true
820813
end

base/util.jl

+42-6
Original file line numberDiff line numberDiff line change
@@ -765,10 +765,6 @@ if is_windows()
765765

766766
end
767767

768-
# compute sizeof correctly for strings, arrays, and subarrays of bytes
769-
_sizeof(a) = sizeof(a)
770-
_sizeof(a::FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N) = length(a)
771-
772768
"""
773769
crc32c(data, crc::UInt32=0x00000000)
774770
@@ -778,9 +774,49 @@ a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
778774
can be used to compute a checksum on data divided into chunks: performing
779775
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
780776
(Technically, a little-endian checksum is computed.)
777+
778+
There is also a method `crc32c(io, nb, crc)` to checksum `nb` bytes from
779+
a stream `io`, or `crc32c(io, crc)` to checksum all the remaining bytes.
780+
Hence you can do [`open(crc32c, filename)`](@ref) to checksum an entire file,
781+
or `crc32c(seekstart(buf))` to checksum an [`IOBuffer`](@ref) without
782+
calling [`take!`](@ref).
783+
784+
For a `String`, note that the result is specific to the UTF-8 encoding
785+
(a different checksum would be obtained from a different Unicode encoding).
786+
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
787+
but note that the result may be endian-dependent.
781788
"""
782-
crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N,String}, crc::UInt32=0x00000000) =
783-
ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, _sizeof(a))
789+
function crc32c end
790+
791+
unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
792+
793+
crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
794+
unsafe_crc32c(a, length(a), crc)
795+
796+
crc32c(s::String, crc::UInt32=0x00000000) = unsafe_crc32c(s, sizeof(s), crc)
797+
798+
"""
799+
crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000)
800+
801+
Read up to `nb` bytes from `io` and return the CRC-32c checksum, optionally
802+
mixed with a starting `crc` integer. If `nb` is not supplied, then
803+
`io` will be read until the end of the stream.
804+
"""
805+
function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
806+
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
807+
# use block size 24576=8192*3, since that is the threshold for
808+
# 3-way parallel SIMD code in the underlying jl_crc32c C function.
809+
buf = Array{UInt8}(min(nb, 24576))
810+
while !eof(io) && nb > 24576
811+
n = readbytes!(io, buf)
812+
crc = unsafe_crc32c(buf, n, crc)
813+
nb -= n
814+
end
815+
return unsafe_crc32c(buf, readbytes!(io, buf, min(nb, length(buf))), crc)
816+
end
817+
crc32c(io::IO, crc::UInt32=0x00000000) = crc32c(io, typemax(Int64), crc)
818+
crc32c(io::IOStream, crc::UInt32=0x00000000) = crc32c(io, filesize(io)-position(io), crc)
819+
784820

785821
"""
786822
@kwdef typedef

doc/src/stdlib/arrays.md

+1
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ Base.cumprod!
131131
Base.cumsum
132132
Base.cumsum!
133133
Base.cumsum_kbn
134+
Base.crc32c
134135
Base.LinAlg.diff
135136
Base.LinAlg.gradient
136137
Base.rot180

doc/src/stdlib/io-network.md

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Base.take!(::Base.AbstractIOBuffer)
1212
Base.fdio
1313
Base.flush
1414
Base.close
15+
Base.crc32c(::IO, ::Integer, ::UInt32)
1516
Base.write
1617
Base.read
1718
Base.read!

test/misc.jl

+31-3
Original file line numberDiff line numberDiff line change
@@ -566,14 +566,42 @@ end
566566
for force_software_crc in (1,0)
567567
ccall(:jl_crc32c_init, Void, (Cint,), force_software_crc)
568568
for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
569-
@test Base.crc32c(UInt8[1:n;]) == crc
569+
@test crc32c(UInt8[1:n;]) == crc == crc32c(String(UInt8[1:n;]))
570570
end
571571
# test that crc parameter is equivalent to checksum of concatenated data,
572572
# and test crc of subarrays:
573573
a = UInt8[1:255;]
574-
crc_256 = Base.crc32c(UInt8[1:255;])
574+
crc_256 = crc32c(a)
575575
@views for n = 1:255
576-
@test Base.crc32c(a[n+1:end], Base.crc32c(a[1:n])) == crc_256
576+
@test crc32c(a[n+1:end], crc32c(a[1:n])) == crc_256
577+
end
578+
579+
@test crc32c(IOBuffer(a)) == crc_256
580+
let buf = IOBuffer()
581+
write(buf, a[1:3])
582+
@test crc32c(seekstart(buf)) == crc32c(a[1:3])
583+
@test crc32c(buf) == 0x00000000
584+
@test crc32c(seek(buf, 1)) == crc32c(a[2:3])
585+
@test crc32c(seek(buf, 0), 2) == crc32c(a[1:2])
586+
@test crc32c(buf) == crc32c(a[3:3])
587+
end
588+
589+
let f = tempname()
590+
try
591+
write(f, a)
592+
@test open(crc32c, f) == crc_256
593+
open(f, "r") do io
594+
@test crc32c(io, 16) == crc32c(a[1:16])
595+
@test crc32c(io, 16) == crc32c(a[17:32])
596+
@test crc32c(io) == crc32c(a[33:end])
597+
@test crc32c(io, 1000) == 0x00000000
598+
end
599+
a = rand(UInt8, 30000)
600+
write(f, a)
601+
@test open(crc32c, f) == crc32c(a) == open(io -> crc32c(io, 10^6), f)
602+
finally
603+
rm(f, force=true)
604+
end
577605
end
578606
end
579607

0 commit comments

Comments
 (0)