Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct bias in random float sampling. #33251

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ Standard library changes

#### Sockets

#### Random

* The default sampler for floats had a small bias ([#33222]), which is now fixed ([#33251]), and the default random sampler for floats now generates interior values in `(0, 1)`. The previous default sampler is available as `Random.CloseOpen01`.


Deprecated or removed
---------------------
Expand Down
11 changes: 10 additions & 1 deletion stdlib/Random/src/DSFMT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using Base.GMP.MPZ
export DSFMT_state, dsfmt_get_min_array_size, dsfmt_get_idstring,
dsfmt_init_gen_rand, dsfmt_init_by_array, dsfmt_gv_init_by_array,
dsfmt_fill_array_close_open!, dsfmt_fill_array_close1_open2!,
win32_SystemFunction036!
dsfmt_fill_array_open_open!, win32_SystemFunction036!

"Mersenne Exponent"
const MEXP = 19937
Expand Down Expand Up @@ -89,6 +89,15 @@ function dsfmt_fill_array_close1_open2!(s::DSFMT_state, A::Ptr{Float64}, n::Int)
s.val, A, n)
end

function dsfmt_fill_array_open_open!(s::DSFMT_state, A::Ptr{Float64}, n::Int)
@assert Csize_t(A) % 16 == 0 # the underlying C array must be 16-byte aligned
@assert dsfmt_min_array_size <= n && iseven(n)
ccall((:dsfmt_fill_array_open_open,:libdSFMT),
Cvoid,
(Ptr{Cvoid}, Ptr{Float64}, Int),
s.val, A, n)
end

function dsfmt_fill_array_close_open!(s::DSFMT_state, A::Ptr{Float64}, n::Int)
@assert Csize_t(A) % 16 == 0 # the underlying C array must be 16-byte aligned
@assert dsfmt_min_array_size <= n && iseven(n)
Expand Down
22 changes: 20 additions & 2 deletions stdlib/Random/src/RNGs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ rand!(::_GLOBAL_RNG, A::AbstractArray{Float64}, I::SamplerTrivial{<:FloatInterva
rand!(::_GLOBAL_RNG, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I)
for T in (Float16, Float32)
@eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen12{$T}}) = rand!(default_rng(), A, I)
@eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen01{$T}}) = rand!(default_rng(), A, I)
@eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{OpenOpen01{$T}}) = rand!(default_rng(), A, I)
end
for T in BitInteger_types
@eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I)
Expand All @@ -357,8 +357,10 @@ rng_native_52(::MersenneTwister) = Float64

# precondition: !mt_empty(r)
rand_inbounds(r::MersenneTwister, ::CloseOpen12_64) = mt_pop!(r)
rand_inbounds(r::MersenneTwister, ::CloseOpen01_64=CloseOpen01()) =
rand_inbounds(r::MersenneTwister, ::CloseOpen01_64) =
rand_inbounds(r, CloseOpen12()) - 1.0
rand_inbounds(r::MersenneTwister, ::OpenOpen01_64=OpenOpen01()) =
rand_inbounds(r, CloseOpen12()) - prevfloat(1.0)

rand_inbounds(r::MersenneTwister, ::UInt52Raw{T}) where {T<:BitInteger} =
reinterpret(UInt64, rand_inbounds(r, CloseOpen12())) % T
Expand Down Expand Up @@ -457,6 +459,10 @@ function _rand_max383!(r::MersenneTwister, A::UnsafeView{Float64}, I::FloatInter
for i=1:n
A[i] -= 1.0
end
elseif I isa OpenOpen01
for i in 1:n
A[i] -= prevfloat(1.0)
end
end
A
end
Expand All @@ -465,6 +471,9 @@ end
fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen01_64) =
dsfmt_fill_array_close_open!(s, A, n)

fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::OpenOpen01_64) =
dsfmt_fill_array_open_open!(s, A, n)

fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen12_64) =
dsfmt_fill_array_close1_open2!(s, A, n)

Expand Down Expand Up @@ -552,6 +561,15 @@ for T in (Float16, Float32)
end
A
end

@eval function rand!(r::MersenneTwister, A::Array{$T}, ::SamplerTrivial{OpenOpen01{$T}})
rand!(r, A, CloseOpen12($T))
I32 = one(Float32) - eps($T)/2
for i in eachindex(A)
@inbounds A[i] = Float32(A[i])-I32 # faster than "A[i] -= one(T)" for T==Float16
end
A
end
end

#### arrays of integers
Expand Down
7 changes: 5 additions & 2 deletions stdlib/Random/src/Random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,16 @@ gentype(::Type{<:UniformBits{T}}) where {T} = T
abstract type FloatInterval{T<:AbstractFloat} end

struct CloseOpen01{T<:AbstractFloat} <: FloatInterval{T} end # interval [0,1)
struct OpenOpen01{T<:AbstractFloat} <: FloatInterval{T} end # interval (0,1)
struct CloseOpen12{T<:AbstractFloat} <: FloatInterval{T} end # interval [1,2)

const FloatInterval_64 = FloatInterval{Float64}
const CloseOpen01_64 = CloseOpen01{Float64}
const OpenOpen01_64 = OpenOpen01{Float64}
const CloseOpen12_64 = CloseOpen12{Float64}

CloseOpen01(::Type{T}=Float64) where {T<:AbstractFloat} = CloseOpen01{T}()
OpenOpen01(::Type{T}=Float64) where {T<:AbstractFloat} = OpenOpen01{T}()
CloseOpen12(::Type{T}=Float64) where {T<:AbstractFloat} = CloseOpen12{T}()

gentype(::Type{<:FloatInterval{T}}) where {T<:AbstractFloat} = T
Expand Down Expand Up @@ -309,8 +312,8 @@ Pick a random element or array of random elements from the set of values specifi
* an `AbstractDict` or `AbstractSet` object,
* a string (considered as a collection of characters), or
* a type: the set of values to pick from is then equivalent to `typemin(S):typemax(S)` for
integers (this is not applicable to [`BigInt`](@ref)), to ``[0, 1)`` for floating
point numbers and to ``[0, 1)+i[0, 1)]`` for complex floating point numbers;
integers (this is not applicable to [`BigInt`](@ref)), to ``(0, 1)`` for floating
point numbers and to ``(0, 1)+i(0, 1)]`` for complex floating point numbers;

`S` defaults to [`Float64`](@ref).

Expand Down
23 changes: 22 additions & 1 deletion stdlib/Random/src/generation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,28 @@
### random floats

Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {RNG<:AbstractRNG,T<:AbstractFloat} =
Sampler(RNG, CloseOpen01(T), n)
Sampler(RNG, OpenOpen01(T), n)

# generic random generation function which can be used by RNG implementors
# it is not defined as a fallback rand method as this could create ambiguities

# NOTE: bias correction for OpenOpen01 samplers, see #33222

function rand(r::AbstractRNG, ::SamplerTrivial{OpenOpen01{Float16}})
z = reinterpret(Float32, (rand(r, UInt10(UInt32)) << 13) | 0x3f800000)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can make z equal prevfloat(Float32(2)), so that Float16(z - prevfloat(Float16(1))) rounds to 1.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I am surprised, in
#33222 (comment)
I checked the extrema of the new generator. Or we should really use a hard-coded constant Float16(reinterpret(Float32,n(UInt32(i) << 13) | 0x3f800000) - 0.9995117f0) as I proposed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry. I didn't think this fully through. You're right, it can't become 1.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@perrutquist, thanks for bringing this up anyway, I am always uneasy operating around eps() precision, and @mschauer, thanks for checking this.

Float16(z - Float32(prevfloat(Float16(1))))
end

function rand(r::AbstractRNG, ::SamplerTrivial{OpenOpen01{Float32}})
reinterpret(Float32, rand(r, UInt23()) | 0x3f800000) - prevfloat(1f0)
end

function rand(r::AbstractRNG, ::SamplerTrivial{OpenOpen01_64})
rand(r, CloseOpen12()) - prevfloat(1.0)
end

# CloseOpen01 samplers are biased by eps(T)/2, see #33222

rand(r::AbstractRNG, ::SamplerTrivial{CloseOpen01{Float16}}) =
Float16(reinterpret(Float32,
(rand(r, UInt10(UInt32)) << 13) | 0x3f800000) - 1)
Expand Down Expand Up @@ -86,6 +103,10 @@ function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::CloseOpen01{BigFloat})
z
end

function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::OpenOpen01{BigFloat})
_rand(rng, sp, CloseOpen01{BigFloat}())
end

# alternative, with 1 bit less of precision
# TODO: make an API for requesting full or not-full precision
function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::CloseOpen01{BigFloat}, ::Nothing)
Expand Down
24 changes: 12 additions & 12 deletions stdlib/Random/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ end
@test length(randn(ComplexF64, 4, 5)) == 20
@test length(bitrand(4, 5)) == 20

@test rand(MersenneTwister(0)) == 0.8236475079774124
@test rand(MersenneTwister(42)) == 0.5331830160438613
@test rand(MersenneTwister(0)) == 0.8236475079774125
@test rand(MersenneTwister(42)) == 0.5331830160438614
# Try a seed larger than 2^32
@test rand(MersenneTwister(5294967296)) == 0.3498809918210497
@test rand(MersenneTwister(5294967296)) == 0.3498809918210498

# Test array filling, Issues #7643, #8360
@test rand(MersenneTwister(0), 1) == [0.8236475079774124]
@test rand(MersenneTwister(0), 1) == [0.8236475079774125]
let A = zeros(2, 2)
rand!(MersenneTwister(0), A)
@test A == [0.8236475079774124 0.16456579813368521;
0.9103565379264364 0.17732884646626457]
@test A == [0.8236475079774125 0.16456579813368533;
0.9103565379264366 0.17732884646626468]
end
let A = zeros(2, 2)
@test_throws ArgumentError rand!(MersenneTwister(0), A, 5)
Expand Down Expand Up @@ -273,15 +273,15 @@ let mt = MersenneTwister(0)
rand!(mt, A)
rand!(mt, B)
@test A[end] == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0xc8e6453e139271f3,
69855512850528774484795047199183096941, Float16(0.16895), 0.21086597f0][i]
69855512850528774484795047199183096941, Float16(0.1694), 0.21086603f0][i]
@test B[end] == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, 2120308604158549401, 0xcb28c236e9c0f608,
61881313582466480231846019869039259750, Float16(0.38672), 0.20027375f0][i]
61881313582466480231846019869039259750, Float16(0.3872), 0.20027381f0][i]
end

Random.seed!(mt, 0)
AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1)
@test rand!(mt, AF64)[end] == 0.957735065345398
@test rand!(mt, AF64)[end] == 0.6492481059865669
@test rand!(mt, AF64)[end] == 0.9577350653453981
@test rand!(mt, AF64)[end] == 0.649248105986567
resize!(AF64, 2*length(mt.vals))
@test invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}},
mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))[end] == 0.1142787906708973
Expand All @@ -307,7 +307,7 @@ let mt = MersenneTwister(0)
Random.seed!(mt, 0)
rand(mt) # this is to fill mt.vals, cf. #9040
rand!(mt, A) # must not segfault even if Int(pointer(A)) % 16 != 0
@test A[end-4:end] == [0.3371041633752143, 0.41147647589610803, 0.6063082992397912, 0.9103565379264364, 0.16456579813368521]
@test A[end-4:end] == [0.33710416337521454, 0.41147647589610803, 0.6063082992397912, 0.9103565379264366, 0.16456579813368533]
end
end

Expand Down Expand Up @@ -444,7 +444,7 @@ end
# test uniform distribution of floats
for rng in [MersenneTwister(), RandomDevice()],
T in [Float16, Float32, Float64, BigFloat],
prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
setprecision(BigFloat, prec) do
# array version
counts = hist(rand(rng, T, 2000), 4)
Expand Down