Skip to content
This repository was archived by the owner on Mar 11, 2020. It is now read-only.

Commit 4527927

Browse files
simonbyrnemusm
authored andcommitted
Add erf/erfc for Float32 (#14)
Extend erf and erfc to Float32 using generics.
1 parent 6e043d4 commit 4527927

File tree

5 files changed

+136
-90
lines changed

5 files changed

+136
-90
lines changed

src/Libm.jl

+1-7
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
module Libm
22

3-
using Base: sign_mask, exponent_mask, significand_mask, exponent_one, exponent_bias, significand_bits
4-
5-
using Base.Math: @horner
6-
7-
export erf, erfc
3+
typealias FloatTypes Union{Float32,Float64}
84

95
include("utils.jl")
106
include("erf.jl")
11-
12-
137
include("log/tang.jl")
148

159
end

src/erf.jl

+45-42
Original file line numberDiff line numberDiff line change
@@ -69,81 +69,84 @@ const sb4 = 3.19985821950859553908e+03 # 0x40A8FFB7, 0x688C246A
6969
const sb5 = 2.55305040643316442583e+03 # 0x40A3F219, 0xCEDF3BE6
7070
const sb6 = 4.74528541206955367215e+02 # 0x407DA874, 0xE79FE763
7171
const sb7 = -2.24409524465858183362e+01 # 0xC03670E2, 0x42712D62
72-
73-
function erfc1(x::Float64)
72+
73+
function erfc1{T<:FloatTypes}(x::T)
7474
s = abs(x) - 1
75-
P = @horner s pa0 pa1 pa2 pa3 pa4 pa5 pa6
76-
Q = @horner s 1.0 qa1 qa2 qa3 qa4 qa5 qa6
77-
return 1 - erx - P/Q
75+
P = @horner_oftype s pa0 pa1 pa2 pa3 pa4 pa5 pa6
76+
Q = @horner_oftype s 1.0 qa1 qa2 qa3 qa4 qa5 qa6
77+
return 1 - T(erx) - P/Q
7878
end
7979

80-
function erfc2(ix::UInt32, x::Float64)
81-
if ix < 0x3ff40000 # |x| < 1.25
80+
function erfc2{T<:FloatTypes}(ix::UInt32, x::T)
81+
if ix < highword(T(1.25))
82+
# 0.84375 <= |x| < 1.25
8283
return erfc1(x)
8384
end
84-
x = abs(x)
85+
# 1.25 <= |x| < 28
86+
x = abs(x)
8587
s = 1/(x*x)
86-
if ix < 0x4006db6d # |x| < 1/.35 ~ 2.85714
87-
R = @horner s ra0 ra1 ra2 ra3 ra4 ra5 ra6 ra7
88-
S = @horner s 1.0 sa1 sa2 sa3 sa4 sa5 sa6 sa7 sa8
89-
else # |x| > 1/.35
90-
R = @horner s rb0 rb1 rb2 rb3 rb4 rb5 rb6
91-
S = @horner s 1.0 sb1 sb2 sb3 sb4 sb5 sb6 sb7
88+
if ix < highword(T(1/0.35000001))
89+
# 1.25 <= |x| < 1/.35 ~ 2.85714
90+
R = @horner_oftype s ra0 ra1 ra2 ra3 ra4 ra5 ra6 ra7
91+
S = @horner_oftype s 1.0 sa1 sa2 sa3 sa4 sa5 sa6 sa7 sa8
92+
else
93+
# 1/.35 <= |x| < 28
94+
R = @horner_oftype s rb0 rb1 rb2 rb3 rb4 rb5 rb6
95+
S = @horner_oftype s 1.0 sb1 sb2 sb3 sb4 sb5 sb6 sb7
9296
end
93-
z = x
94-
z = setlowword(z,UInt32(0))
95-
return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x
97+
z = trunclo(x)
98+
return exp(-z*z-T(0.5625))*exp((z-x)*(z+x)+R/S)/x
9699
end
97100

98-
function erf(x::Float64)
101+
function erf{T<:FloatTypes}(x::T)
99102
ix = highword(x)
100-
sign = (ix>>31) % Int32
103+
sign = (ix>>31) % Bool
101104
ix &= 0x7fffffff
102-
if ix >= 0x7ff00000 # erf(nan)=nan, erf(+-inf)=+-1
105+
if ix >= highword(T(Inf)) # erf(nan)=nan, erf(+-inf)=+-1
103106
return 1-2*sign + 1/x
104107
end
105-
if ix < 0x3feb0000 # |x| < 0.84375
106-
if ix < 0x3e300000 #|x| < 2**-28 avoid underflow
107-
return 0.125*(8*x + efx8*x)
108+
if ix < highword(T(0.84375)) # |x| < 0.84375
109+
if ix < highword(T(2)^-28) #|x| < 2**-28 avoid underflow
110+
return (8*x +T(efx8)*x)/8
108111
end
109112
z = x*x
110-
r = @horner z pp0 pp1 pp2 pp3 pp4
111-
s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
113+
r = @horner_oftype z pp0 pp1 pp2 pp3 pp4
114+
s = @horner_oftype z 1 qq1 qq2 qq3 qq4 qq5
112115
y = r/s
113116
return x + x*y
114117
end
115-
if ix < 0x40180000 # 0.84375 <= |x| < 6
118+
if ix < highword(T(6)) # 0.84375 <= |x| < 6
116119
y = 1 - erfc2(ix,x)
117120
else
118-
y = 1 - 0x1p-1022
121+
y = 1 - realmin(T)
119122
end
120-
return sign != 0 ? -y : y
123+
return sign ? -y : y
121124
end
122125

123-
function erfc(x::Float64)
126+
function erfc{T<:FloatTypes}(x::T)
124127
ix = highword(x)
125-
sign = (ix>>31) % Int32
128+
sign = (ix>>31) % Bool
126129
ix &= 0x7fffffff
127-
if ix >= 0x7ff00000 # erfc(nan)=nan, erfc(+-inf)=0,2
130+
if ix >= highword(T(Inf)) # erfc(nan)=nan, erfc(+-inf)=0,2
128131
return 2*sign + 1/x
129132
end
130-
if ix < 0x3feb0000 # |x| < 0.84375
131-
if ix < 0x3c700000 # |x| < 2**-56
132-
return 1.0 - x
133+
if ix < highword(T(0.84375)) # |x| < 0.84375
134+
if ix < highword(T(2)^-56) # |x| < 2**-56
135+
return 1 - x
133136
end
134137
z = x*x
135-
r = @horner z pp0 pp1 pp2 pp3 pp4
136-
s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
138+
r = @horner_oftype z pp0 pp1 pp2 pp3 pp4
139+
s = @horner_oftype z 1 qq1 qq2 qq3 qq4 qq5
137140
y = r/s
138-
if sign != 0 || ix < 0x3fd00000 # x < 1/4
139-
return 1.0 - (x+x*y)
141+
if sign || ix < highword(T(0.25)) # x < 1/4
142+
return 1 - (x+x*y)
140143
end
141-
return 0.5 - (x - 0.5 + x*y)
144+
return T(0.5) - (x - T(0.5) + x*y)
142145
end
143-
if ix < 0x403c0000 # 0.84375 <= |x| < 28
144-
return sign != 0 ? 2 - erfc2(ix,x) : erfc2(ix,x)
146+
if ix < highword(T(28)) # 0.84375 <= |x| < 28
147+
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x)
145148
end
146-
return sign != 0 ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022
149+
return sign ? 2 - realmin(T) : realmin(T)*realmin(T)
147150
end
148151

149152
end

src/log/tang.jl

+2-8
Original file line numberDiff line numberDiff line change
@@ -141,12 +141,6 @@ const _log_tang_table_Float32 = [0.0,0.007782140442054949,0.015504186535965254,0
141141
0.6773988235918061,0.6813592248079031,0.6853040030989194,0.689233281238809,
142142
0.6931471805599453]
143143

144-
# truncate lower order bits (up to 26)
145-
# ideally, this should be able to use ANDPD instructions, see #9868.
146-
@inline function truncbits(x::Float64)
147-
reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
148-
end
149-
150144

151145
# Procedure 1
152146
@inline function _log_tang_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,jp::Int)
@@ -199,8 +193,8 @@ end
199193
if is_fma_fast(Float64)
200194
return u + fma(fma(-u,f,2(f-u)), g, q)
201195
else
202-
u1 = truncbits(u) # round to 24 bits
203-
f1 = truncbits(f)
196+
u1 = trunclo(u)
197+
f1 = trunclo(f)
204198
f2 = f-f1
205199
u2 = ((2*(f-u1)-u1*f1)-u1*f2)*g
206200
## Step 4

src/utils.jl

+58-7
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,69 @@
1+
2+
# Useful utilities from Base
3+
using Base: sign_mask, exponent_mask, significand_mask, exponent_one, exponent_bias, significand_bits
4+
5+
using Base.Math: @horner
6+
7+
# Similar to @horner, but converts coefficients to same type as x
8+
macro horner_oftype(x, p...)
9+
ex = :(oftype($x,$(esc(p[end]))))
10+
for i = length(p)-1:-1:1
11+
ex = :(muladd(t, $ex, oftype($x,$(esc(p[i])))))
12+
end
13+
Expr(:block, :(t = $(esc(x))), ex)
14+
end
15+
16+
117
"""
2-
highword(d::Float64)
18+
highword(x::Union{Float32,Float64}) -> UInt32
319
4-
Get the most significant 32 bits as a `UInt32` from `d`.
20+
Get the most significant 32 bits as a `UInt32` from `x`.
521
Corresponds to `GET_HIGH_WORD` in musl
622
"""
7-
highword(d::Float64) = (reinterpret(UInt64, d) >> 32) % UInt32
23+
@inline highword(x::Float64) = UInt32(reinterpret(UInt64, x) >> 32)
24+
@inline highword(x::Float32) = reinterpret(UInt32, x)
25+
826

927
"""
10-
setlowword(d::Float64, lo::UInt32)
28+
trunclo(x::Union{Float32,Float64})
29+
30+
Truncates the lower order bits of `x` so that the result of the multiplication `trunclo(x)
31+
* trunclo(y)` is exact, assuming no underflow or overflow occurs.
32+
33+
This relies on the following property: if `a` has `n` significant bits, and `b` has `m`
34+
significant bits, then exact product `a*b` has either `n+m-1` or `n+m` significant bits:
35+
36+
* `Float64` has 53 significant bits (including implicit leading bit), so we need to
37+
truncate the last 27 significant bits (leaving 26 bits).
38+
39+
* `Float32` has 24 significant bits (including implicit leading bit), so we need to
40+
truncate the last 12 significant bits (leaving 12 bits).
41+
42+
This is typically faster than other methods of truncating lower order bits (such as
43+
Veltkamp splitting, or converting `Float64`s to `Float32`s and back again). For LLVM 3.8
44+
or greater, this should give optimal `ANDPD`/`ANDSD` instructions on supported x86
45+
architectures, which doesn't require moving registers (Julia issue #9868).
46+
47+
NOTE: For odd significand sizes, such as `Float64`, when used as a replacement Veltkamp
48+
splitting for computing extended precision multiplications with a Dekker-style `mul12`
49+
algorithm, it can lose the last bit of precision. For example the function:
1150
12-
Returns the least significant 32 bits of `d` to `lo`.
13-
Corresponds to `SET_LOW_WORD` in musl
51+
function incorrect_mul2(x,y)
52+
hx = trunclo(x); tx = x-hx
53+
hy = trunclo(y); ty = y-hy
54+
p = hx*hy
55+
q = hx*ty + tx*hy
56+
z = p+q
57+
zz = p-z+q+tx*ty
58+
z, zz
59+
end
60+
61+
will give an incorrect result for the case `x = y = 0x1.800000e000001p+0`.
1462
"""
15-
setlowword(d::Float64, lo::UInt32) = reinterpret(Float64, reinterpret(UInt64, d) & 0xffff_ffff_0000_0000 | lo)
63+
@inline trunclo(x::Float64) =
64+
reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
65+
@inline trunclo(x::Float32) =
66+
reinterpret(Float32, reinterpret(UInt32,x) & 0xffff_f000)
1667

1768

1869
# determine if hardware FMA is available

test/erf.jl

+30-26
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,35 @@
11
@testset "erf" begin
2-
@test isnan(Libm.erf(NaN))
3-
@test Libm.erf(Inf) == 1
4-
@test Libm.erf(-Inf) == -1
5-
s = linspace(-0.84375,0.84375,100)
6-
@test_approx_eq Base.erf.(s) Libm.erf.(s)
7-
s = linspace(-2e-28,2e-28,100)
8-
@test_approx_eq Base.erf.(s) Libm.erf.(s)
9-
s = linspace(-0.84375,0.84375,100)
10-
@test_approx_eq Base.erf.(s) Libm.erf.(s)
11-
s = linspace(0.84375, 6, 100)
12-
@test_approx_eq Base.erf.(s) Libm.erf.(s)
13-
s = linspace(-6,-0.84375, 100);
14-
@test_approx_eq Base.erf.(s) Libm.erf.(s)
2+
for T in (Float32,Float64)
3+
@test isnan(Libm.erf(T(NaN)))
4+
@test Libm.erf(T(Inf)) == 1
5+
@test Libm.erf(T(-Inf)) == -1
6+
s = linspace(T(-0.84375),T(0.84375),100)
7+
@test_approx_eq Base.erf.(s) Libm.erf.(s)
8+
s = linspace(T(-2e-28),T(2e-28),100)
9+
@test_approx_eq Base.erf.(s) Libm.erf.(s)
10+
s = linspace(T(-0.84375),T(0.84375),100)
11+
@test_approx_eq Base.erf.(s) Libm.erf.(s)
12+
s = linspace(T(0.84375), T(6), 100)
13+
@test_approx_eq Base.erf.(s) Libm.erf.(s)
14+
s = linspace(T(-6),T(-0.84375), 100);
15+
@test_approx_eq Base.erf.(s) Libm.erf.(s)
16+
end
1517
end
1618

1719
@testset "erfc" begin
18-
@test isnan(Libm.erfc(NaN))
19-
@test Libm.erfc(Inf) == 0
20-
@test Libm.erfc(-Inf) == 2
21-
s = linspace(-0.84375,0.84375,100)
22-
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
23-
s = linspace(-2e-56,2e-56,100)
24-
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
25-
s = linspace(-0.25,0.25,100)
26-
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
27-
s = linspace(0.84375, 28, 100)
28-
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
29-
s = linspace(-28,-0.84375, 100);
30-
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
20+
for T in (Float32,Float64)
21+
@test isnan(Libm.erfc(T(NaN)))
22+
@test Libm.erfc(T(Inf)) == 0
23+
@test Libm.erfc(T(-Inf)) == 2
24+
s = linspace(T(-0.84375),T(0.84375),100)
25+
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
26+
s = linspace(T(-2e-56),T(2e-56),100)
27+
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
28+
s = linspace(T(-0.25),T(0.25),100)
29+
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
30+
s = linspace(T(0.84375), T(28), 100)
31+
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
32+
s = linspace(T(-28),T(-0.84375), 100);
33+
@test_approx_eq Base.erfc.(s) Libm.erfc.(s)
34+
end
3135
end

0 commit comments

Comments
 (0)