Add erf/erfc for Float32 (#14)

simonbyrne · musm · commit 4527927fd9a1 · 2016-09-22T01:19:49.000-04:00
Extend erf and erfc to Float32 using generics.
diff --git a/src/Libm.jl b/src/Libm.jl
@@ -1,15 +1,9 @@
 module Libm
 
-using Base: sign_mask, exponent_mask, significand_mask, exponent_one, exponent_bias, significand_bits
-
-using Base.Math: @horner
-
-export erf, erfc
+typealias FloatTypes Union{Float32,Float64}
 
 include("utils.jl")
 include("erf.jl")
-
-
 include("log/tang.jl")
 
 end
diff --git a/src/erf.jl b/src/erf.jl
@@ -69,81 +69,84 @@ const sb4  =  3.19985821950859553908e+03 # 0x40A8FFB7, 0x688C246A
 const sb5  =  2.55305040643316442583e+03 # 0x40A3F219, 0xCEDF3BE6
 const sb6  =  4.74528541206955367215e+02 # 0x407DA874, 0xE79FE763
 const sb7  = -2.24409524465858183362e+01 # 0xC03670E2, 0x42712D62
-
-function erfc1(x::Float64)
+    
+function erfc1{T<:FloatTypes}(x::T)
     s = abs(x) - 1
-    P = @horner s pa0 pa1 pa2 pa3 pa4 pa5 pa6
-    Q = @horner s 1.0 qa1 qa2 qa3 qa4 qa5 qa6
-    return 1 - erx - P/Q
+    P = @horner_oftype s pa0 pa1 pa2 pa3 pa4 pa5 pa6
+    Q = @horner_oftype s 1.0 qa1 qa2 qa3 qa4 qa5 qa6
+    return 1 - T(erx) - P/Q
 end
 
-function erfc2(ix::UInt32, x::Float64)
-    if ix < 0x3ff40000  # |x| < 1.25
+function erfc2{T<:FloatTypes}(ix::UInt32, x::T)
+    if ix < highword(T(1.25))
+        # 0.84375 <= |x| < 1.25
         return erfc1(x)
     end
-    x = abs(x) 
+    # 1.25 <= |x| < 28
+    x = abs(x)
     s = 1/(x*x)
-    if ix < 0x4006db6d # |x| < 1/.35 ~ 2.85714
-        R = @horner s ra0 ra1 ra2 ra3 ra4 ra5 ra6 ra7
-        S = @horner s 1.0 sa1 sa2 sa3 sa4 sa5 sa6 sa7 sa8
-    else # |x| > 1/.35
-        R = @horner s rb0 rb1 rb2 rb3 rb4 rb5 rb6
-        S = @horner s 1.0 sb1 sb2 sb3 sb4 sb5 sb6 sb7
+    if ix < highword(T(1/0.35000001))
+        # 1.25 <= |x| < 1/.35 ~ 2.85714
+        R = @horner_oftype s ra0 ra1 ra2 ra3 ra4 ra5 ra6 ra7
+        S = @horner_oftype s 1.0 sa1 sa2 sa3 sa4 sa5 sa6 sa7 sa8
+    else
+        # 1/.35 <= |x| < 28
+        R = @horner_oftype s rb0 rb1 rb2 rb3 rb4 rb5 rb6
+        S = @horner_oftype s 1.0 sb1 sb2 sb3 sb4 sb5 sb6 sb7
     end
-    z = x
-    z = setlowword(z,UInt32(0))
-    return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x
+    z = trunclo(x)
+    return exp(-z*z-T(0.5625))*exp((z-x)*(z+x)+R/S)/x
 end
 
-function erf(x::Float64)
+function erf{T<:FloatTypes}(x::T)
     ix = highword(x)
-    sign = (ix>>31) % Int32
+    sign = (ix>>31) % Bool
     ix &= 0x7fffffff
-    if ix >= 0x7ff00000 # erf(nan)=nan, erf(+-inf)=+-1
+    if ix >= highword(T(Inf)) # erf(nan)=nan, erf(+-inf)=+-1
         return 1-2*sign + 1/x
     end
-    if ix < 0x3feb0000 # |x| < 0.84375
-        if ix < 0x3e300000 #|x| < 2**-28  avoid underflow
-            return 0.125*(8*x + efx8*x)
+    if ix < highword(T(0.84375)) # |x| < 0.84375
+        if ix < highword(T(2)^-28) #|x| < 2**-28  avoid underflow
+            return (8*x +T(efx8)*x)/8
         end
         z = x*x
-        r = @horner z pp0 pp1 pp2 pp3 pp4
-        s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
+        r = @horner_oftype z pp0 pp1 pp2 pp3 pp4
+        s = @horner_oftype z 1 qq1 qq2 qq3 qq4 qq5
         y = r/s
         return x + x*y
     end
-    if ix < 0x40180000 # 0.84375 <= |x| < 6
+    if ix < highword(T(6)) # 0.84375 <= |x| < 6
         y = 1 - erfc2(ix,x)
     else
-        y = 1 - 0x1p-1022
+        y = 1 - realmin(T)
     end
-    return sign != 0 ? -y : y
+    return sign ? -y : y
 end
 
-function erfc(x::Float64)
+function erfc{T<:FloatTypes}(x::T)
     ix = highword(x)
-    sign = (ix>>31) % Int32
+    sign = (ix>>31) % Bool
     ix &= 0x7fffffff
-    if ix >= 0x7ff00000 # erfc(nan)=nan, erfc(+-inf)=0,2
+    if ix >= highword(T(Inf)) # erfc(nan)=nan, erfc(+-inf)=0,2
         return 2*sign + 1/x
     end
-    if ix < 0x3feb0000 # |x| < 0.84375
-        if ix < 0x3c700000  # |x| < 2**-56
-            return 1.0 - x
+    if ix < highword(T(0.84375)) # |x| < 0.84375
+        if ix < highword(T(2)^-56)  # |x| < 2**-56
+            return 1 - x
         end
         z = x*x
-        r = @horner z pp0 pp1 pp2 pp3 pp4
-        s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
+        r = @horner_oftype z pp0 pp1 pp2 pp3 pp4
+        s = @horner_oftype z 1 qq1 qq2 qq3 qq4 qq5
         y = r/s
-        if sign != 0 || ix < 0x3fd00000 # x < 1/4 
-            return 1.0 - (x+x*y)
+        if sign || ix < highword(T(0.25)) # x < 1/4 
+            return 1 - (x+x*y)
         end
-        return 0.5 - (x - 0.5 + x*y)
+        return T(0.5) - (x - T(0.5) + x*y)
     end
-    if ix < 0x403c0000 # 0.84375 <= |x| < 28
-        return sign != 0 ? 2 - erfc2(ix,x) : erfc2(ix,x)
+    if ix < highword(T(28)) # 0.84375 <= |x| < 28
+        return sign ? 2 - erfc2(ix,x) : erfc2(ix,x)
     end
-    return sign != 0 ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022
+    return sign ? 2 - realmin(T) : realmin(T)*realmin(T)
 end
 
 end
diff --git a/src/log/tang.jl b/src/log/tang.jl
@@ -141,12 +141,6 @@ const _log_tang_table_Float32 = [0.0,0.007782140442054949,0.015504186535965254,0
     0.6773988235918061,0.6813592248079031,0.6853040030989194,0.689233281238809,
     0.6931471805599453]
 
-# truncate lower order bits (up to 26)
-# ideally, this should be able to use ANDPD instructions, see #9868.
-@inline function truncbits(x::Float64)
-    reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
-end
-
 
 # Procedure 1
 @inline function _log_tang_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,jp::Int)
@@ -199,8 +193,8 @@ end
     if is_fma_fast(Float64)
         return u + fma(fma(-u,f,2(f-u)), g, q)
     else
-        u1 = truncbits(u) # round to 24 bits
-        f1 = truncbits(f)
+        u1 = trunclo(u)
+        f1 = trunclo(f)
         f2 = f-f1
         u2 = ((2*(f-u1)-u1*f1)-u1*f2)*g
         ## Step 4
diff --git a/src/utils.jl b/src/utils.jl
@@ -1,18 +1,69 @@
+
+# Useful utilities from Base
+using Base: sign_mask, exponent_mask, significand_mask, exponent_one, exponent_bias, significand_bits
+
+using Base.Math: @horner
+
+# Similar to @horner, but converts each coefficient to the type of the evaluated `x`.
+macro horner_oftype(x, p...)
+    ex = :(oftype(t, $(esc(p[end]))))  # `t`: hygienic local bound to `x` in the block below
+    for i = length(p)-1:-1:1  # Horner's scheme, highest-order coefficient innermost
+        ex = :(muladd(t, $ex, oftype(t, $(esc(p[i])))))
+    end
+    Expr(:block, :(t = $(esc(x))), ex)  # evaluate `x` exactly once, in the caller's scope
+end
+
+
 """
-    highword(d::Float64)
+    highword(x::Union{Float32,Float64}) -> UInt32
 
-Get the most significant 32 bits as a `UInt32` from `d`.
+Get the most significant 32 bits as a `UInt32` from `x`.
 Corresponds to `GET_HIGH_WORD` in musl
 """
-highword(d::Float64) = (reinterpret(UInt64, d) >> 32) % UInt32
+@inline highword(x::Float64) = UInt32(reinterpret(UInt64, x) >> 32)
+@inline highword(x::Float32) =  reinterpret(UInt32, x)
+
 
 """
-    setlowword(d::Float64, lo::UInt32)
+    trunclo(x::Union{Float32,Float64})
+
+Truncates the lower order bits of `x` so that the result of the multiplication `trunclo(x)
+* trunclo(y)` is exact, assuming no underflow or overflow occurs.
+
+This relies on the following property: if `a` has `n` significant bits, and `b` has `m`
+significant bits, then the exact product `a*b` has either `n+m-1` or `n+m` significant bits:
+
+* `Float64` has 53 significant bits (including implicit leading bit), so we need to
+  truncate the last 27 significant bits (leaving 26 bits).
+
+* `Float32` has 24 significant bits (including implicit leading bit), so we need to
+  truncate the last 12 significant bits (leaving 12 bits).
+
+This is typically faster than other methods of truncating lower order bits (such as
+Veltkamp splitting, or converting `Float64`s to `Float32`s and back again). For LLVM 3.8
+or greater, this should give optimal `ANDPD`/`ANDSD` instructions on supported x86
+architectures, which doesn't require moving registers (Julia issue #9868).
+
+NOTE: For odd significand sizes, such as `Float64`, when used as a replacement for Veltkamp
+splitting for computing extended precision multiplications with a Dekker-style `mul12`
+algorithm, it can lose the last bit of precision. For example, the function:
 
-Returns the least significant 32 bits of `d` to `lo`.
-Corresponds to `SET_LOW_WORD` in musl
+    function incorrect_mul2(x,y)
+        hx = trunclo(x); tx = x-hx
+        hy = trunclo(y); ty = y-hy
+        p = hx*hy
+        q = hx*ty + tx*hy
+        z = p+q
+        zz = p-z+q+tx*ty
+        z, zz
+    end
+    
+will give an incorrect result for the case `x = y = 0x1.800000e000001p+0`.
 """
-setlowword(d::Float64, lo::UInt32) = reinterpret(Float64, reinterpret(UInt64, d) & 0xffff_ffff_0000_0000 | lo)
+@inline trunclo(x::Float64) =
+    reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
+@inline trunclo(x::Float32) =
+    reinterpret(Float32, reinterpret(UInt32,x) & 0xffff_f000)
 
 
 # determine if hardware FMA is available
diff --git a/test/erf.jl b/test/erf.jl
@@ -1,31 +1,35 @@
 @testset "erf" begin
-    @test isnan(Libm.erf(NaN))
-    @test Libm.erf(Inf) == 1
-    @test Libm.erf(-Inf) == -1
-    s = linspace(-0.84375,0.84375,100)
-    @test_approx_eq Base.erf.(s) Libm.erf.(s)
-    s = linspace(-2e-28,2e-28,100)
-    @test_approx_eq Base.erf.(s) Libm.erf.(s)
-    s = linspace(-0.84375,0.84375,100)
-    @test_approx_eq Base.erf.(s) Libm.erf.(s)
-    s = linspace(0.84375, 6, 100)
-    @test_approx_eq Base.erf.(s) Libm.erf.(s)
-    s = linspace(-6,-0.84375, 100);
-    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    for T in (Float32,Float64)
+        @test isnan(Libm.erf(T(NaN)))
+        @test Libm.erf(T(Inf)) == 1
+        @test Libm.erf(T(-Inf)) == -1
+        s = linspace(T(-0.84375),T(0.84375),100)
+        @test_approx_eq Base.erf.(s) Libm.erf.(s)
+        s = linspace(T(-2e-28),T(2e-28),100)
+        @test_approx_eq Base.erf.(s) Libm.erf.(s)
+        s = linspace(T(-0.84375),T(0.84375),100)
+        @test_approx_eq Base.erf.(s) Libm.erf.(s)
+        s = linspace(T(0.84375), T(6), 100)
+        @test_approx_eq Base.erf.(s) Libm.erf.(s)
+        s = linspace(T(-6),T(-0.84375), 100);
+        @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    end
 end
 
 @testset "erfc" begin
-    @test isnan(Libm.erfc(NaN))
-    @test Libm.erfc(Inf) == 0
-    @test Libm.erfc(-Inf) == 2
-    s = linspace(-0.84375,0.84375,100)
-    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
-    s = linspace(-2e-56,2e-56,100)
-    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
-    s = linspace(-0.25,0.25,100)
-    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
-    s = linspace(0.84375, 28, 100)
-    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
-    s = linspace(-28,-0.84375, 100);
-    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    for T in (Float32,Float64)
+        @test isnan(Libm.erfc(T(NaN)))
+        @test Libm.erfc(T(Inf)) == 0
+        @test Libm.erfc(T(-Inf)) == 2
+        s = linspace(T(-0.84375),T(0.84375),100)
+        @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+        s = linspace(T(-2e-56),T(2e-56),100)
+        @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+        s = linspace(T(-0.25),T(0.25),100)
+        @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+        s = linspace(T(0.84375), T(28), 100)
+        @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+        s = linspace(T(-28),T(-0.84375), 100);
+        @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    end
 end