JuliaMath · simonbyrne · Aug 22, 2016 · Aug 19, 2016 · tkelman · Aug 19, 2016
diff --git a/LICENSE.md b/LICENSE.md
@@ -20,3 +20,7 @@ The Libm.jl package is licensed under the MIT "Expat" License:
 > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 > SOFTWARE.
 > 
+
+Libm.jl includes code from the following projects, which have their own licenses:
+
+- [MUSL](http://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) [MIT]
diff --git a/benchmarks/bench.jl b/benchmarks/bench.jl
@@ -0,0 +1,39 @@
+using Libm
+using BenchmarkTools
+
+xerf1 = linspace(-0.84375,0.84375,10000)
+xerf2 = linspace(-2e-28,2e-28,10000)
+xerf3 = linspace(-0.84375,0.84375,10000)
+xerf4 = linspace(0.84375, 6,10000)
+xerf5 = linspace(-6,-0.84375,10000);
+xerf = union(xerf1,xerf2,xerf3,xerf4,xerf5);
+
+xerfc1 = linspace(-0.84375,0.84375,10000)
+xerfc2 = linspace(-2e-56,2e-56,10000)
+xerfc3 = linspace(-0.25,0.25,10000)
+xerfc4 = linspace(0.84375, 28,10000)
+xerfc5 = linspace(-28,-0.84375,10000);
+xercf = union(xerfc1,xerfc2,xerfc3,xerfc4,xerfc5)
+
+t1 = @benchmark Libm.erf.($xerf)
+t2 = @benchmark Base.erf.($xerf)
+t3 = @benchmark Libm.erfc.($xercf)
+t4 = @benchmark Base.erfc.($xercf)
+
+println("***** Benchmark erf *****")
+println()
+println(ratio(mean(t1),mean(t2)))
+println()
+println("Details\n")
+println(t1,"\n")
+println(t2,"\n")
+
+
+println("***** Benchmark erfc *****")
+println()
+println(ratio(mean(t3),mean(t4)))
+println()
+println("Details\n")
+println(t3,"\n")
+println(t4,"\n")
+
diff --git a/src/Libm.jl b/src/Libm.jl
@@ -1,5 +1,10 @@
 module Libm
 
-# package code goes here
+using Base.Math.@horner
 
-end # module
+export erf, erfc
+
+include("utils.jl")
+include("erf.jl")
+
+end
diff --git a/src/erf.jl b/src/erf.jl
@@ -0,0 +1,149 @@
+# port form musl library, commit e216951f509b71da193da2fc63e25b998740d58b
+# see http://git.musl-libc.org/cgit/musl/tree/src/math/erf.c for implementation details
+# origin: FreeBSD /usr/src/lib/msun/src/s_erf.c Copyright (c) 1993: Sun Microsystems, Inc.
+
+let
+global erf
+global erfc
+
+const erx  = 8.45062911510467529297e-01 # 0x3FEB0AC1, 0x60000000
+
+# Coefficients for approximation to  erf on [0,0.84375]
+const efx8 =  1.02703333676410069053e+00 # 0x3FF06EBA, 0x8214DB69
+const pp0  =  1.28379167095512558561e-01 # 0x3FC06EBA, 0x8214DB68
+const pp1  = -3.25042107247001499370e-01 # 0xBFD4CD7D, 0x691CB913
+const pp2  = -2.84817495755985104766e-02 # 0xBF9D2A51, 0xDBD7194F
+const pp3  = -5.77027029648944159157e-03 # 0xBF77A291, 0x236668E4
+const pp4  = -2.37630166566501626084e-05 # 0xBEF8EAD6, 0x120016AC
+const qq1  =  3.97917223959155352819e-01 # 0x3FD97779, 0xCDDADC09
+const qq2  =  6.50222499887672944485e-02 # 0x3FB0A54C, 0x5536CEBA
+const qq3  =  5.08130628187576562776e-03 # 0x3F74D022, 0xC4D36B0F
+const qq4  =  1.32494738004321644526e-04 # 0x3F215DC9, 0x221C1A10
+const qq5  = -3.96022827877536812320e-06 # 0xBED09C43, 0x42A26120
+
+# Coefficients for approximation to  erf  in [0.84375,1.25]
+const pa0  = -2.36211856075265944077e-03 # 0xBF6359B8, 0xBEF77538
+const pa1  =  4.14856118683748331666e-01 # 0x3FDA8D00, 0xAD92B34D
+const pa2  = -3.72207876035701323847e-01 # 0xBFD7D240, 0xFBB8C3F1
+const pa3  =  3.18346619901161753674e-01 # 0x3FD45FCA, 0x805120E4
+const pa4  = -1.10894694282396677476e-01 # 0xBFBC6398, 0x3D3E28EC
+const pa5  =  3.54783043256182359371e-02 # 0x3FA22A36, 0x599795EB
+const pa6  = -2.16637559486879084300e-03 # 0xBF61BF38, 0x0A96073F
+const qa1  =  1.06420880400844228286e-01 # 0x3FBB3E66, 0x18EEE323
+const qa2  =  5.40397917702171048937e-01 # 0x3FE14AF0, 0x92EB6F33
+const qa3  =  7.18286544141962662868e-02 # 0x3FB2635C, 0xD99FE9A7
+const qa4  =  1.26171219808761642112e-01 # 0x3FC02660, 0xE763351F
+const qa5  =  1.36370839120290507362e-02 # 0x3F8BEDC2, 0x6B51DD1C
+const qa6  =  1.19844998467991074170e-02 # 0x3F888B54, 0x5735151D
+
+# Coefficients for approximation to  erfc in [1.25,1/0.35]
+const ra0  = -9.86494403484714822705e-03 # 0xBF843412, 0x600D6435 
+const ra1  = -6.93858572707181764372e-01 # 0xBFE63416, 0xE4BA7360 
+const ra2  = -1.05586262253232909814e+01 # 0xC0251E04, 0x41B0E726 
+const ra3  = -6.23753324503260060396e+01 # 0xC04F300A, 0xE4CBA38D 
+const ra4  = -1.62396669462573470355e+02 # 0xC0644CB1, 0x84282266 
+const ra5  = -1.84605092906711035994e+02 # 0xC067135C, 0xEBCCABB2 
+const ra6  = -8.12874355063065934246e+01 # 0xC0545265, 0x57E4D2F2 
+const ra7  = -9.81432934416914548592e+00 # 0xC023A0EF, 0xC69AC25C 
+const sa1  =  1.96512716674392571292e+01 # 0x4033A6B9, 0xBD707687 
+const sa2  =  1.37657754143519042600e+02 # 0x4061350C, 0x526AE721 
+const sa3  =  4.34565877475229228821e+02 # 0x407B290D, 0xD58A1A71 
+const sa4  =  6.45387271733267880336e+02 # 0x40842B19, 0x21EC2868 
+const sa5  =  4.29008140027567833386e+02 # 0x407AD021, 0x57700314 
+const sa6  =  1.08635005541779435134e+02 # 0x405B28A3, 0xEE48AE2C 
+const sa7  =  6.57024977031928170135e+00 # 0x401A47EF, 0x8E484A93 
+const sa8  = -6.04244152148580987438e-02 # 0xBFAEEFF2, 0xEE749A62 
+
+# Coefficients for approximation to  erfc in [1/.35,28]
+const rb0  = -9.86494292470009928597e-03 # 0xBF843412, 0x39E86F4A
+const rb1  = -7.99283237680523006574e-01 # 0xBFE993BA, 0x70C285DE
+const rb2  = -1.77579549177547519889e+01 # 0xC031C209, 0x555F995A
+const rb3  = -1.60636384855821916062e+02 # 0xC064145D, 0x43C5ED98
+const rb4  = -6.37566443368389627722e+02 # 0xC083EC88, 0x1375F228
+const rb5  = -1.02509513161107724954e+03 # 0xC0900461, 0x6A2E5992
+const rb6  = -4.83519191608651397019e+02 # 0xC07E384E, 0x9BDC383F
+const sb1  =  3.03380607434824582924e+01 # 0x403E568B, 0x261D5190
+const sb2  =  3.25792512996573918826e+02 # 0x40745CAE, 0x221B9F0A
+const sb3  =  1.53672958608443695994e+03 # 0x409802EB, 0x189D5118
+const sb4  =  3.19985821950859553908e+03 # 0x40A8FFB7, 0x688C246A
+const sb5  =  2.55305040643316442583e+03 # 0x40A3F219, 0xCEDF3BE6
+const sb6  =  4.74528541206955367215e+02 # 0x407DA874, 0xE79FE763
+const sb7  = -2.24409524465858183362e+01 # 0xC03670E2, 0x42712D62
+
+function erfc1(x::Float64)
+    s = abs(x) - 1
+    P = @horner s pa0 pa1 pa2 pa3 pa4 pa5 pa6
+    Q = @horner s 1.0 qa1 qa2 qa3 qa4 qa5 qa6
+    return 1 - erx - P/Q
+end
+
+function erfc2(ix::UInt32, x::Float64)
+    if ix < 0x3ff40000  # |x| < 1.25
+        return erfc1(x)
+    end
+    x = abs(x) 
+    s = 1/(x*x)
+    if ix < 0x4006db6d # |x| < 1/.35 ~ 2.85714
+        R = @horner s ra0 ra1 ra2 ra3 ra4 ra5 ra6 ra7
+        S = @horner s 1.0 sa1 sa2 sa3 sa4 sa5 sa6 sa7 sa8
+    else # |x| > 1/.35
+        R = @horner s rb0 rb1 rb2 rb3 rb4 rb5 rb6
+        S = @horner s 1.0 sb1 sb2 sb3 sb4 sb5 sb6 sb7
+    end
+    z = x
+    z = lowword!(z,UInt32(0))
+    return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x
+end
+
+function erf(x::Float64)
+    ix = highword(x)
+    sign = (ix>>31) % Int32
+    ix &= 0x7fffffff
+    if ix >= 0x7ff00000 # erf(nan)=nan, erf(+-inf)=+-1
+        return 1-2*sign + 1/x
+    end
+    if ix < 0x3feb0000 # |x| < 0.84375
+        if ix < 0x3e300000 #|x| < 2**-28  avoid underflow
+            return 0.125*(8*x + efx8*x)
+        end
+        z = x*x
+        r = @horner z pp0 pp1 pp2 pp3 pp4
+        s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
+        y = r/s
+        return x + x*y
+    end
+    if ix < 0x40180000 # 0.84375 <= |x| < 6
+        y = 1 - erfc2(ix,x)
+    else
+        y = 1 - 0x1p-1022
+    end
+    return sign != 0 ? -y : y
+end
+
+function erfc(x::Float64)
+    ix = highword(x)
+    sign = (ix>>31) % Int32
+    ix &= 0x7fffffff
+    if ix >= 0x7ff00000 # erfc(nan)=nan, erfc(+-inf)=0,2
+        return 2*sign + 1/x
+    end
+    if ix < 0x3feb0000 # |x| < 0.84375
+        if ix < 0x3c700000  # |x| < 2**-56
+            return 1.0 - x
+        end
+        z = x*x
+        r = @horner z pp0 pp1 pp2 pp3 pp4
+        s = @horner z 1.0 qq1 qq2 qq3 qq4 qq5
+        y = r/s
+        if sign != 0 || ix < 0x3fd00000 # x < 1/4 
+            return 1.0 - (x+x*y)
+        end
+        return 0.5 - (x - 0.5 + x*y)
+    end
+    if ix < 0x403c0000 # 0.84375 <= |x| < 28
+        return sign != 0 ? 2 - erfc2(ix,x) : erfc2(ix,x)
+    end
+    return sign != 0 ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022
+end
+
+end
diff --git a/src/utils.jl b/src/utils.jl
@@ -0,0 +1,12 @@
+"""
+Get the more significant 32 bit int from a double.
+Corresponds to GET_HIGH_WORD in musl
+"""
+highword(d::Float64) = (reinterpret(UInt64, d) >> 32) % UInt32
+
+"""
+Set the less significant 32 bits of the double d to the unsigned 32 bit integer lo
+Corresponds to SET_LOW_WORD in musl
+"""
+lowword!(d::Float64, lo::UInt32) = reinterpret(Float64, reinterpret(UInt64, d) & 0xffffffff00000000 | lo)
+
diff --git a/test/erf.jl b/test/erf.jl
@@ -0,0 +1,31 @@
+@testset "erf" begin
+    @test isnan(Libm.erf(NaN))
+    @test Libm.erf(Inf) == 1
+    @test Libm.erf(-Inf) == -1
+    s = linspace(-0.84375,0.84375,100)
+    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    s = linspace(-2e-28,2e-28,100)
+    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    s = linspace(-0.84375,0.84375,100)
+    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    s = linspace(0.84375, 6, 100)
+    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+    s = linspace(-6,-0.84375, 100);
+    @test_approx_eq Base.erf.(s) Libm.erf.(s)
+end
+
+@testset "erfc" begin
+    @test isnan(Libm.erfc(NaN))
+    @test Libm.erfc(Inf) == 0
+    @test Libm.erfc(-Inf) == 2
+    s = linspace(-0.84375,0.84375,100)
+    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    s = linspace(-2e-56,2e-56,100)
+    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    s = linspace(-0.25,0.25,100)
+    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    s = linspace(0.84375, 28, 100)
+    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+    s = linspace(-28,-0.84375, 100);
+    @test_approx_eq Base.erfc.(s) Libm.erfc.(s)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,5 +1,8 @@
 using Libm
 using Base.Test
 
-# write your own tests here
-@test 1 == 2
+@testset "Libm" begin
+
+include("erf.jl")
+
+end