only use accurate powf function

vtjnash · vtjnash · commit 81a0cc66642b · 2017-01-05T23:35:29.000-05:00
The powi intrinsic optimization over calling powf is that it is inaccurate. We don't need that. When it is equally accurate (e.g. tiny constant powers), LLVM will already recognize and optimize any call to a function named `powf`, and produce the same speedup. fix #19872
diff --git a/base/fastmath.jl b/base/fastmath.jl
@@ -23,7 +23,7 @@ module FastMath
 
 export @fastmath
 
-import Core.Intrinsics: box, unbox, powi_llvm, sqrt_llvm_fast
+import Core.Intrinsics: box, unbox, powf_llvm, sqrt_llvm_fast
 
 const fast_op =
     Dict(# basic arithmetic
@@ -250,9 +250,9 @@ end
 
 # builtins
 
-pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, Int32(y))
-pow_fast{T<:FloatTypes}(x::T, y::Int32) =
-    box(T, Base.powi_llvm(unbox(T,x), unbox(Int32,y)))
+pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, convert(T, y))
+pow_fast{T<:FloatTypes}(x::T, y::T) =
+    box(T, Base.powf_llvm(unbox(T,x), unbox(T,y)))
 
 # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
 # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic
diff --git a/base/math.jl b/base/math.jl
@@ -31,7 +31,7 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              significand_mask, significand_bits, exponent_bits, exponent_bias
 
 
-import Core.Intrinsics: sqrt_llvm, box, unbox, powi_llvm
+import Core.Intrinsics: sqrt_llvm, box, unbox, powf_llvm
 
 # non-type specific math functions
 
@@ -648,9 +648,9 @@ end
 ^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y)
 
 ^(x::Float64, y::Integer) =
-    box(Float64, powi_llvm(unbox(Float64,x), unbox(Int32,Int32(y))))
+    box(Float64, powf_llvm(unbox(Float64,x), unbox(Float64,Float64(y))))
 ^(x::Float32, y::Integer) =
-    box(Float32, powi_llvm(unbox(Float32,x), unbox(Int32,Int32(y))))
+    box(Float32, powf_llvm(unbox(Float32,x), unbox(Float32,Float32(y))))
 ^(x::Float16, y::Integer) = Float16(Float32(x)^y)
 
 function angle_restrict_symm(theta)
diff --git a/src/codegen.cpp b/src/codegen.cpp
@@ -389,10 +389,8 @@ static Function *expect_func;
 static Function *jldlsym_func;
 static Function *jlnewbits_func;
 static Function *jltypeassert_func;
-#if JL_LLVM_VERSION < 30600
 static Function *jlpow_func;
 static Function *jlpowf_func;
-#endif
 //static Function *jlgetnthfield_func;
 static Function *jlgetnthfieldchecked_func;
 //static Function *jlsetnthfield_func;
@@ -5946,7 +5944,6 @@ static void init_julia_llvm_env(Module *m)
                          "jl_gc_diff_total_bytes", m);
     add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes);
 
-#if JL_LLVM_VERSION < 30600
     Type *powf_type[2] = { T_float32, T_float32 };
     jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false),
                                    Function::ExternalLinkage,
@@ -5964,7 +5961,7 @@ static void init_julia_llvm_env(Module *m)
         &pow,
 #endif
         false);
-#endif
+
     std::vector<Type*> array_owner_args(0);
     array_owner_args.push_back(T_pjlvalue);
     jlarray_data_owner_func =
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
@@ -1452,23 +1452,14 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z,
                                                             ArrayRef<Type*>(x->getType())),
                                   x);
     }
-    case powi_llvm: {
+    case powf_llvm: {
         x = FP(x);
-        y = JL_INT(y);
-        Type *tx = x->getType(); // TODO: LLVM expects this to be i32
-#if JL_LLVM_VERSION >= 30600
-        Type *ts[1] = { tx };
-        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi,
-            ArrayRef<Type*>(ts));
+        y = FP(y);
+        Function *powf = (x->getType() == T_float64 ? jlpow_func : jlpowf_func);
 #if JL_LLVM_VERSION >= 30700
-        return builder.CreateCall(powi, {x, y});
+        return builder.CreateCall(prepare_call(powf), {x, y});
 #else
-        return builder.CreateCall2(powi, x, y);
-#endif
-#else
-        // issue #6506
-        return builder.CreateCall2(prepare_call(tx == T_float64 ? jlpow_func : jlpowf_func),
-                x, builder.CreateSIToFP(y, tx));
+        return builder.CreateCall2(prepare_call(powf), x, y);
 #endif
     }
     case sqrt_llvm_fast: {
diff --git a/src/intrinsics.h b/src/intrinsics.h
@@ -92,7 +92,7 @@
     ADD_I(trunc_llvm, 1) \
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
-    ADD_I(powi_llvm, 2) \
+    ADD_I(powf_llvm, 2) \
     ALIAS(sqrt_llvm_fast, sqrt_llvm) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -678,7 +678,7 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_powf_llvm(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
@@ -947,6 +947,8 @@ bi_iintrinsic_fast(jl_LLVMFlipSign, flipsign, flipsign_int,  )
         *pr = fp_select(a, sqrt)
 #define copysign_float(a, b) \
         fp_select2(a, b, copysign)
+#define pow_float(a, b) \
+        fp_select2(a, b, pow)
 
 un_fintrinsic(abs_float,abs_float)
 bi_fintrinsic(copysign_float,copysign_float)
@@ -955,31 +957,7 @@ un_fintrinsic(floor_float,floor_llvm)
 un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
-
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *ty = jl_typeof(a);
-    if (!jl_is_bitstype(ty))
-        jl_error("powi_llvm: a is not a bitstype");
-    if (!jl_is_bitstype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4)
-        jl_error("powi_llvm: b is not a 32-bit bitstype");
-    int sz = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);
-    void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
-    switch (sz) {
-    /* choose the right size c-type operation */
-    case 4:
-        *(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b));
-        break;
-    case 8:
-        *(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b));
-        break;
-    default:
-        jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
-    }
-    return newv;
-}
+bi_fintrinsic(pow_float,powf_llvm)
 
 JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b)
 {
diff --git a/test/math.jl b/test/math.jl
@@ -964,6 +964,13 @@ end
     end
 end
 
+@testset "issue #19872" begin
+    f19872(x) = x ^ 3
+    @test issubnormal(2.0 ^ (-1024))
+    @test f19872(2.0) === 8.0
+    @test !issubnormal(0.0)
+end
+
 @test Base.Math.f32(complex(1.0,1.0)) == complex(Float32(1.),Float32(1.))
 @test Base.Math.f16(complex(1.0,1.0)) == complex(Float16(1.),Float16(1.))