Move the cast_float_to_int fallback code to GCC

cuviper · cuviper · commit 147032a61896 · 2022-08-16T15:46:17.000-07:00
Now that we require at least LLVM 13, that codegen backend is always
using its intrinsic `fptosi.sat` and `fptoui.sat` conversions, so it
doesn't need the manual implementation. However, the GCC backend still
needs it, so we can move all of that code down there.
diff --git a/Cargo.lock b/Cargo.lock
@@ -3721,7 +3721,6 @@ dependencies = [
  "object 0.29.0",
  "pathdiff",
  "regex",
- "rustc_apfloat",
  "rustc_arena",
  "rustc_ast",
  "rustc_attr",
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -15,8 +15,11 @@ use gccjit::{
     Type,
     UnaryOp,
 };
+use rustc_apfloat::{ieee, Float, Round, Status};
 use rustc_codegen_ssa::MemFlags;
-use rustc_codegen_ssa::common::{AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope};
+use rustc_codegen_ssa::common::{
+    AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
+};
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{
@@ -31,6 +34,7 @@ use rustc_codegen_ssa::traits::{
     StaticBuilderMethods,
 };
 use rustc_data_structures::fx::FxHashSet;
+use rustc_middle::bug;
 use rustc_middle::ty::{ParamEnv, Ty, TyCtxt};
 use rustc_middle::ty::layout::{FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers, TyAndLayout};
 use rustc_span::Span;
@@ -1271,12 +1275,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         val
     }
 
-    fn fptoui_sat(&mut self, _val: RValue<'gcc>, _dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
-        None
+    fn fptoui_sat(&mut self, val: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        self.fptoint_sat(false, val, dest_ty)
     }
 
-    fn fptosi_sat(&mut self, _val: RValue<'gcc>, _dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
-        None
+    fn fptosi_sat(&mut self, val: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        self.fptoint_sat(true, val, dest_ty)
     }
 
     fn instrprof_increment(&mut self, _fn_name: RValue<'gcc>, _hash: RValue<'gcc>, _num_counters: RValue<'gcc>, _index: RValue<'gcc>) {
@@ -1285,6 +1289,166 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 }
 
 impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
+    fn fptoint_sat(&mut self, signed: bool, val: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let src_ty = self.cx.val_ty(val);
+        let (float_ty, int_ty) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
+            assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
+            (self.cx.element_type(src_ty), self.cx.element_type(dest_ty))
+        } else {
+            (src_ty, dest_ty)
+        };
+
+        // FIXME(jistone): the following was originally the fallback SSA implementation, before LLVM 13
+        // added native `fptosi.sat` and `fptoui.sat` conversions, but it was used by GCC as well.
+        // Now that LLVM always relies on its own, the code has been moved to GCC, but the comments are
+        // still LLVM-specific. This should be updated, and use better GCC specifics if possible.
+
+        let int_width = self.cx.int_width(int_ty);
+        let float_width = self.cx.float_width(float_ty);
+        // LLVM's fpto[su]i returns undef when the input val is infinite, NaN, or does not fit into the
+        // destination integer type after rounding towards zero. This `undef` value can cause UB in
+        // safe code (see issue #10184), so we implement a saturating conversion on top of it:
+        // Semantically, the mathematical value of the input is rounded towards zero to the next
+        // mathematical integer, and then the result is clamped into the range of the destination
+        // integer type. Positive and negative infinity are mapped to the maximum and minimum value of
+        // the destination integer type. NaN is mapped to 0.
+        //
+        // Define f_min and f_max as the largest and smallest (finite) floats that are exactly equal to
+        // a value representable in int_ty.
+        // They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
+        // Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
+        // int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
+        // representable. Note that this only works if float_ty's exponent range is sufficiently large.
+        // f16 or 256 bit integers would break this property. Right now the smallest float type is f32
+        // with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
+        // On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
+        // we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
+        // This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
+        let int_max = |signed: bool, int_width: u64| -> u128 {
+            let shift_amount = 128 - int_width;
+            if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
+        };
+        let int_min = |signed: bool, int_width: u64| -> i128 {
+            if signed { i128::MIN >> (128 - int_width) } else { 0 }
+        };
+
+        let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
+            let rounded_min =
+                ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
+            assert_eq!(rounded_min.status, Status::OK);
+            let rounded_max =
+                ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
+            assert!(rounded_max.value.is_finite());
+            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
+        };
+        let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
+            let rounded_min =
+                ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
+            assert_eq!(rounded_min.status, Status::OK);
+            let rounded_max =
+                ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
+            assert!(rounded_max.value.is_finite());
+            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
+        };
+        // To implement saturation, we perform the following steps:
+        //
+        // 1. Cast val to an integer with fpto[su]i. This may result in undef.
+        // 2. Compare val to f_min and f_max, and use the comparison results to select:
+        //  a) int_ty::MIN if val < f_min or val is NaN
+        //  b) int_ty::MAX if val > f_max
+        //  c) the result of fpto[su]i otherwise
+        // 3. If val is NaN, return 0.0, otherwise return the result of step 2.
+        //
+        // This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
+        // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
+        // undef does not introduce any non-determinism either.
+        // More importantly, the above procedure correctly implements saturating conversion.
+        // Proof (sketch):
+        // If val is NaN, 0 is returned by definition.
+        // Otherwise, val is finite or infinite and thus can be compared with f_min and f_max.
+        // This yields three cases to consider:
+        // (1) if val in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
+        //     saturating conversion for inputs in that range.
+        // (2) if val > f_max, then val is larger than int_ty::MAX. This holds even if f_max is rounded
+        //     (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
+        //     than int_ty::MAX. Because val is larger than int_ty::MAX, the return value of int_ty::MAX
+        //     is correct.
+        // (3) if val < f_min, then val is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
+        //     int_ty::MIN and therefore the return value of int_ty::MIN is correct.
+        // QED.
+
+        let float_bits_to_llval = |bx: &mut Self, bits| {
+            let bits_llval = match float_width {
+                32 => bx.cx().const_u32(bits as u32),
+                64 => bx.cx().const_u64(bits as u64),
+                n => bug!("unsupported float width {}", n),
+            };
+            bx.bitcast(bits_llval, float_ty)
+        };
+        let (f_min, f_max) = match float_width {
+            32 => compute_clamp_bounds_single(signed, int_width),
+            64 => compute_clamp_bounds_double(signed, int_width),
+            n => bug!("unsupported float width {}", n),
+        };
+        let f_min = float_bits_to_llval(self, f_min);
+        let f_max = float_bits_to_llval(self, f_max);
+        let int_max = self.cx.const_uint_big(int_ty, int_max(signed, int_width));
+        let int_min = self.cx.const_uint_big(int_ty, int_min(signed, int_width) as u128);
+        let zero = self.cx.const_uint(int_ty, 0);
+
+        // If we're working with vectors, constants must be "splatted": the constant is duplicated
+        // into each lane of the vector.  The algorithm stays the same, we are just using the
+        // same constant across all lanes.
+        let maybe_splat = |bx: &mut Self, val| {
+            if bx.cx().type_kind(dest_ty) == TypeKind::Vector {
+                bx.vector_splat(bx.vector_length(dest_ty), val)
+            } else {
+                val
+            }
+        };
+        let f_min = maybe_splat(self, f_min);
+        let f_max = maybe_splat(self, f_max);
+        let int_max = maybe_splat(self, int_max);
+        let int_min = maybe_splat(self, int_min);
+        let zero = maybe_splat(self, zero);
+
+        // Step 1 ...
+        let fptosui_result = if signed { self.fptosi(val, dest_ty) } else { self.fptoui(val, dest_ty) };
+        let less_or_nan = self.fcmp(RealPredicate::RealULT, val, f_min);
+        let greater = self.fcmp(RealPredicate::RealOGT, val, f_max);
+
+        // Step 2: We use two comparisons and two selects, with %s1 being the
+        // result:
+        //     %less_or_nan = fcmp ult %val, %f_min
+        //     %greater = fcmp olt %val, %f_max
+        //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
+        //     %s1 = select %greater, int_ty::MAX, %s0
+        // Note that %less_or_nan uses an *unordered* comparison. This
+        // comparison is true if the operands are not comparable (i.e., if val is
+        // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
+        // val is NaN.
+        //
+        // Performance note: Unordered comparison can be lowered to a "flipped"
+        // comparison and a negation, and the negation can be merged into the
+        // select. Therefore, it not necessarily any more expensive than an
+        // ordered ("normal") comparison. Whether these optimizations will be
+        // performed is ultimately up to the backend, but at least x86 does
+        // perform them.
+        let s0 = self.select(less_or_nan, int_min, fptosui_result);
+        let s1 = self.select(greater, int_max, s0);
+
+        // Step 3: NaN replacement.
+        // For unsigned types, the above step already yielded int_ty::MIN == 0 if val is NaN.
+        // Therefore we only need to execute this step for signed integer types.
+        if signed {
+            // LLVM has no isNaN predicate, so we use (val == val) instead
+            let cmp = self.fcmp(RealPredicate::RealOEQ, val, val);
+            self.select(cmp, s1, zero)
+        } else {
+            s1
+        }
+    }
+
     #[cfg(feature="master")]
     pub fn shuffle_vector(&mut self, v1: RValue<'gcc>, v2: RValue<'gcc>, mask: RValue<'gcc>) -> RValue<'gcc> {
         let struct_type = mask.get_type().is_struct().expect("mask of struct type");
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -19,6 +19,7 @@
 #![warn(rust_2018_idioms)]
 #![warn(unused_lifetimes)]
 
+extern crate rustc_apfloat;
 extern crate rustc_ast;
 extern crate rustc_codegen_ssa;
 extern crate rustc_data_structures;
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -725,11 +725,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildSExt(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
-    fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
+    fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         self.fptoint_sat(false, val, dest_ty)
     }
 
-    fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
+    fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         self.fptoint_sat(true, val, dest_ty)
     }
 
@@ -1429,12 +1429,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         }
     }
 
-    fn fptoint_sat(
-        &mut self,
-        signed: bool,
-        val: &'ll Value,
-        dest_ty: &'ll Type,
-    ) -> Option<&'ll Value> {
+    fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         let src_ty = self.cx.val_ty(val);
         let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
             assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
@@ -1459,7 +1454,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width)
         };
         let f = self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
-        Some(self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None))
+        self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None)
     }
 
     pub(crate) fn landing_pad(
diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml
@@ -26,7 +26,6 @@ rustc_arena = { path = "../rustc_arena" }
 rustc_ast = { path = "../rustc_ast" }
 rustc_span = { path = "../rustc_span" }
 rustc_middle = { path = "../rustc_middle" }
-rustc_apfloat = { path = "../rustc_apfloat" }
 rustc_attr = { path = "../rustc_attr" }
 rustc_symbol_mangling = { path = "../rustc_symbol_mangling" }
 rustc_data_structures = { path = "../rustc_data_structures" }
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs

Original file line number	Diff line number	Diff line change
`@@ -725,11 +725,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {`
`725`	`725`	`unsafe { llvm::LLVMBuildSExt(self.llbuilder, val, dest_ty, UNNAMED) }`
`726`	`726`	`}`
`727`	`727`
`728`		`- fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {`
	`728`	`+ fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {`
`729`	`729`	`self.fptoint_sat(false, val, dest_ty)`
`730`	`730`	`}`
`731`	`731`
`732`		`- fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {`
	`732`	`+ fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {`
`733`	`733`	`self.fptoint_sat(true, val, dest_ty)`
`734`	`734`	`}`
`735`	`735`
`@@ -1429,12 +1429,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {`
`1429`	`1429`	`}`
`1430`	`1430`	`}`
`1431`	`1431`
`1432`		`- fn fptoint_sat(`
`1433`		`- &mut self,`
`1434`		`- signed: bool,`
`1435`		`- val: &'ll Value,`
`1436`		`- dest_ty: &'ll Type,`
`1437`		`- ) -> Option<&'ll Value> {`
	`1432`	`+ fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {`
`1438`	`1433`	`let src_ty = self.cx.val_ty(val);`
`1439`	`1434`	`let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {`
`1440`	`1435`	`assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));`
`@@ -1459,7 +1454,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {`
`1459`	`1454`	`format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width)`
`1460`	`1455`	`};`
`1461`	`1456`	`let f = self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));`
`1462`		`- Some(self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None))`
	`1457`	`+ self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None)`
`1463`	`1458`	`}`
`1464`	`1459`
`1465`	`1460`	`pub(crate) fn landing_pad(`