@@ -15,8 +15,11 @@ use gccjit::{
15
15
Type ,
16
16
UnaryOp ,
17
17
} ;
18
+ use rustc_apfloat:: { ieee, Float , Round , Status } ;
18
19
use rustc_codegen_ssa:: MemFlags ;
19
- use rustc_codegen_ssa:: common:: { AtomicOrdering , AtomicRmwBinOp , IntPredicate , RealPredicate , SynchronizationScope } ;
20
+ use rustc_codegen_ssa:: common:: {
21
+ AtomicOrdering , AtomicRmwBinOp , IntPredicate , RealPredicate , SynchronizationScope , TypeKind ,
22
+ } ;
20
23
use rustc_codegen_ssa:: mir:: operand:: { OperandRef , OperandValue } ;
21
24
use rustc_codegen_ssa:: mir:: place:: PlaceRef ;
22
25
use rustc_codegen_ssa:: traits:: {
@@ -31,6 +34,7 @@ use rustc_codegen_ssa::traits::{
31
34
StaticBuilderMethods ,
32
35
} ;
33
36
use rustc_data_structures:: fx:: FxHashSet ;
37
+ use rustc_middle:: bug;
34
38
use rustc_middle:: ty:: { ParamEnv , Ty , TyCtxt } ;
35
39
use rustc_middle:: ty:: layout:: { FnAbiError , FnAbiOfHelpers , FnAbiRequest , HasParamEnv , HasTyCtxt , LayoutError , LayoutOfHelpers , TyAndLayout } ;
36
40
use rustc_span:: Span ;
@@ -1271,12 +1275,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
1271
1275
val
1272
1276
}
1273
1277
1274
- fn fptoui_sat ( & mut self , _val : RValue < ' gcc > , _dest_ty : Type < ' gcc > ) -> Option < RValue < ' gcc > > {
1275
- None
1278
/// Saturating conversion of a float value (or vector of floats) to an unsigned
/// integer type. Thin wrapper: forwards to `fptoint_sat` with `signed = false`.
+ fn fptoui_sat ( & mut self , val : RValue < ' gcc > , dest_ty : Type < ' gcc > ) -> RValue < ' gcc > {
1279
+ self . fptoint_sat ( false , val , dest_ty )
1276
1280
}
1277
1281
1278
- fn fptosi_sat ( & mut self , _val : RValue < ' gcc > , _dest_ty : Type < ' gcc > ) -> Option < RValue < ' gcc > > {
1279
- None
1282
/// Saturating conversion of a float value (or vector of floats) to a signed
/// integer type. Thin wrapper: forwards to `fptoint_sat` with `signed = true`.
+ fn fptosi_sat ( & mut self , val : RValue < ' gcc > , dest_ty : Type < ' gcc > ) -> RValue < ' gcc > {
1283
+ self . fptoint_sat ( true , val , dest_ty )
1280
1284
}
1281
1285
1282
1286
fn instrprof_increment ( & mut self , _fn_name : RValue < ' gcc > , _hash : RValue < ' gcc > , _num_counters : RValue < ' gcc > , _index : RValue < ' gcc > ) {
@@ -1285,6 +1289,166 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
1285
1289
}
1286
1290
1287
1291
impl < ' a , ' gcc , ' tcx > Builder < ' a , ' gcc , ' tcx > {
1292
+ fn fptoint_sat ( & mut self , signed : bool , val : RValue < ' gcc > , dest_ty : Type < ' gcc > ) -> RValue < ' gcc > {
1293
+ let src_ty = self . cx . val_ty ( val) ;
1294
+ let ( float_ty, int_ty) = if self . cx . type_kind ( src_ty) == TypeKind :: Vector {
1295
+ assert_eq ! ( self . cx. vector_length( src_ty) , self . cx. vector_length( dest_ty) ) ;
1296
+ ( self . cx . element_type ( src_ty) , self . cx . element_type ( dest_ty) )
1297
+ } else {
1298
+ ( src_ty, dest_ty)
1299
+ } ;
1300
+
1301
+ // FIXME(jistone): the following was originally the fallback SSA implementation, before LLVM 13
1302
+ // added native `fptosi.sat` and `fptoui.sat` conversions, but it was used by GCC as well.
1303
+ // Now that LLVM always relies on its own, the code has been moved to GCC, but the comments are
1304
+ // still LLVM-specific. This should be updated, and use better GCC specifics if possible.
1305
+
1306
+ let int_width = self . cx . int_width ( int_ty) ;
1307
+ let float_width = self . cx . float_width ( float_ty) ;
1308
+ // LLVM's fpto[su]i returns undef when the input val is infinite, NaN, or does not fit into the
1309
+ // destination integer type after rounding towards zero. This `undef` value can cause UB in
1310
+ // safe code (see issue #10184), so we implement a saturating conversion on top of it:
1311
+ // Semantically, the mathematical value of the input is rounded towards zero to the next
1312
+ // mathematical integer, and then the result is clamped into the range of the destination
1313
+ // integer type. Positive and negative infinity are mapped to the maximum and minimum value of
1314
+ // the destination integer type. NaN is mapped to 0.
1315
+ //
1316
+ // Define f_min and f_max as the largest and smallest (finite) floats that are exactly equal to
1317
+ // a value representable in int_ty.
1318
+ // They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
1319
+ // Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
1320
+ // int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
1321
+ // representable. Note that this only works if float_ty's exponent range is sufficiently large.
1322
+ // f16 or 256 bit integers would break this property. Right now the smallest float type is f32
1323
+ // with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
1324
+ // On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
1325
+ // we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
1326
+ // This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
1327
+ let int_max = |signed : bool , int_width : u64 | -> u128 {
1328
+ let shift_amount = 128 - int_width;
1329
+ if signed { i128:: MAX as u128 >> shift_amount } else { u128:: MAX >> shift_amount }
1330
+ } ;
1331
+ let int_min = |signed : bool , int_width : u64 | -> i128 {
1332
+ if signed { i128:: MIN >> ( 128 - int_width) } else { 0 }
1333
+ } ;
1334
+
1335
+ let compute_clamp_bounds_single = |signed : bool , int_width : u64 | -> ( u128 , u128 ) {
1336
+ let rounded_min =
1337
+ ieee:: Single :: from_i128_r ( int_min ( signed, int_width) , Round :: TowardZero ) ;
1338
+ assert_eq ! ( rounded_min. status, Status :: OK ) ;
1339
+ let rounded_max =
1340
+ ieee:: Single :: from_u128_r ( int_max ( signed, int_width) , Round :: TowardZero ) ;
1341
+ assert ! ( rounded_max. value. is_finite( ) ) ;
1342
+ ( rounded_min. value . to_bits ( ) , rounded_max. value . to_bits ( ) )
1343
+ } ;
1344
+ let compute_clamp_bounds_double = |signed : bool , int_width : u64 | -> ( u128 , u128 ) {
1345
+ let rounded_min =
1346
+ ieee:: Double :: from_i128_r ( int_min ( signed, int_width) , Round :: TowardZero ) ;
1347
+ assert_eq ! ( rounded_min. status, Status :: OK ) ;
1348
+ let rounded_max =
1349
+ ieee:: Double :: from_u128_r ( int_max ( signed, int_width) , Round :: TowardZero ) ;
1350
+ assert ! ( rounded_max. value. is_finite( ) ) ;
1351
+ ( rounded_min. value . to_bits ( ) , rounded_max. value . to_bits ( ) )
1352
+ } ;
1353
+ // To implement saturation, we perform the following steps:
1354
+ //
1355
+ // 1. Cast val to an integer with fpto[su]i. This may result in undef.
1356
+ // 2. Compare val to f_min and f_max, and use the comparison results to select:
1357
+ // a) int_ty::MIN if val < f_min or val is NaN
1358
+ // b) int_ty::MAX if val > f_max
1359
+ // c) the result of fpto[su]i otherwise
1360
+ // 3. If val is NaN, return 0.0, otherwise return the result of step 2.
1361
+ //
1362
+ // This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
1363
+ // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
1364
+ // undef does not introduce any non-determinism either.
1365
+ // More importantly, the above procedure correctly implements saturating conversion.
1366
+ // Proof (sketch):
1367
+ // If val is NaN, 0 is returned by definition.
1368
+ // Otherwise, val is finite or infinite and thus can be compared with f_min and f_max.
1369
+ // This yields three cases to consider:
1370
+ // (1) if val in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
1371
+ // saturating conversion for inputs in that range.
1372
+ // (2) if val > f_max, then val is larger than int_ty::MAX. This holds even if f_max is rounded
1373
+ // (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
1374
+ // than int_ty::MAX. Because val is larger than int_ty::MAX, the return value of int_ty::MAX
1375
+ // is correct.
1376
+ // (3) if val < f_min, then val is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
1377
+ // int_ty::MIN and therefore the return value of int_ty::MIN is correct.
1378
+ // QED.
1379
+
1380
+ let float_bits_to_llval = |bx : & mut Self , bits| {
1381
+ let bits_llval = match float_width {
1382
+ 32 => bx. cx ( ) . const_u32 ( bits as u32 ) ,
1383
+ 64 => bx. cx ( ) . const_u64 ( bits as u64 ) ,
1384
+ n => bug ! ( "unsupported float width {}" , n) ,
1385
+ } ;
1386
+ bx. bitcast ( bits_llval, float_ty)
1387
+ } ;
1388
+ let ( f_min, f_max) = match float_width {
1389
+ 32 => compute_clamp_bounds_single ( signed, int_width) ,
1390
+ 64 => compute_clamp_bounds_double ( signed, int_width) ,
1391
+ n => bug ! ( "unsupported float width {}" , n) ,
1392
+ } ;
1393
+ let f_min = float_bits_to_llval ( self , f_min) ;
1394
+ let f_max = float_bits_to_llval ( self , f_max) ;
1395
+ let int_max = self . cx . const_uint_big ( int_ty, int_max ( signed, int_width) ) ;
1396
+ let int_min = self . cx . const_uint_big ( int_ty, int_min ( signed, int_width) as u128 ) ;
1397
+ let zero = self . cx . const_uint ( int_ty, 0 ) ;
1398
+
1399
+ // If we're working with vectors, constants must be "splatted": the constant is duplicated
1400
+ // into each lane of the vector. The algorithm stays the same, we are just using the
1401
+ // same constant across all lanes.
1402
+ let maybe_splat = |bx : & mut Self , val| {
1403
+ if bx. cx ( ) . type_kind ( dest_ty) == TypeKind :: Vector {
1404
+ bx. vector_splat ( bx. vector_length ( dest_ty) , val)
1405
+ } else {
1406
+ val
1407
+ }
1408
+ } ;
1409
+ let f_min = maybe_splat ( self , f_min) ;
1410
+ let f_max = maybe_splat ( self , f_max) ;
1411
+ let int_max = maybe_splat ( self , int_max) ;
1412
+ let int_min = maybe_splat ( self , int_min) ;
1413
+ let zero = maybe_splat ( self , zero) ;
1414
+
1415
+ // Step 1 ...
1416
+ let fptosui_result = if signed { self . fptosi ( val, dest_ty) } else { self . fptoui ( val, dest_ty) } ;
1417
+ let less_or_nan = self . fcmp ( RealPredicate :: RealULT , val, f_min) ;
1418
+ let greater = self . fcmp ( RealPredicate :: RealOGT , val, f_max) ;
1419
+
1420
+ // Step 2: We use two comparisons and two selects, with %s1 being the
1421
+ // result:
1422
+ // %less_or_nan = fcmp ult %val, %f_min
1423
+ // %greater = fcmp olt %val, %f_max
1424
+ // %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
1425
+ // %s1 = select %greater, int_ty::MAX, %s0
1426
+ // Note that %less_or_nan uses an *unordered* comparison. This
1427
+ // comparison is true if the operands are not comparable (i.e., if val is
1428
+ // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
1429
+ // val is NaN.
1430
+ //
1431
+ // Performance note: Unordered comparison can be lowered to a "flipped"
1432
+ // comparison and a negation, and the negation can be merged into the
1433
+ // select. Therefore, it not necessarily any more expensive than an
1434
+ // ordered ("normal") comparison. Whether these optimizations will be
1435
+ // performed is ultimately up to the backend, but at least x86 does
1436
+ // perform them.
1437
+ let s0 = self . select ( less_or_nan, int_min, fptosui_result) ;
1438
+ let s1 = self . select ( greater, int_max, s0) ;
1439
+
1440
+ // Step 3: NaN replacement.
1441
+ // For unsigned types, the above step already yielded int_ty::MIN == 0 if val is NaN.
1442
+ // Therefore we only need to execute this step for signed integer types.
1443
+ if signed {
1444
+ // LLVM has no isNaN predicate, so we use (val == val) instead
1445
+ let cmp = self . fcmp ( RealPredicate :: RealOEQ , val, val) ;
1446
+ self . select ( cmp, s1, zero)
1447
+ } else {
1448
+ s1
1449
+ }
1450
+ }
1451
+
1288
1452
#[ cfg( feature="master" ) ]
1289
1453
pub fn shuffle_vector ( & mut self , v1 : RValue < ' gcc > , v2 : RValue < ' gcc > , mask : RValue < ' gcc > ) -> RValue < ' gcc > {
1290
1454
let struct_type = mask. get_type ( ) . is_struct ( ) . expect ( "mask of struct type" ) ;
0 commit comments