@@ -419,106 +419,58 @@ pub const unsafe fn swap<T>(x: *mut T, y: *mut T) {
 #[stable(feature = "swap_nonoverlapping", since = "1.27.0")]
 #[rustc_const_unstable(feature = "const_swap", issue = "83163")]
 pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
-    let x = x as *mut u8;
-    let y = y as *mut u8;
-    let len = mem::size_of::<T>() * count;
-    // SAFETY: the caller must guarantee that `x` and `y` are
-    // valid for writes and properly aligned.
-    unsafe { swap_nonoverlapping_bytes(x, y, len) }
-}
+    macro_rules! attempt_swap_as_chunks {
+        ($ChunkTy:ty) => {
+            if mem::align_of::<T>() >= mem::align_of::<$ChunkTy>()
+                && mem::size_of::<T>() % mem::size_of::<$ChunkTy>() == 0
+            {
+                let x: *mut MaybeUninit<$ChunkTy> = x.cast();
+                let y: *mut MaybeUninit<$ChunkTy> = y.cast();
+                let count = count * (mem::size_of::<T>() / mem::size_of::<$ChunkTy>());
+                // SAFETY: these are the same bytes that the caller promised were
+                // ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
+                // The `if` condition above ensures that we're not violating
+                // alignment requirements, and that the division is exact so
+                // that we don't lose any bytes off the end.
+                return unsafe { swap_nonoverlapping_simple(x, y, count) };
+            }
+        };
+    }
 
-#[inline]
-#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
-pub(crate) const unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
-    // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
-    // reinterpretation of values as (chunkable) byte arrays, and the loop in the
-    // block optimization in `swap_nonoverlapping_bytes` is hard to rewrite back
-    // into the (unoptimized) direct swapping implementation, so we disable it.
-    // FIXME(eddyb) the block optimization also prevents MIR optimizations from
-    // understanding `mem::replace`, `Option::take`, etc. - a better overall
-    // solution might be to make `swap_nonoverlapping` into an intrinsic, which
-    // a backend can choose to implement using the block optimization, or not.
-    #[cfg(not(target_arch = "spirv"))]
+    // Split up the slice into small power-of-two-sized chunks that LLVM is able
+    // to vectorize (unless it's a special type with more-than-pointer alignment,
+    // because we don't want to pessimize things like slices of SIMD vectors.)
+    if mem::align_of::<T>() <= mem::size_of::<usize>()
+        && (!mem::size_of::<T>().is_power_of_two()
+            || mem::size_of::<T>() > mem::size_of::<usize>() * 2)
     {
-        // Only apply the block optimization in `swap_nonoverlapping_bytes` for types
-        // at least as large as the block size, to avoid pessimizing codegen.
-        if mem::size_of::<T>() >= 32 {
-            // SAFETY: the caller must uphold the safety contract for `swap_nonoverlapping`.
-            unsafe { swap_nonoverlapping(x, y, 1) };
-            return;
-        }
+        attempt_swap_as_chunks!(usize);
+        attempt_swap_as_chunks!(u8);
     }
 
-    // Direct swapping, for the cases not going through the block optimization.
-    // SAFETY: the caller must guarantee that `x` and `y` are valid
-    // for writes, properly aligned, and non-overlapping.
-    unsafe {
-        let z = read(x);
-        copy_nonoverlapping(y, x, 1);
-        write(y, z);
-    }
+    // SAFETY: Same preconditions as this function
+    unsafe { swap_nonoverlapping_simple(x, y, count) }
 }
 
+/// Same behaviour and safety conditions as [`swap_nonoverlapping`]
+///
+/// LLVM can vectorize this (at least it can for the power-of-two-sized types
+/// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
 #[inline]
 #[rustc_const_unstable(feature = "const_swap", issue = "83163")]
-const unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
-    // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
-    // that swapping either 32 bytes or 64 bytes at a time is most efficient for Intel
-    // Haswell E processors. LLVM is more able to optimize if we give a struct a
-    // #[repr(simd)], even if we don't actually use this struct directly.
-    //
-    // FIXME repr(simd) broken on emscripten and redox
-    #[cfg_attr(not(any(target_os = "emscripten", target_os = "redox")), repr(simd))]
-    struct Block(u64, u64, u64, u64);
-    struct UnalignedBlock(u64, u64, u64, u64);
-
-    let block_size = mem::size_of::<Block>();
-
-    // Loop through x & y, copying them `Block` at a time
-    // The optimizer should unroll the loop fully for most types
-    // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
+const unsafe fn swap_nonoverlapping_simple<T>(x: *mut T, y: *mut T, count: usize) {
     let mut i = 0;
-    while i + block_size <= len {
-        // Create some uninitialized memory as scratch space
-        // Declaring `t` here avoids aligning the stack when this loop is unused
-        let mut t = mem::MaybeUninit::<Block>::uninit();
-        let t = t.as_mut_ptr() as *mut u8;
-
-        // SAFETY: As `i < len`, and as the caller must guarantee that `x` and `y` are valid
-        // for `len` bytes, `x + i` and `y + i` must be valid addresses, which fulfills the
-        // safety contract for `add`.
-        //
-        // Also, the caller must guarantee that `x` and `y` are valid for writes, properly aligned,
-        // and non-overlapping, which fulfills the safety contract for `copy_nonoverlapping`.
-        unsafe {
-            let x = x.add(i);
-            let y = y.add(i);
+    while i < count {
+        let x: &mut T =
+            // SAFETY: By precondition, `i` is in-bounds because it's below `n`
+            unsafe { &mut *x.add(i) };
+        let y: &mut T =
+            // SAFETY: By precondition, `i` is in-bounds because it's below `n`
+            // and it's distinct from `x` since the ranges are non-overlapping
+            unsafe { &mut *y.add(i) };
+        mem::swap_simple(x, y);
 
-            // Swap a block of bytes of x & y, using t as a temporary buffer
-            // This should be optimized into efficient SIMD operations where available
-            copy_nonoverlapping(x, t, block_size);
-            copy_nonoverlapping(y, x, block_size);
-            copy_nonoverlapping(t, y, block_size);
-        }
-        i += block_size;
-    }
-
-    if i < len {
-        // Swap any remaining bytes
-        let mut t = mem::MaybeUninit::<UnalignedBlock>::uninit();
-        let rem = len - i;
-
-        let t = t.as_mut_ptr() as *mut u8;
-
-        // SAFETY: see previous safety comment.
-        unsafe {
-            let x = x.add(i);
-            let y = y.add(i);
-
-            copy_nonoverlapping(x, t, rem);
-            copy_nonoverlapping(y, x, rem);
-            copy_nonoverlapping(t, y, rem);
-        }
+        i += 1;
     }
 }
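The per-element helper `mem::swap_simple` that the new loop calls is not part of this hunk. As a rough sketch only (the body below is an assumption about what such a helper could look like, not the PR's actual code), it can be written as a plain read/copy/write triple so the backend sees simple loads and stores:

fn swap_simple<T>(x: &mut T, y: &mut T) {
    // SAFETY (sketch): exclusive references are always valid for reads and
    // writes and can never overlap, and nothing between the read and the
    // final write can panic, so the value read into `tmp` is neither
    // duplicated nor leaked.
    unsafe {
        let tmp = core::ptr::read(x);
        core::ptr::copy_nonoverlapping(y, x, 1);
        core::ptr::write(y, tmp);
    }
}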
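For context, the public entry point being reimplemented here is the stable `ptr::swap_nonoverlapping` API, whose observable behaviour is unchanged by this diff. A small caller-side example (the arrays and count are illustrative, not from the PR):

use std::ptr;

fn main() {
    let mut a = [1u32, 2, 3, 4];
    let mut b = [5u32, 6, 7, 8];
    // SAFETY: `a` and `b` are distinct local arrays, so both pointers are
    // properly aligned, valid for reads and writes of 4 `u32`s, and the two
    // regions cannot overlap.
    unsafe { ptr::swap_nonoverlapping(a.as_mut_ptr(), b.as_mut_ptr(), 4) };
    assert_eq!(a, [5, 6, 7, 8]);
    assert_eq!(b, [1, 2, 3, 4]);
}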