Skip to content

Commit 0c95c8d

Browse files
committed
Auto merge of rust-lang#135408 - RalfJung:x86-sse2, r=<try>
x86: make SSE2 required for i686 hardfloat targets and use it to pass SIMD types The primary goal of this is to make SSE2 *required* for our i686 targets (at least for the ones that use Pentium 4 as their baseline), to ensure they cannot be affected by rust-lang#114479. This has been MCPd in rust-lang/compiler-team#808, and is tracked in rust-lang#133611. We do this by defining a new ABI that these targets select, and making SSE2 required by the ABI (that's the first commit). That's kind of a hack, but (a) it is the easiest way to make a target feature required via the target spec, and (b) we actually *can* use SSE2 for the Rust ABI now that it is required, so the second commit goes ahead and does that. Specifically, we use it in two ways: to return `f64` values in a register rather than by-ptr, and to pass vectors of size up to 128bit in a register (or, well, whatever LLVM does when passing `<4 x float>` by-val, I don't actually know if this ends up in a register). Cc `@workingjubilee` Fixes rust-lang#133611 try-job: aarch64-apple try-job: aarch64-gnu try-job: aarch64-gnu-debug try-job: test-various try-job: x86_64-gnu-nopt try-job: dist-i586-gnu-i586-i686-musl
2 parents 6dfeab5 + c0ff7be commit 0c95c8d

34 files changed

+269
-111
lines changed

compiler/rustc_target/src/callconv/mod.rs

+74-44
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use rustc_abi::{
77
};
88
use rustc_macros::HashStable_Generic;
99

10-
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
10+
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
1111

1212
mod aarch64;
1313
mod amdgpu;
@@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
386386
/// Pass this argument directly instead. Should NOT be used!
387387
/// Only exists because of past ABI mistakes that will take time to fix
388388
/// (see <https://github.com/rust-lang/rust/issues/115666>).
389+
#[track_caller]
389390
pub fn make_direct_deprecated(&mut self) {
390391
match self.mode {
391392
PassMode::Indirect { .. } => {
@@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
398399

399400
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
400401
/// This is valid for both sized and unsized arguments.
402+
#[track_caller]
401403
pub fn make_indirect(&mut self) {
402404
match self.mode {
403405
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
412414

413415
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
414416
/// ZSTs indirectly.
417+
#[track_caller]
415418
pub fn make_indirect_from_ignore(&mut self) {
416419
match self.mode {
417420
PassMode::Ignore => {
@@ -716,27 +719,46 @@ impl<'a, Ty> FnAbi<'a, Ty> {
716719
C: HasDataLayout + HasTargetSpec,
717720
{
718721
let spec = cx.target_spec();
719-
match &spec.arch[..] {
722+
match &*spec.arch {
720723
"x86" => x86::compute_rust_abi_info(cx, self, abi),
721724
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
722725
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
723726
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
724727
_ => {}
725728
};
726729

730+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
731+
// May only return `true` if the target will always pass those arguments the same way,
732+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
733+
// target features are required to pass a SIMD value in registers must be listed in
734+
// the `abi_required_features` for the current target and ABI.
735+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
736+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
737+
// to 128-bit-sized vectors.
738+
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
739+
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
740+
arg.layout.size.bits() <= 128
741+
}
742+
// So far, we haven't implemented this logic for any other target.
743+
_ => false,
744+
};
745+
727746
for (arg_idx, arg) in self
728747
.args
729748
.iter_mut()
730749
.enumerate()
731750
.map(|(idx, arg)| (Some(idx), arg))
732751
.chain(iter::once((None, &mut self.ret)))
733752
{
734-
if arg.is_ignore() {
753+
// If the logic above already picked a specific type to cast the argument to, leave that
754+
// in place.
755+
if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
735756
continue;
736757
}
737758

738759
if arg_idx.is_none()
739760
&& arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2
761+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
740762
{
741763
// Return values larger than 2 registers using a return area
742764
// pointer. LLVM and Cranelift disagree about how to return
@@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
746768
// return value independently and decide to pass it in a
747769
// register or not, which would result in the return value
748770
// being passed partially in registers and partially through a
749-
// return area pointer.
771+
// return area pointer. For large IR-level values such as `i128`,
772+
// cranelift will even split up the value into smaller chunks.
750773
//
751774
// While Cranelift may need to be fixed as the LLVM behavior is
752775
// generally more correct with respect to the surface language,
@@ -776,53 +799,60 @@ impl<'a, Ty> FnAbi<'a, Ty> {
776799
// rustc_target already ensure any return value which doesn't
777800
// fit in the available amount of return registers is passed in
778801
// the right way for the current target.
802+
//
803+
// The adjustment is not necessary nor desired for types with a vector
804+
// representation; those are handled below.
779805
arg.make_indirect();
780806
continue;
781807
}
782808

783809
match arg.layout.backend_repr {
784-
BackendRepr::Memory { .. } => {}
785-
786-
// This is a fun case! The gist of what this is doing is
787-
// that we want callers and callees to always agree on the
788-
// ABI of how they pass SIMD arguments. If we were to *not*
789-
// make these arguments indirect then they'd be immediates
790-
// in LLVM, which means that they'd used whatever the
791-
// appropriate ABI is for the callee and the caller. That
792-
// means, for example, if the caller doesn't have AVX
793-
// enabled but the callee does, then passing an AVX argument
794-
// across this boundary would cause corrupt data to show up.
795-
//
796-
// This problem is fixed by unconditionally passing SIMD
797-
// arguments through memory between callers and callees
798-
// which should get them all to agree on ABI regardless of
799-
// target feature sets. Some more information about this
800-
// issue can be found in #44367.
801-
//
802-
// Note that the intrinsic ABI is exempt here as
803-
// that's how we connect up to LLVM and it's unstable
804-
// anyway, we control all calls to it in libstd.
805-
BackendRepr::Vector { .. }
806-
if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
807-
{
808-
arg.make_indirect();
809-
continue;
810+
BackendRepr::Memory { .. } => {
811+
// Compute `Aggregate` ABI.
812+
813+
let is_indirect_not_on_stack =
814+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
815+
assert!(is_indirect_not_on_stack);
816+
817+
let size = arg.layout.size;
818+
if arg.layout.is_sized()
819+
&& size <= Primitive::Pointer(AddressSpace::DATA).size(cx)
820+
{
821+
// We want to pass small aggregates as immediates, but using
822+
// an LLVM aggregate type for this leads to bad optimizations,
823+
// so we pick an appropriately sized integer type instead.
824+
arg.cast_to(Reg { kind: RegKind::Integer, size });
825+
}
810826
}
811827

812-
_ => continue,
813-
}
814-
// Compute `Aggregate` ABI.
815-
816-
let is_indirect_not_on_stack =
817-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
818-
assert!(is_indirect_not_on_stack);
819-
820-
let size = arg.layout.size;
821-
if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
822-
// We want to pass small aggregates as immediates, but using
823-
// an LLVM aggregate type for this leads to bad optimizations,
824-
// so we pick an appropriately sized integer type instead.
825-
arg.cast_to(Reg { kind: RegKind::Integer, size });
828+
BackendRepr::Vector { .. } => {
829+
// This is a fun case! The gist of what this is doing is
830+
// that we want callers and callees to always agree on the
831+
// ABI of how they pass SIMD arguments. If we were to *not*
832+
// make these arguments indirect then they'd be immediates
833+
// in LLVM, which means that they'd used whatever the
834+
// appropriate ABI is for the callee and the caller. That
835+
// means, for example, if the caller doesn't have AVX
836+
// enabled but the callee does, then passing an AVX argument
837+
// across this boundary would cause corrupt data to show up.
838+
//
839+
// This problem is fixed by unconditionally passing SIMD
840+
// arguments through memory between callers and callees
841+
// which should get them all to agree on ABI regardless of
842+
// target feature sets. Some more information about this
843+
// issue can be found in #44367.
844+
//
845+
// Note that the intrinsic ABI is exempt here as those are not
846+
// real functions anyway, and the backend expects very specific types.
847+
if abi != ExternAbi::RustIntrinsic
848+
&& spec.simd_types_indirect
849+
&& !can_pass_simd_directly(arg)
850+
{
851+
arg.make_indirect();
852+
}
853+
}
854+
855+
_ => {}
826856
}
827857
}
828858
}

compiler/rustc_target/src/callconv/x86.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rustc_abi::{
44
};
55

66
use crate::callconv::{ArgAttribute, FnAbi, PassMode};
7-
use crate::spec::HasTargetSpec;
7+
use crate::spec::{HasTargetSpec, RustcAbi};
88

99
#[derive(PartialEq)]
1010
pub(crate) enum Flavor {
@@ -236,8 +236,16 @@ where
236236
_ => false, // anyway not passed via registers on x86
237237
};
238238
if has_float {
239-
if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
240-
// Same size or smaller than pointer, return in a register.
239+
if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2)
240+
&& fn_abi.ret.layout.backend_repr.is_scalar()
241+
&& fn_abi.ret.layout.size.bits() <= 128
242+
{
243+
// This is a single scalar that fits into an SSE register, and the target uses the
244+
// SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
245+
// registers for float operations, so that's the best place to pass them around.
246+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
247+
} else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
248+
// Same size or smaller than pointer, return in an integer register.
241249
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
242250
} else {
243251
// Larger than a pointer, return indirectly.

compiler/rustc_target/src/spec/base/apple/mod.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use std::borrow::Cow;
22
use std::env;
33

44
use crate::spec::{
5-
Cc, DebuginfoKind, FloatAbi, FramePointer, LinkerFlavor, Lld, SplitDebuginfo, StackProbeType,
6-
StaticCow, TargetOptions, cvs,
5+
Cc, DebuginfoKind, FloatAbi, FramePointer, LinkerFlavor, Lld, RustcAbi, SplitDebuginfo,
6+
StackProbeType, StaticCow, TargetOptions, cvs,
77
};
88

99
#[cfg(test)]
@@ -103,7 +103,7 @@ pub(crate) fn base(
103103
arch: Arch,
104104
abi: TargetAbi,
105105
) -> (TargetOptions, StaticCow<str>, StaticCow<str>) {
106-
let opts = TargetOptions {
106+
let mut opts = TargetOptions {
107107
abi: abi.target_abi().into(),
108108
llvm_floatabi: Some(FloatAbi::Hard),
109109
os: os.into(),
@@ -154,6 +154,10 @@ pub(crate) fn base(
154154

155155
..Default::default()
156156
};
157+
if matches!(arch, Arch::I386 | Arch::I686) {
158+
// All Apple x86-32 targets have SSE2.
159+
opts.rustc_abi = Some(RustcAbi::X86Sse2);
160+
}
157161
(opts, unversioned_llvm_target(os, arch, abi), arch.target_arch())
158162
}
159163

compiler/rustc_target/src/spec/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,8 @@ impl ToJson for FloatAbi {
11091109
/// The Rustc-specific variant of the ABI used for this target.
11101110
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
11111111
pub enum RustcAbi {
1112+
/// On x86-32 only: make use of SSE and SSE2 for ABI purposes.
1113+
X86Sse2,
11121114
/// On x86-32/64 only: do not use any FPU or SIMD registers for the ABI.
11131115
X86Softfloat,
11141116
}
@@ -1118,6 +1120,7 @@ impl FromStr for RustcAbi {
11181120

11191121
fn from_str(s: &str) -> Result<RustcAbi, ()> {
11201122
Ok(match s {
1123+
"x86-sse2" => RustcAbi::X86Sse2,
11211124
"x86-softfloat" => RustcAbi::X86Softfloat,
11221125
_ => return Err(()),
11231126
})
@@ -1127,6 +1130,7 @@ impl FromStr for RustcAbi {
11271130
impl ToJson for RustcAbi {
11281131
fn to_json(&self) -> Json {
11291132
match *self {
1133+
RustcAbi::X86Sse2 => "x86-sse2",
11301134
RustcAbi::X86Softfloat => "x86-softfloat",
11311135
}
11321136
.to_json()
@@ -3264,6 +3268,11 @@ impl Target {
32643268
// Check consistency of Rust ABI declaration.
32653269
if let Some(rust_abi) = self.rustc_abi {
32663270
match rust_abi {
3271+
RustcAbi::X86Sse2 => check_matches!(
3272+
&*self.arch,
3273+
"x86",
3274+
"`x86-sse2` ABI is only valid for x86-32 targets"
3275+
),
32673276
RustcAbi::X86Softfloat => check_matches!(
32683277
&*self.arch,
32693278
"x86" | "x86_64",

compiler/rustc_target/src/spec/targets/i586_pc_nto_qnx700.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::spec::base::nto_qnx;
2-
use crate::spec::{StackProbeType, Target, TargetOptions, base};
2+
use crate::spec::{RustcAbi, StackProbeType, Target, TargetOptions, base};
33

44
pub(crate) fn target() -> Target {
55
let mut meta = nto_qnx::meta();
@@ -14,6 +14,7 @@ pub(crate) fn target() -> Target {
1414
.into(),
1515
arch: "x86".into(),
1616
options: TargetOptions {
17+
rustc_abi: Some(RustcAbi::X86Sse2),
1718
cpu: "pentium4".into(),
1819
max_atomic_width: Some(64),
1920
pre_link_args: nto_qnx::pre_link_args(

compiler/rustc_target/src/spec/targets/i586_unknown_linux_gnu.rs

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_gnu::target();
5+
base.rustc_abi = None; // overwrite the SSE2 ABI set by the base target
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-gnu".into();
78
base

compiler/rustc_target/src/spec/targets/i586_unknown_linux_musl.rs

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_musl::target();
5+
base.rustc_abi = None; // overwrite the SSE2 ABI set by the base target
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-musl".into();
78
// FIXME(compiler-team#422): musl targets should be dynamically linked by default.

compiler/rustc_target/src/spec/targets/i686_linux_android.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::spec::{SanitizerSet, StackProbeType, Target, TargetOptions, base};
1+
use crate::spec::{RustcAbi, SanitizerSet, StackProbeType, Target, TargetOptions, base};
22

33
// See https://developer.android.com/ndk/guides/abis.html#x86
44
// for target ABI requirements.
@@ -8,6 +8,7 @@ pub(crate) fn target() -> Target {
88

99
base.max_atomic_width = Some(64);
1010

11+
base.rustc_abi = Some(RustcAbi::X86Sse2);
1112
// https://developer.android.com/ndk/guides/abis.html#x86
1213
base.cpu = "pentium4".into();
1314
base.features = "+mmx,+sse,+sse2,+sse3,+ssse3".into();

compiler/rustc_target/src/spec/targets/i686_pc_windows_gnu.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, Target, base};
1+
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, RustcAbi, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_gnu::opts();
5+
base.rustc_abi = Some(RustcAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.frame_pointer = FramePointer::Always; // Required for backtraces

compiler/rustc_target/src/spec/targets/i686_pc_windows_gnullvm.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, Target, base};
1+
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, RustcAbi, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_gnullvm::opts();
5+
base.rustc_abi = Some(RustcAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.frame_pointer = FramePointer::Always; // Required for backtraces

compiler/rustc_target/src/spec/targets/i686_pc_windows_msvc.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{LinkerFlavor, Lld, SanitizerSet, Target, base};
1+
use crate::spec::{LinkerFlavor, Lld, RustcAbi, SanitizerSet, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_msvc::opts();
5+
base.rustc_abi = Some(RustcAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.supported_sanitizers = SanitizerSet::ADDRESS;

compiler/rustc_target/src/spec/targets/i686_unknown_freebsd.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, LinkerFlavor, Lld, StackProbeType, Target, base};
1+
use crate::spec::{Cc, LinkerFlavor, Lld, RustcAbi, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::freebsd::opts();
5+
base.rustc_abi = Some(RustcAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-Wl,-znotext"]);

compiler/rustc_target/src/spec/targets/i686_unknown_haiku.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, LinkerFlavor, Lld, StackProbeType, Target, base};
1+
use crate::spec::{Cc, LinkerFlavor, Lld, RustcAbi, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::haiku::opts();
5+
base.rustc_abi = Some(RustcAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]);

0 commit comments

Comments
 (0)