-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] Add sincos builtin using llvm.sincos
intrinsic
#114086
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-x86 Author: Benjamin Maxwell (MacDue) ChangesThis registers Full diff: https://github.com/llvm/llvm-project/pull/114086.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9bd67e0cefebc3..27eadf80d623e6 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3562,6 +3562,19 @@ def Frexp : FPMathTemplate, LibBuiltin<"math.h"> {
let AddBuiltinPrefixedAlias = 1;
}
+def Sincos : FPMathTemplate, GNULibBuiltin<"math.h"> {
+ let Spellings = ["sincos"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(T, T*, T*)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+def SincosF16F128 : F16F128MathTemplate, Builtin {
+ let Spellings = ["__builtin_sincos"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
+ let Prototype = "void(T, T*, T*)";
+}
+
def Ldexp : FPMathTemplate, LibBuiltin<"math.h"> {
let Spellings = ["ldexp"];
let Attributes = [NoThrow, ConstIgnoringErrnoAndExceptions];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d7f5c54a1913..5bb6851c3a2702 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -722,6 +722,36 @@ static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateExtractValue(Call, 0);
}
+static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+ llvm::Intrinsic::ID IntrinsicID) {
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
+ llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
+
+ llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
+ llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
+
+ QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
+ LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
+ LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
+
+ llvm::StoreInst *StoreSin = CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
+ llvm::StoreInst *StoreCos = CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
+
+ // Mark the two stores as non-aliasing with eachother. The order of stores
+ // emitted by this builtin is arbitrary, enforcing a particular order will
+ // prevent optimizations later on.
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ MDNode* Domain = MDHelper.createAnonymousAliasScopeDomain();
+ MDNode* AliasScope = MDHelper.createAnonymousAliasScope(Domain);
+ MDNode* AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
+ StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
+ StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -3094,6 +3124,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
+ case Builtin::BIsincos:
+ case Builtin::BIsincosf:
+ case Builtin::BIsincosl:
+ case Builtin::BI__builtin_sincos:
+ case Builtin::BI__builtin_sincosf:
+ case Builtin::BI__builtin_sincosl:
+ case Builtin::BI__builtin_sincosf128:
+ case Builtin::BI__builtin_sincosf16:
+ emitSincosBuiltin(*this, E, Intrinsic::sincos);
+ return RValue::get(nullptr);
+
case Builtin::BIsqrt:
case Builtin::BIsqrtf:
case Builtin::BIsqrtl:
diff --git a/clang/test/CodeGen/AArch64/sincos.c b/clang/test/CodeGen/AArch64/sincos.c
new file mode 100644
index 00000000000000..240d921b2b7034
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sincos.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple=aarch64-gnu-linux -emit-llvm %s -o - | FileCheck --check-prefix=NO-MATH-ERRNO %s
+// RUN: %clang_cc1 -triple=aarch64-gnu-linux -emit-llvm -fmath-errno %s -o - | FileCheck --check-prefix=MATH-ERRNO %s
+
+void sincos(double, double*, double*);
+void sincosf(float, float*, float*);
+
+// NO-MATH-ERRNO-LABEL: @foo
+// NO-MATH-ERRNO: [[SINCOS:%.*]] = call { double, double } @llvm.sincos.f64(double {{.*}})
+// NO-MATH-ERRNO-NEXT: [[SIN:%.*]] = extractvalue { double, double } [[SINCOS]], 0
+// NO-MATH-ERRNO-NEXT: [[COS:%.*]] = extractvalue { double, double } [[SINCOS]], 1
+// NO-MATH-ERRNO-NEXT: store double [[SIN]], ptr {{.*}}, align 8, !alias.scope [[SINCOS_ALIAS_SCOPE:![0-9]+]]
+// NO-MATH-ERRNO-NEXT: store double [[COS]], ptr {{.*}}, align 8, !noalias [[SINCOS_ALIAS_SCOPE]]
+//
+// MATH-ERRNO-LABEL: @foo
+// MATH-ERRNO: call void @sincos(
+//
+void foo(double x, double* dp0, double* dp1) {
+ sincos(x, dp0, dp1);
+}
+
+// NO-MATH-ERRNO-LABEL: @bar
+// NO-MATH-ERRNO: [[SINCOS:%.*]] = call { float, float } @llvm.sincos.f32(float {{.*}})
+// NO-MATH-ERRNO-NEXT: [[SIN:%.*]] = extractvalue { float, float } [[SINCOS]], 0
+// NO-MATH-ERRNO-NEXT: [[COS:%.*]] = extractvalue { float, float } [[SINCOS]], 1
+// NO-MATH-ERRNO-NEXT: store float [[SIN]], ptr {{.*}}, align 4, !alias.scope [[SINCOS_ALIAS_SCOPE:![0-9]+]]
+// NO-MATH-ERRNO-NEXT: store float [[COS]], ptr {{.*}}, align 4, !noalias [[SINCOS_ALIAS_SCOPE]]
+//
+// MATH-ERRNO-LABEL: @bar
+// MATH-ERRNO: call void @sincosf(
+//
+void bar(float x, float* fp0, float* fp1) {
+ sincosf(x, fp0, fp1);
+}
diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c
index 48465df21cca19..20e86a50e2bf18 100644
--- a/clang/test/CodeGen/X86/math-builtins.c
+++ b/clang/test/CodeGen/X86/math-builtins.c
@@ -38,6 +38,31 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } [[FREXP_F128]], 0
+// NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } @llvm.sincos.f64(double %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 1
+// NO__ERRNO-NEXT: store double [[SINCOS_F64_0]], ptr %{{.+}}, align 8
+// NO__ERRNO-NEXT: store double [[SINCOS_F64_1]], ptr %{{.+}}, align 8
+
+// NO__ERRNO: [[SINCOS_F32:%.+]] = call { float, float } @llvm.sincos.f32(float %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F32_0:%.+]] = extractvalue { float, float } [[SINCOS_F32]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F32_1:%.+]] = extractvalue { float, float } [[SINCOS_F32]], 1
+// NO__ERRNO-NEXT: store float [[SINCOS_F32_0]], ptr %{{.+}}, align 4
+// NO__ERRNO-NEXT: store float [[SINCOS_F32_1]], ptr %{{.+}}, align 4
+
+// NO__ERRNO: [[SINCOS_F80:%.+]] = call { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F80_0:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[SINCOS_F80]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F80_1:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[SINCOS_F80]], 1
+// NO__ERRNO-NEXT: store x86_fp80 [[SINCOS_F80_0]], ptr %{{.+}}, align 16
+// NO__ERRNO-NEXT: store x86_fp80 [[SINCOS_F80_1]], ptr %{{.+}}, align 16
+
+// NO__ERRNO: [[SINCOS_F128:%.+]] = call { fp128, fp128 } @llvm.sincos.f128(fp128 %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F128_0:%.+]] = extractvalue { fp128, fp128 } [[SINCOS_F128]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F128_1:%.+]] = extractvalue { fp128, fp128 } [[SINCOS_F128]], 1
+// NO__ERRNO-NEXT: store fp128 [[SINCOS_F128_0]], ptr %{{.+}}, align 16
+// NO__ERRNO-NEXT: store fp128 [[SINCOS_F128_1]], ptr %{{.+}}, align 16
+
+
// HAS_ERRNO: declare double @fmod(double noundef, double noundef) [[NOT_READNONE:#[0-9]+]]
// HAS_ERRNO: declare float @fmodf(float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @fmodl(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
@@ -665,6 +690,16 @@ __builtin_sinh(f); __builtin_sinhf(f); __builtin_sinhl(f); __builtin_
// HAS_ERRNO: declare x86_fp80 @sinhl(x86_fp80 noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare fp128 @sinhf128(fp128 noundef) [[NOT_READNONE]]
+__builtin_sincos(f,d,d); __builtin_sincosf(f,fp,fp); __builtin_sincosl(f,l,l); __builtin_sincosf128(f,l,l);
+// NO__ERRNO: declare { double, double } @llvm.sincos.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { float, float } @llvm.sincos.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { fp128, fp128 } @llvm.sincos.f128(fp128) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare void @sincos(double noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosf(float noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosl(x86_fp80 noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosf128(fp128 noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+
__builtin_sqrt(f); __builtin_sqrtf(f); __builtin_sqrtl(f); __builtin_sqrtf128(f);
// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC]]
diff --git a/clang/test/OpenMP/declare_simd_aarch64.c b/clang/test/OpenMP/declare_simd_aarch64.c
index 21c83c225963f9..e9538e7446eec9 100644
--- a/clang/test/OpenMP/declare_simd_aarch64.c
+++ b/clang/test/OpenMP/declare_simd_aarch64.c
@@ -1,8 +1,8 @@
// REQUIRES: aarch64-registered-target
// -fopemp and -fopenmp-simd behavior are expected to be the same.
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fmath-errno -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fmath-errno -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
#pragma omp declare simd
#pragma omp declare simd simdlen(2)
|
@llvm/pr-subscribers-clang Author: Benjamin Maxwell (MacDue) ChangesThis registers Full diff: https://github.com/llvm/llvm-project/pull/114086.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9bd67e0cefebc3..27eadf80d623e6 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3562,6 +3562,19 @@ def Frexp : FPMathTemplate, LibBuiltin<"math.h"> {
let AddBuiltinPrefixedAlias = 1;
}
+def Sincos : FPMathTemplate, GNULibBuiltin<"math.h"> {
+ let Spellings = ["sincos"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(T, T*, T*)";
+ let AddBuiltinPrefixedAlias = 1;
+}
+
+def SincosF16F128 : F16F128MathTemplate, Builtin {
+ let Spellings = ["__builtin_sincos"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
+ let Prototype = "void(T, T*, T*)";
+}
+
def Ldexp : FPMathTemplate, LibBuiltin<"math.h"> {
let Spellings = ["ldexp"];
let Attributes = [NoThrow, ConstIgnoringErrnoAndExceptions];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d7f5c54a1913..5bb6851c3a2702 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -722,6 +722,36 @@ static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateExtractValue(Call, 0);
}
+static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+ llvm::Intrinsic::ID IntrinsicID) {
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
+ llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
+
+ llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
+ llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
+
+ QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
+ LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
+ LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
+
+ llvm::StoreInst *StoreSin = CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
+ llvm::StoreInst *StoreCos = CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
+
+ // Mark the two stores as non-aliasing with eachother. The order of stores
+ // emitted by this builtin is arbitrary, enforcing a particular order will
+ // prevent optimizations later on.
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ MDNode* Domain = MDHelper.createAnonymousAliasScopeDomain();
+ MDNode* AliasScope = MDHelper.createAnonymousAliasScope(Domain);
+ MDNode* AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
+ StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
+ StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -3094,6 +3124,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
+ case Builtin::BIsincos:
+ case Builtin::BIsincosf:
+ case Builtin::BIsincosl:
+ case Builtin::BI__builtin_sincos:
+ case Builtin::BI__builtin_sincosf:
+ case Builtin::BI__builtin_sincosl:
+ case Builtin::BI__builtin_sincosf128:
+ case Builtin::BI__builtin_sincosf16:
+ emitSincosBuiltin(*this, E, Intrinsic::sincos);
+ return RValue::get(nullptr);
+
case Builtin::BIsqrt:
case Builtin::BIsqrtf:
case Builtin::BIsqrtl:
diff --git a/clang/test/CodeGen/AArch64/sincos.c b/clang/test/CodeGen/AArch64/sincos.c
new file mode 100644
index 00000000000000..240d921b2b7034
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sincos.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple=aarch64-gnu-linux -emit-llvm %s -o - | FileCheck --check-prefix=NO-MATH-ERRNO %s
+// RUN: %clang_cc1 -triple=aarch64-gnu-linux -emit-llvm -fmath-errno %s -o - | FileCheck --check-prefix=MATH-ERRNO %s
+
+void sincos(double, double*, double*);
+void sincosf(float, float*, float*);
+
+// NO-MATH-ERRNO-LABEL: @foo
+// NO-MATH-ERRNO: [[SINCOS:%.*]] = call { double, double } @llvm.sincos.f64(double {{.*}})
+// NO-MATH-ERRNO-NEXT: [[SIN:%.*]] = extractvalue { double, double } [[SINCOS]], 0
+// NO-MATH-ERRNO-NEXT: [[COS:%.*]] = extractvalue { double, double } [[SINCOS]], 1
+// NO-MATH-ERRNO-NEXT: store double [[SIN]], ptr {{.*}}, align 8, !alias.scope [[SINCOS_ALIAS_SCOPE:![0-9]+]]
+// NO-MATH-ERRNO-NEXT: store double [[COS]], ptr {{.*}}, align 8, !noalias [[SINCOS_ALIAS_SCOPE]]
+//
+// MATH-ERRNO-LABEL: @foo
+// MATH-ERRNO: call void @sincos(
+//
+void foo(double x, double* dp0, double* dp1) {
+ sincos(x, dp0, dp1);
+}
+
+// NO-MATH-ERRNO-LABEL: @bar
+// NO-MATH-ERRNO: [[SINCOS:%.*]] = call { float, float } @llvm.sincos.f32(float {{.*}})
+// NO-MATH-ERRNO-NEXT: [[SIN:%.*]] = extractvalue { float, float } [[SINCOS]], 0
+// NO-MATH-ERRNO-NEXT: [[COS:%.*]] = extractvalue { float, float } [[SINCOS]], 1
+// NO-MATH-ERRNO-NEXT: store float [[SIN]], ptr {{.*}}, align 4, !alias.scope [[SINCOS_ALIAS_SCOPE:![0-9]+]]
+// NO-MATH-ERRNO-NEXT: store float [[COS]], ptr {{.*}}, align 4, !noalias [[SINCOS_ALIAS_SCOPE]]
+//
+// MATH-ERRNO-LABEL: @bar
+// MATH-ERRNO: call void @sincosf(
+//
+void bar(float x, float* fp0, float* fp1) {
+ sincosf(x, fp0, fp1);
+}
diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c
index 48465df21cca19..20e86a50e2bf18 100644
--- a/clang/test/CodeGen/X86/math-builtins.c
+++ b/clang/test/CodeGen/X86/math-builtins.c
@@ -38,6 +38,31 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } [[FREXP_F128]], 0
+// NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } @llvm.sincos.f64(double %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 1
+// NO__ERRNO-NEXT: store double [[SINCOS_F64_0]], ptr %{{.+}}, align 8
+// NO__ERRNO-NEXT: store double [[SINCOS_F64_1]], ptr %{{.+}}, align 8
+
+// NO__ERRNO: [[SINCOS_F32:%.+]] = call { float, float } @llvm.sincos.f32(float %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F32_0:%.+]] = extractvalue { float, float } [[SINCOS_F32]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F32_1:%.+]] = extractvalue { float, float } [[SINCOS_F32]], 1
+// NO__ERRNO-NEXT: store float [[SINCOS_F32_0]], ptr %{{.+}}, align 4
+// NO__ERRNO-NEXT: store float [[SINCOS_F32_1]], ptr %{{.+}}, align 4
+
+// NO__ERRNO: [[SINCOS_F80:%.+]] = call { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F80_0:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[SINCOS_F80]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F80_1:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[SINCOS_F80]], 1
+// NO__ERRNO-NEXT: store x86_fp80 [[SINCOS_F80_0]], ptr %{{.+}}, align 16
+// NO__ERRNO-NEXT: store x86_fp80 [[SINCOS_F80_1]], ptr %{{.+}}, align 16
+
+// NO__ERRNO: [[SINCOS_F128:%.+]] = call { fp128, fp128 } @llvm.sincos.f128(fp128 %{{.+}})
+// NO__ERRNO-NEXT: [[SINCOS_F128_0:%.+]] = extractvalue { fp128, fp128 } [[SINCOS_F128]], 0
+// NO__ERRNO-NEXT: [[SINCOS_F128_1:%.+]] = extractvalue { fp128, fp128 } [[SINCOS_F128]], 1
+// NO__ERRNO-NEXT: store fp128 [[SINCOS_F128_0]], ptr %{{.+}}, align 16
+// NO__ERRNO-NEXT: store fp128 [[SINCOS_F128_1]], ptr %{{.+}}, align 16
+
+
// HAS_ERRNO: declare double @fmod(double noundef, double noundef) [[NOT_READNONE:#[0-9]+]]
// HAS_ERRNO: declare float @fmodf(float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @fmodl(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
@@ -665,6 +690,16 @@ __builtin_sinh(f); __builtin_sinhf(f); __builtin_sinhl(f); __builtin_
// HAS_ERRNO: declare x86_fp80 @sinhl(x86_fp80 noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare fp128 @sinhf128(fp128 noundef) [[NOT_READNONE]]
+__builtin_sincos(f,d,d); __builtin_sincosf(f,fp,fp); __builtin_sincosl(f,l,l); __builtin_sincosf128(f,l,l);
+// NO__ERRNO: declare { double, double } @llvm.sincos.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { float, float } @llvm.sincos.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { fp128, fp128 } @llvm.sincos.f128(fp128) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare void @sincos(double noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosf(float noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosl(x86_fp80 noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+// HAS_ERRNO: declare void @sincosf128(fp128 noundef, ptr noundef, ptr noundef) [[NOT_READNONE]]
+
__builtin_sqrt(f); __builtin_sqrtf(f); __builtin_sqrtl(f); __builtin_sqrtf128(f);
// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC]]
diff --git a/clang/test/OpenMP/declare_simd_aarch64.c b/clang/test/OpenMP/declare_simd_aarch64.c
index 21c83c225963f9..e9538e7446eec9 100644
--- a/clang/test/OpenMP/declare_simd_aarch64.c
+++ b/clang/test/OpenMP/declare_simd_aarch64.c
@@ -1,8 +1,8 @@
// REQUIRES: aarch64-registered-target
// -fopemp and -fopenmp-simd behavior are expected to be the same.
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fmath-errno -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fmath-errno -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64
#pragma omp declare simd
#pragma omp declare simd simdlen(2)
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
// Mark the two stores as non-aliasing with eachother. The order of stores | ||
// emitted by this builtin is arbitrary, enforcing a particular order will | ||
// prevent optimizations later on. | ||
llvm::MDBuilder MDHelper(CGF.getLLVMContext()); | ||
MDNode* Domain = MDHelper.createAnonymousAliasScopeDomain(); | ||
MDNode* AliasScope = MDHelper.createAnonymousAliasScope(Domain); | ||
MDNode* AliasScopeList = MDNode::get(Call->getContext(), AliasScope); | ||
StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList); | ||
StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do people think of this? Looking at various documentation for sincos[f]
I've never seen the store order (sin, cos) or (cos, sin) specified, and I imagine they're assumed not to alias anyway making it a moot point.
The issue is without adding this noalias
metadata the order of stores emitted here becomes significant, and later they're chained together in SDAG. This means an unnecessary stack slot may be created to ensure stores follow the arbitrary order from this built-in lowering.
For example, for the store order (sin, cos) you get:
mov x19, x1
add x1, sp, #12
bl sincosf
ldr s0, [sp, #12]
str s0, [x19]
and for (cos, sin):
mov x19, x0
add x0, sp, #12
bl sincosf
ldr s0, [sp, #12]
str s0, [x19]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would not be useful to use this with aliasing arguments. I think it's fine to assume they can be written in any order
eae78e2
to
7fd19ee
Compare
CC @rohitaggarwal007 who added sincos vectorisation for amdlibm recently - hopefully we can get ensure amdlibm uses the new builtin + intrinsic safely |
I have another patch (#114039) that allows lowering the |
2ef70d3
to
3ab305e
Compare
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
if (!getTarget().getTriple().isAArch64() || | ||
CGM.getCodeGenOpts().OptimizationLevel == 0) | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!getTarget().getTriple().isAArch64() || | |
CGM.getCodeGenOpts().OptimizationLevel == 0) | |
break; |
No, unconditional codegen
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't control that most targets (outside of AArch64 and a few others) don't enable AA at codegen, which means the current lowering for llvm.sincos
will generate worse code (an additional stack slot + load & store), that would not be present if using the direct call. It's annoying, but right now I don't have a fix for other targets, and I don't want to worsen codegen for them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't matter, just emit the intrinsic. Also we should just universally turn on AA
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I've removed this. Universally using AA would be nice, but I don't know if/when that would happen.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That control has been sitting there for years. This is the problem, people introduce stuff like this and are afraid of touching anything other than what they care about and it gets stuck forever
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I'd be happier landing this if I could get word from someone working on one of the larger targets that disable this to why combiner-global-alias-analysis
is turned off (and how hard it would be to flip the switch). x86_64
was where I noticed the issue, so perhaps @RKSimon?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Really, I only need the noalias
metadata to work, but that still requires AA to be enabled in the DAG)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given the lack of reply, I'm going to limit this initial patch to only lower to llvm.sincos.*
for the __builtin
variants (similar to frexp
). I'll consider broadening this in later patches.
9aa49b8
to
6614b4f
Compare
This registers `sincos[f|l]` as a clang builtin and updates GCBuiltin to emit the `llvm.sincos.*` intrinsic when `-fno-math-errno` is set. Note: `llvm.sincos.*` is only emitted by `__builtin_sincos[f|l]` functions in this initial patch.
…o-math-errno is set (#121763) This will allow vectorizing these calls (after a few more patches). This should not change the codegen for targets that enable the use of AA during the codegen (in `TargetSubtargetInfo::useAA()`). This includes targets such as AArch64. This notably does not include x86 but can be worked around by passing `-mllvm -combiner-global-alias-analysis=true` to clang. Follow up to #114086.
This registers
sincos[f|l]
as a clang builtin and updates GCBuiltin to emit thellvm.sincos.*
intrinsic when-fno-math-errno
is set. Note:llvm.sincos.*
is only emitted by__builtin_sincos[f|l]
functions in this initial patch.