Skip to content

Commit f01e760

Browse files
authored
[AArch64][SVE] Improve fixed-length addressing modes. (llvm#129732)
When compiling VLS SVE, the compiler often replaces VL-based offsets with immediate-based ones. This leads to a mismatch in the allowed addressing modes due to SVE loads/stores generally expecting immediate offsets relative to VL. For example, given: ```c svfloat64_t foo(const double *x) { svbool_t pg = svptrue_b64(); return svld1_f64(pg, x+svcntd()); } ``` When compiled with `-msve-vector-bits=128`, we currently generate: ```gas foo: ptrue p0.d mov x8, #2 ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ret ``` Instead, we could be generating: ```gas foo: ldr z0, [x0, #1, mul vl] ret ``` Likewise for other types, stores, and other VLS lengths. This patch achieves the above by extending `SelectAddrModeIndexedSVE` to let constants through when `vscale` is known.
1 parent 0cceac6 commit f01e760

File tree

5 files changed

+434
-54
lines changed

5 files changed

+434
-54
lines changed

clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,9 @@
1313

1414
void func(int *restrict a, int *restrict b) {
1515
// CHECK-LABEL: func
16-
// CHECK256-COUNT-1: str
17-
// CHECK256-COUNT-7: st1w
18-
// CHECK512-COUNT-1: str
19-
// CHECK512-COUNT-3: st1w
20-
// CHECK1024-COUNT-1: str
21-
// CHECK1024-COUNT-1: st1w
16+
// CHECK256-COUNT-8: str
17+
// CHECK512-COUNT-4: str
18+
// CHECK1024-COUNT-2: str
2219
// CHECK2048-COUNT-1: st1w
2320
#pragma clang loop vectorize(enable)
2421
for (int i = 0; i < 64; ++i)

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -7380,12 +7380,23 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
73807380
return false;
73817381

73827382
SDValue VScale = N.getOperand(1);
7383-
if (VScale.getOpcode() != ISD::VSCALE)
7383+
int64_t MulImm = std::numeric_limits<int64_t>::max();
7384+
if (VScale.getOpcode() == ISD::VSCALE) {
7385+
MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7386+
} else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7387+
int64_t ByteOffset = C->getSExtValue();
7388+
const auto KnownVScale =
7389+
Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7390+
7391+
if (!KnownVScale || ByteOffset % KnownVScale != 0)
7392+
return false;
7393+
7394+
MulImm = ByteOffset / KnownVScale;
7395+
} else
73847396
return false;
73857397

73867398
TypeSize TS = MemVT.getSizeInBits();
73877399
int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7388-
int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
73897400

73907401
if ((MulImm % MemWidthBytes) != 0)
73917402
return false;

llvm/lib/Target/AArch64/AArch64Subtarget.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
391391
void mirFileLoaded(MachineFunction &MF) const override;
392392

393393
// Return the known range for the bit length of SVE data registers. A value
394-
// of 0 means nothing is known about that particular limit beyong what's
394+
// of 0 means nothing is known about that particular limit beyond what's
395395
// implied by the architecture.
396396
unsigned getMaxSVEVectorSizeInBits() const {
397397
assert(isSVEorStreamingSVEAvailable() &&
@@ -405,6 +405,16 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
405405
return MinSVEVectorSizeInBits;
406406
}
407407

408+
// Return the known bit length of SVE data registers. A value of 0 means the
409+
// length is unkown beyond what's implied by the architecture.
410+
unsigned getSVEVectorSizeInBits() const {
411+
assert(isSVEorStreamingSVEAvailable() &&
412+
"Tried to get SVE vector length without SVE support!");
413+
if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
414+
return MaxSVEVectorSizeInBits;
415+
return 0;
416+
}
417+
408418
bool useSVEForFixedLengthVectors() const {
409419
if (!isSVEorStreamingSVEAvailable())
410420
return false;

0 commit comments

Comments
 (0)