Skip to content

Commit 7fad04e

Browse files
authored
[LSR] Fix matching vscale immediates (llvm#100080)
Somewhat confusingly, a `SCEVMulExpr` is a `SCEVNAryExpr`, so it can have more than two operands. Previously, the vscale immediate matching did not check the number of operands of the `SCEVMulExpr`, so it would ignore any operands after the first two. This led to incorrect codegen (and results) for ArmSME in IREE (https://github.com/iree-org/iree), which sometimes addresses things that are a `vscale * vscale` multiple away. The test added with this change shows an example reduced from IREE. The second write should be offset from the first by `16 * vscale * vscale` (* 4 bytes); however, LSR previously dropped the second vscale and instead offset the write by `#4, mul vl`, which is an offset of only `16 * vscale` (* 4 bytes).
1 parent 6a1b119 commit 7fad04e

File tree

2 files changed

+16
-10
lines changed

2 files changed

+16
-10
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -946,13 +946,15 @@ static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
946946
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
947947
SCEV::FlagAnyWrap);
948948
return Result;
949-
} else if (EnableVScaleImmediates)
950-
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
949+
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
950+
if (EnableVScaleImmediates && M->getNumOperands() == 2) {
951951
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
952952
if (isa<SCEVVScale>(M->getOperand(1))) {
953953
S = SE.getConstant(M->getType(), 0);
954954
return Immediate::getScalable(C->getValue()->getSExtValue());
955955
}
956+
}
957+
}
956958
return Immediate::getZero();
957959
}
958960

llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll

+12-8
Original file line numberDiff line numberDiff line change
@@ -384,27 +384,31 @@ for.exit:
384384
ret void
385385
}
386386

387-
;; This test demonstrates an incorrect MUL VL address calculation. Here there
388-
;; are two writes that should be `16 * vscale * vscale` apart, however,
389-
;; loop-strength-reduce has ignored the second `vscale` and offset the second
390-
;; write by `#4, mul vl` which is an offset of `16 * vscale` dropping a vscale.
387+
;; Here are two writes that should be `16 * vscale * vscale` apart, so MUL VL
388+
;; addressing cannot be used to offset the second write, as for example,
389+
;; `#4, mul vl` would only be an offset of `16 * vscale` (dropping a vscale).
391390
define void @vscale_squared_offset(ptr %alloc) #0 {
392391
; COMMON-LABEL: vscale_squared_offset:
393392
; COMMON: // %bb.0: // %entry
393+
; COMMON-NEXT: rdvl x9, #1
394394
; COMMON-NEXT: fmov z0.s, #4.00000000
395395
; COMMON-NEXT: mov x8, xzr
396-
; COMMON-NEXT: cntw x9
396+
; COMMON-NEXT: lsr x9, x9, #4
397397
; COMMON-NEXT: fmov z1.s, #8.00000000
398+
; COMMON-NEXT: cntw x10
398399
; COMMON-NEXT: ptrue p0.s, vl1
399-
; COMMON-NEXT: cmp x8, x9
400+
; COMMON-NEXT: umull x9, w9, w9
401+
; COMMON-NEXT: lsl x9, x9, #6
402+
; COMMON-NEXT: cmp x8, x10
400403
; COMMON-NEXT: b.ge .LBB6_2
401404
; COMMON-NEXT: .LBB6_1: // %for.body
402405
; COMMON-NEXT: // =>This Inner Loop Header: Depth=1
406+
; COMMON-NEXT: add x11, x0, x9
403407
; COMMON-NEXT: st1w { z0.s }, p0, [x0]
404408
; COMMON-NEXT: add x8, x8, #1
405-
; COMMON-NEXT: st1w { z1.s }, p0, [x0, #4, mul vl]
409+
; COMMON-NEXT: st1w { z1.s }, p0, [x11]
406410
; COMMON-NEXT: addvl x0, x0, #1
407-
; COMMON-NEXT: cmp x8, x9
411+
; COMMON-NEXT: cmp x8, x10
408412
; COMMON-NEXT: b.lt .LBB6_1
409413
; COMMON-NEXT: .LBB6_2: // %for.exit
410414
; COMMON-NEXT: ret

0 commit comments

Comments
 (0)