Skip to content

Commit 81b1065

Browse files
committed
[AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)
This is a non-intrusive fix for https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport to the 13.x release branch. It expands on the current hack by distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have the obvious meaning and 2 means "anything else". The new optimization from D98564 should only be performed for CmpValue of 0 or 1. For main, I think we should switch the analyzeCompare() and optimizeCompare() APIs to use int64_t instead of int, which is in line with MachineOperand's notion of an immediate, and avoids this problem altogether. Differential Revision: https://reviews.llvm.org/D108076
1 parent 49de607 commit 81b1065

File tree

3 files changed

+93
-15
lines changed

3 files changed

+93
-15
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+19-15
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
11201120
if (!MI.getOperand(1).isReg())
11211121
return false;
11221122

1123+
auto NormalizeCmpValue = [](int64_t Value) -> int {
1124+
// Comparison immediates may be 64-bit, but CmpValue is only an int.
1125+
// Normalize to 0/1/2 return value, where 2 indicates any value apart from
1126+
// 0 or 1.
1127+
// TODO: Switch CmpValue to int64_t in the API to avoid this.
1128+
if (Value == 0 || Value == 1)
1129+
return Value;
1130+
return 2;
1131+
};
1132+
11231133
switch (MI.getOpcode()) {
11241134
default:
11251135
break;
@@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
11551165
SrcReg = MI.getOperand(1).getReg();
11561166
SrcReg2 = 0;
11571167
CmpMask = ~0;
1158-
// FIXME: In order to convert CmpValue to 0 or 1
1159-
CmpValue = MI.getOperand(2).getImm() != 0;
1168+
CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
11601169
return true;
11611170
case AArch64::ANDSWri:
11621171
case AArch64::ANDSXri:
@@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
11651174
SrcReg = MI.getOperand(1).getReg();
11661175
SrcReg2 = 0;
11671176
CmpMask = ~0;
1168-
// FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1169-
// while the type of CmpValue is int. When converting uint64_t to int,
1170-
// the high 32 bits of uint64_t will be lost.
1171-
// In fact it causes a bug in spec2006-483.xalancbmk
1172-
// CmpValue is only used to compare with zero in OptimizeCompareInstr
1173-
CmpValue = AArch64_AM::decodeLogicalImmediate(
1177+
CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
11741178
MI.getOperand(2).getImm(),
1175-
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1179+
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
11761180
return true;
11771181
}
11781182

@@ -1462,20 +1466,20 @@ bool AArch64InstrInfo::optimizeCompareInstr(
14621466
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
14631467
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
14641468

1465-
// Continue only if we have a "ri" where immediate is zero.
1466-
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1467-
// function.
1468-
assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1469+
// Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
1470+
assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
1471+
"CmpValue must be 0, 1, or 2!");
14691472
if (SrcReg2 != 0)
14701473
return false;
14711474

14721475
// CmpInstr is a Compare instruction if destination register is not used.
14731476
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
14741477
return false;
14751478

1476-
if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1479+
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
14771480
return true;
1478-
return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1481+
return (CmpValue == 0 || CmpValue == 1) &&
1482+
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
14791483
}
14801484

14811485
/// Get opcode of S version of Instr.

llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir

+39
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,42 @@ body: |
307307
RET_ReallyLR
308308
309309
...
310+
---
311+
name: subswr_wrong_cmp_value
312+
tracksRegLiveness: true
313+
body: |
314+
; CHECK-LABEL: name: subswr_wrong_cmp_value
315+
; CHECK: bb.0:
316+
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
317+
; CHECK: liveins: $x1
318+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
319+
; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
320+
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
321+
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
322+
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
323+
; CHECK: Bcc 1, %bb.2, implicit $nzcv
324+
; CHECK: B %bb.1
325+
; CHECK: bb.1:
326+
; CHECK: successors: %bb.2(0x80000000)
327+
; CHECK: B %bb.2
328+
; CHECK: bb.2:
329+
; CHECK: RET_ReallyLR
330+
bb.0:
331+
liveins: $x1
332+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
333+
%1:gpr64common = COPY $x1
334+
%2:gpr64 = IMPLICIT_DEF
335+
%3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
336+
%4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
337+
%5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
338+
Bcc 1, %bb.2, implicit $nzcv
339+
B %bb.1
340+
341+
bb.1:
342+
successors: %bb.2(0x80000000)
343+
B %bb.2
344+
345+
bb.2:
346+
RET_ReallyLR
347+
348+
...

llvm/test/CodeGen/AArch64/pr51476.ll

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(i8 %arg) nounwind {
5+
; CHECK-LABEL: test:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
8+
; CHECK-NEXT: and w8, w0, #0xff
9+
; CHECK-NEXT: cmp w8, #1
10+
; CHECK-NEXT: cset w0, ne
11+
; CHECK-NEXT: cmp w0, #3
12+
; CHECK-NEXT: strb w0, [sp, #12]
13+
; CHECK-NEXT: b.eq .LBB0_2
14+
; CHECK-NEXT: // %bb.1: // %do_call
15+
; CHECK-NEXT: bl unknown
16+
; CHECK-NEXT: .LBB0_2: // %common.ret
17+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
18+
; CHECK-NEXT: ret
19+
%tmp = alloca i8
20+
%cmp1 = icmp ne i8 %arg, 1
21+
%zext = zext i1 %cmp1 to i8
22+
store i8 %zext, i8* %tmp
23+
%zext2 = load i8, i8* %tmp
24+
%cmp2 = icmp eq i8 %zext2, 3
25+
br i1 %cmp2, label %exit, label %do_call
26+
27+
do_call:
28+
call void @unknown(i8 %zext2)
29+
ret void
30+
31+
exit:
32+
ret void
33+
}
34+
35+
declare void @unknown(i8)

0 commit comments

Comments
 (0)