[AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)

nikic · nikic · commit 81b106584f2b · 2021-08-15T12:35:52.000+02:00
This is a non-intrusive fix for https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport to the 13.x release branch. It expands on the current hack by distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have the obvious meaning and 2 means "anything else". The new optimization from D98564 should only be performed for CmpValue of 0 or 1. For main, I think we should switch the analyzeCompare() and optimizeCompare() APIs to use int64_t instead of int, which is in line with MachineOperand's notion of an immediate, and avoids this problem altogether. Differential Revision: https://reviews.llvm.org/D108076
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   if (!MI.getOperand(1).isReg())
     return false;
 
+  auto NormalizeCmpValue = [](int64_t Value) -> int {
+    // Comparison immediates may be 64-bit, but CmpValue is only an int.
+    // Normalize to 0/1/2 return value, where 2 indicates any value apart from
+    // 0 or 1.
+    // TODO: Switch CmpValue to int64_t in the API to avoid this.
+    if (Value == 0 || Value == 1)
+      return Value;
+    return 2;
+  };
+
   switch (MI.getOpcode()) {
   default:
     break;
@@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME: In order to convert CmpValue to 0 or 1
-    CmpValue = MI.getOperand(2).getImm() != 0;
+    CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
     return true;
   case AArch64::ANDSWri:
   case AArch64::ANDSXri:
@@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
-    // while the type of CmpValue is int. When converting uint64_t to int,
-    // the high 32 bits of uint64_t will be lost.
-    // In fact it causes a bug in spec2006-483.xalancbmk
-    // CmpValue is only used to compare with zero in OptimizeCompareInstr
-    CmpValue = AArch64_AM::decodeLogicalImmediate(
+    CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
                    MI.getOperand(2).getImm(),
-                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
     return true;
   }
 
@@ -1462,20 +1466,20 @@ bool AArch64InstrInfo::optimizeCompareInstr(
   if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
 
-  // Continue only if we have a "ri" where immediate is zero.
-  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
-  // function.
-  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
+  // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
+  assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
+         "CmpValue must be 0, 1, or 2!");
   if (SrcReg2 != 0)
     return false;
 
   // CmpInstr is a Compare instruction if destination register is not used.
   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
     return false;
 
-  if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
     return true;
-  return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+  return (CmpValue == 0 || CmpValue == 1) &&
+         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
 }
 
 /// Get opcode of S version of Instr.
diff --git a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
@@ -307,3 +307,42 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            subswr_wrong_cmp_value
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: subswr_wrong_cmp_value
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+  ; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
+  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 1, %bb.2, implicit $nzcv
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    liveins: $x1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = COPY $x1
+    %2:gpr64 = IMPLICIT_DEF
+    %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
+    %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    %5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/pr51476.ll b/llvm/test/CodeGen/AArch64/pr51476.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i8 %arg) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    cmp w0, #3
+; CHECK-NEXT:    strb w0, [sp, #12]
+; CHECK-NEXT:    b.eq .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %do_call
+; CHECK-NEXT:    bl unknown
+; CHECK-NEXT:  .LBB0_2: // %common.ret
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %tmp = alloca i8
+  %cmp1 = icmp ne i8 %arg, 1
+  %zext = zext i1 %cmp1 to i8
+  store i8 %zext, i8* %tmp
+  %zext2 = load i8, i8* %tmp
+  %cmp2 = icmp eq i8 %zext2, 3
+  br i1 %cmp2, label %exit, label %do_call
+
+do_call:
+  call void @unknown(i8 %zext2)
+  ret void
+
+exit:
+  ret void
+}
+
+declare void @unknown(i8)