Skip to content

Commit 58b5757

Browse files
committed
[TailDuplicator] Only duplicate the blocks containing computed gotos
1 parent 067cd06 commit 58b5757

File tree

3 files changed

+49
-45
lines changed

3 files changed

+49
-45
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -986,8 +986,12 @@ class MachineInstr
986986

987987
/// Return true if this is an indirect branch, such as a
988988
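/// branch through a register. When IncludeJumpTable is false, a branch that
/// has a jump-table-index operand is not reported as an indirect branch.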
989-
bool isIndirectBranch(QueryType Type = AnyInBundle) const {
990-
return hasProperty(MCID::IndirectBranch, Type);
989+
bool isIndirectBranch(QueryType Type = AnyInBundle,
990+
bool IncludeJumpTable = true) const {
991+
return hasProperty(MCID::IndirectBranch, Type) &&
992+
(IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
993+
return Op.isJTI();
994+
}));
991995
}
992996

993997
/// Return true if this is a branch which may fall

llvm/lib/CodeGen/TailDuplicator.cpp

+12-10
Original file line numberDiff line numberDiff line change
@@ -603,17 +603,19 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
603603
TailBB.canFallThrough())
604604
return false;
605605

606-
// If the target has hardware branch prediction that can handle indirect
607-
// branches, duplicating them can often make them predictable when there
608-
// are common paths through the code. The limit needs to be high enough
609-
// to allow undoing the effects of tail merging and other optimizations
610-
// that rearrange the predecessors of the indirect branch.
611-
612-
bool HasIndirectbr = false;
606+
// Only duplicate the blocks containing computed gotos. This basically
607+
// unfactors computed gotos that were factored early on in the compilation
608+
// process to speed up edge-based data flow. If we do not unfactor them again,
609+
// it can seriously pessimize code with many computed jumps in the source
610+
// code, such as interpreters.
611+
bool HasComputedGoto = false;
613612
if (!TailBB.empty())
614-
HasIndirectbr = TailBB.back().isIndirectBranch();
613+
HasComputedGoto = TailBB.back().isIndirectBranch(
614+
/*Type=*/MachineInstr::AnyInBundle,
615+
// Jump tables are not considered computed gotos.
616+
/*IncludeJumpTable=*/false);
615617

616-
if (HasIndirectbr && PreRegAlloc)
618+
if (HasComputedGoto && PreRegAlloc)
617619
MaxDuplicateCount = TailDupIndirectBranchSize;
618620

619621
// Check the instructions in the block to determine whether tail-duplication
@@ -685,7 +687,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
685687
}
686688
}
687689

688-
if (HasIndirectbr && PreRegAlloc)
690+
if (HasComputedGoto && PreRegAlloc)
689691
return true;
690692

691693
if (IsSimple)

llvm/test/CodeGen/X86/tail-dup-computed-goto.mir

+31-33
Original file line numberDiff line numberDiff line change
@@ -128,76 +128,74 @@ jumpTable:
128128
body: |
129129
; CHECK-LABEL: name: jump_table
130130
; CHECK: bb.0:
131-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
131+
; CHECK-NEXT: successors: %bb.1(0x80000000)
132132
; CHECK-NEXT: {{ $}}
133133
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
134134
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
135135
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
136136
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
137137
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
138-
; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64_nosp = DEC64r [[COPY1]], implicit-def dead $eflags
139-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY [[COPY1]]
138+
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: bb.1:
140+
; CHECK-NEXT: successors: %bb.2(0x80000000)
141+
; CHECK-NEXT: {{ $}}
142+
; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY1]], %bb.0, %3, %bb.7, %4, %bb.6, %5, %bb.5, %6, %bb.4, %7, %bb.3
143+
; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64_nosp = DEC64r [[PHI]], implicit-def dead $eflags
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: bb.2:
146+
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
147+
; CHECK-NEXT: {{ $}}
140148
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r]], %jump-table.0, $noreg :: (load (s64) from jump-table)
141149
; CHECK-NEXT: {{ $}}
142150
; CHECK-NEXT: bb.3:
143-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
151+
; CHECK-NEXT: successors: %bb.1(0x80000000)
144152
; CHECK-NEXT: {{ $}}
145153
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
146154
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
147155
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
148-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
149-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY [[COPY3]]
150-
; CHECK-NEXT: [[DEC64r1:%[0-9]+]]:gr64_nosp = DEC64r [[COPY4]], implicit-def dead $eflags
151-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
152-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r1]], %jump-table.0, $noreg :: (load (s64) from jump-table)
156+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
157+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
158+
; CHECK-NEXT: JMP_1 %bb.1
153159
; CHECK-NEXT: {{ $}}
154160
; CHECK-NEXT: bb.4:
155-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
161+
; CHECK-NEXT: successors: %bb.1(0x80000000)
156162
; CHECK-NEXT: {{ $}}
157163
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
158164
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
159165
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
160-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
161-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
162-
; CHECK-NEXT: [[DEC64r2:%[0-9]+]]:gr64_nosp = DEC64r [[COPY7]], implicit-def dead $eflags
163-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY [[COPY7]]
164-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r2]], %jump-table.0, $noreg :: (load (s64) from jump-table)
166+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
167+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
168+
; CHECK-NEXT: JMP_1 %bb.1
165169
; CHECK-NEXT: {{ $}}
166170
; CHECK-NEXT: bb.5:
167-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
171+
; CHECK-NEXT: successors: %bb.1(0x80000000)
168172
; CHECK-NEXT: {{ $}}
169173
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
170174
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
171175
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
172-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rax
173-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY [[COPY9]]
174-
; CHECK-NEXT: [[DEC64r3:%[0-9]+]]:gr64_nosp = DEC64r [[COPY10]], implicit-def dead $eflags
175-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY10]]
176-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r3]], %jump-table.0, $noreg :: (load (s64) from jump-table)
176+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
177+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
178+
; CHECK-NEXT: JMP_1 %bb.1
177179
; CHECK-NEXT: {{ $}}
178180
; CHECK-NEXT: bb.6:
179-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
181+
; CHECK-NEXT: successors: %bb.1(0x80000000)
180182
; CHECK-NEXT: {{ $}}
181183
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
182184
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
183185
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
184-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64 = COPY $rax
185-
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64 = COPY [[COPY12]]
186-
; CHECK-NEXT: [[DEC64r4:%[0-9]+]]:gr64_nosp = DEC64r [[COPY13]], implicit-def dead $eflags
187-
; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr64 = COPY [[COPY13]]
188-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r4]], %jump-table.0, $noreg :: (load (s64) from jump-table)
186+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
187+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
188+
; CHECK-NEXT: JMP_1 %bb.1
189189
; CHECK-NEXT: {{ $}}
190190
; CHECK-NEXT: bb.7:
191-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
191+
; CHECK-NEXT: successors: %bb.1(0x80000000)
192192
; CHECK-NEXT: {{ $}}
193193
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
194194
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f5, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
195195
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
196-
; CHECK-NEXT: [[COPY15:%[0-9]+]]:gr64 = COPY $rax
197-
; CHECK-NEXT: [[COPY16:%[0-9]+]]:gr64 = COPY [[COPY15]]
198-
; CHECK-NEXT: [[DEC64r5:%[0-9]+]]:gr64_nosp = DEC64r [[COPY16]], implicit-def dead $eflags
199-
; CHECK-NEXT: [[COPY17:%[0-9]+]]:gr64 = COPY [[COPY16]]
200-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r5]], %jump-table.0, $noreg :: (load (s64) from jump-table)
196+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY $rax
197+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY10]]
198+
; CHECK-NEXT: JMP_1 %bb.1
201199
; CHECK-NEXT: {{ $}}
202200
; CHECK-NEXT: bb.8:
203201
bb.0:

0 commit comments

Comments
 (0)