Skip to content

Commit 9b32dd3

Browse files
committed
Auto merge of rust-lang#133852 - x17jiri:cold_path, r=<try>
improve cold_path() rust-lang#120370 added a new instrinsic `cold_path()` and used it to fix `likely` and `unlikely` However, in order to limit scope, the information about cold code paths is only used in 2-target switch instructions. This is sufficient for `likely` and `unlikely`, but limits usefulness of `cold_path` for idiomatic rust. For example, code like this: ``` if let Some(x) = y { ... } ``` may generate 3-target switch: ``` switch y.discriminator: 0 => true branch 1 = > false branch _ => unreachable ``` and therefore marking a branch as cold will have no effect. This PR improves `cold_path()` to work with arbitrary switch instructions. Note that for 2-target switches, we can use `llvm.expect`, but for multiple targets we need to manually emit branch weights. I checked Clang and it also emits weights in this situation. The Clang's weight calculation is more complex that this PR, which I believe is mainly because `switch` in `C/C++` can have multiple cases going to the same target.
2 parents 3cb0272 + 7968501 commit 9b32dd3

File tree

6 files changed

+229
-15
lines changed

6 files changed

+229
-15
lines changed

compiler/rustc_codegen_llvm/src/builder.rs

+46-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::{iter, ptr};
44

55
pub(crate) mod autodiff;
66

7-
use libc::{c_char, c_uint};
7+
use libc::{c_char, c_uint, size_t};
88
use rustc_abi as abi;
99
use rustc_abi::{Align, Size, WrappingRange};
1010
use rustc_codegen_ssa::MemFlags;
@@ -32,7 +32,7 @@ use crate::abi::FnAbiLlvmExt;
3232
use crate::attributes;
3333
use crate::common::Funclet;
3434
use crate::context::{CodegenCx, SimpleCx};
35-
use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, True};
35+
use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, Metadata, True};
3636
use crate::type_::Type;
3737
use crate::type_of::LayoutLlvmExt;
3838
use crate::value::Value;
@@ -333,6 +333,50 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
333333
}
334334
}
335335

336+
fn switch_with_weights(
337+
&mut self,
338+
v: Self::Value,
339+
else_llbb: Self::BasicBlock,
340+
else_is_cold: bool,
341+
cases: impl ExactSizeIterator<Item = (u128, Self::BasicBlock, bool)>,
342+
) {
343+
if self.cx.sess().opts.optimize == rustc_session::config::OptLevel::No {
344+
self.switch(v, else_llbb, cases.map(|(val, dest, _)| (val, dest)));
345+
return;
346+
}
347+
348+
let id_str = "branch_weights";
349+
let id = unsafe {
350+
llvm::LLVMMDStringInContext2(self.cx.llcx, id_str.as_ptr().cast(), id_str.len())
351+
};
352+
353+
// For switch instructions with 2 targets, the `llvm.expect` intrinsic is used.
354+
// This function handles switch instructions with more than 2 targets and it needs to
355+
// emit branch weights metadata instead of using the intrinsic.
356+
// The values 1 and 2000 are the same as the values used by the `llvm.expect` intrinsic.
357+
let cold_weight = unsafe { llvm::LLVMValueAsMetadata(self.cx.const_u32(1)) };
358+
let hot_weight = unsafe { llvm::LLVMValueAsMetadata(self.cx.const_u32(2000)) };
359+
let weight =
360+
|is_cold: bool| -> &Metadata { if is_cold { cold_weight } else { hot_weight } };
361+
362+
let mut md: SmallVec<[&Metadata; 16]> = SmallVec::with_capacity(cases.len() + 2);
363+
md.push(id);
364+
md.push(weight(else_is_cold));
365+
366+
let switch =
367+
unsafe { llvm::LLVMBuildSwitch(self.llbuilder, v, else_llbb, cases.len() as c_uint) };
368+
for (on_val, dest, is_cold) in cases {
369+
let on_val = self.const_uint_big(self.val_ty(v), on_val);
370+
unsafe { llvm::LLVMAddCase(switch, on_val, dest) }
371+
md.push(weight(is_cold));
372+
}
373+
374+
unsafe {
375+
let md_node = llvm::LLVMMDNodeInContext2(self.cx.llcx, md.as_ptr(), md.len() as size_t);
376+
self.cx.set_metadata(switch, llvm::MD_prof, md_node);
377+
}
378+
}
379+
336380
fn invoke(
337381
&mut self,
338382
llty: &'ll Type,

compiler/rustc_codegen_ssa/src/mir/block.rs

+28-5
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,34 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
429429
let cmp = bx.icmp(IntPredicate::IntEQ, discr_value, llval);
430430
bx.cond_br(cmp, ll1, ll2);
431431
} else {
432-
bx.switch(
433-
discr_value,
434-
helper.llbb_with_cleanup(self, targets.otherwise()),
435-
target_iter.map(|(value, target)| (value, helper.llbb_with_cleanup(self, target))),
436-
);
432+
let otherwise = targets.otherwise();
433+
let otherwise_cold = self.cold_blocks[otherwise];
434+
let otherwise_unreachable = self.mir[otherwise].is_empty_unreachable();
435+
let cold_count = targets.iter().filter(|(_, target)| self.cold_blocks[*target]).count();
436+
let none_cold = cold_count == 0;
437+
let all_cold = cold_count == targets.iter().len();
438+
if (none_cold && (!otherwise_cold || otherwise_unreachable))
439+
|| (all_cold && (otherwise_cold || otherwise_unreachable))
440+
{
441+
// All targets have the same weight,
442+
// or `otherwise` is unreachable and it's the only target with a different weight.
443+
bx.switch(
444+
discr_value,
445+
helper.llbb_with_cleanup(self, targets.otherwise()),
446+
target_iter
447+
.map(|(value, target)| (value, helper.llbb_with_cleanup(self, target))),
448+
);
449+
} else {
450+
// Targets have different weights
451+
bx.switch_with_weights(
452+
discr_value,
453+
helper.llbb_with_cleanup(self, targets.otherwise()),
454+
otherwise_cold,
455+
target_iter.map(|(value, target)| {
456+
(value, helper.llbb_with_cleanup(self, target), self.cold_blocks[target])
457+
}),
458+
);
459+
}
437460
}
438461
}
439462

compiler/rustc_codegen_ssa/src/mir/mod.rs

+19-8
Original file line numberDiff line numberDiff line change
@@ -502,14 +502,25 @@ fn find_cold_blocks<'tcx>(
502502
for (bb, bb_data) in traversal::postorder(mir) {
503503
let terminator = bb_data.terminator();
504504

505-
// If a BB ends with a call to a cold function, mark it as cold.
506-
if let mir::TerminatorKind::Call { ref func, .. } = terminator.kind
507-
&& let ty::FnDef(def_id, ..) = *func.ty(local_decls, tcx).kind()
508-
&& let attrs = tcx.codegen_fn_attrs(def_id)
509-
&& attrs.flags.contains(CodegenFnAttrFlags::COLD)
510-
{
511-
cold_blocks[bb] = true;
512-
continue;
505+
match terminator.kind {
506+
// If a BB ends with a call to a cold function, mark it as cold.
507+
mir::TerminatorKind::Call { ref func, .. }
508+
| mir::TerminatorKind::TailCall { ref func, .. }
509+
if let ty::FnDef(def_id, ..) = *func.ty(local_decls, tcx).kind()
510+
&& let attrs = tcx.codegen_fn_attrs(def_id)
511+
&& attrs.flags.contains(CodegenFnAttrFlags::COLD) =>
512+
{
513+
cold_blocks[bb] = true;
514+
continue;
515+
}
516+
517+
// If a BB ends with an `unreachable`, also mark it as cold.
518+
mir::TerminatorKind::Unreachable => {
519+
cold_blocks[bb] = true;
520+
continue;
521+
}
522+
523+
_ => {}
513524
}
514525

515526
// If all successors of a BB are cold and there's at least one of them, mark this BB as cold

compiler/rustc_codegen_ssa/src/traits/builder.rs

+14
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,20 @@ pub trait BuilderMethods<'a, 'tcx>:
110110
else_llbb: Self::BasicBlock,
111111
cases: impl ExactSizeIterator<Item = (u128, Self::BasicBlock)>,
112112
);
113+
114+
// This is like `switch()`, but every case has a bool flag indicating whether it's cold.
115+
//
116+
// Default implementation throws away the cold flags and calls `switch()`.
117+
fn switch_with_weights(
118+
&mut self,
119+
v: Self::Value,
120+
else_llbb: Self::BasicBlock,
121+
_else_is_cold: bool,
122+
cases: impl ExactSizeIterator<Item = (u128, Self::BasicBlock, bool)>,
123+
) {
124+
self.switch(v, else_llbb, cases.map(|(val, bb, _)| (val, bb)))
125+
}
126+
113127
fn invoke(
114128
&mut self,
115129
llty: Self::Type,
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//@ compile-flags: -O
2+
#![crate_type = "lib"]
3+
#![feature(core_intrinsics)]
4+
5+
use std::intrinsics::cold_path;
6+
7+
#[inline(never)]
8+
#[no_mangle]
9+
pub fn path_a() {
10+
println!("path a");
11+
}
12+
13+
#[inline(never)]
14+
#[no_mangle]
15+
pub fn path_b() {
16+
println!("path b");
17+
}
18+
19+
#[no_mangle]
20+
pub fn test(x: Option<bool>) {
21+
if let Some(_) = x {
22+
path_a();
23+
} else {
24+
cold_path();
25+
path_b();
26+
}
27+
}
28+
29+
// CHECK-LABEL: @test(
30+
// CHECK: br i1 %1, label %bb2, label %bb1, !prof ![[NUM:[0-9]+]]
31+
// CHECK: bb1:
32+
// CHECK: path_a
33+
// CHECK: bb2:
34+
// CHECK: path_b
35+
// CHECK: ![[NUM]] = !{!"branch_weights", {{(!"expected", )?}}i32 1, i32 2000}
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//@ compile-flags: -O
2+
#![crate_type = "lib"]
3+
#![feature(core_intrinsics)]
4+
5+
use std::intrinsics::cold_path;
6+
7+
#[inline(never)]
8+
#[no_mangle]
9+
pub fn path_a() {
10+
println!("path a");
11+
}
12+
13+
#[inline(never)]
14+
#[no_mangle]
15+
pub fn path_b() {
16+
println!("path b");
17+
}
18+
19+
#[inline(never)]
20+
#[no_mangle]
21+
pub fn path_c() {
22+
println!("path c");
23+
}
24+
25+
#[inline(never)]
26+
#[no_mangle]
27+
pub fn path_d() {
28+
println!("path d");
29+
}
30+
31+
#[no_mangle]
32+
pub fn test(x: Option<u32>) {
33+
match x {
34+
Some(0) => path_a(),
35+
Some(1) => {
36+
cold_path();
37+
path_b()
38+
}
39+
Some(2) => path_c(),
40+
Some(3) => {
41+
cold_path();
42+
path_d()
43+
}
44+
_ => path_a(),
45+
}
46+
}
47+
48+
#[no_mangle]
49+
pub fn test2(x: Option<u32>) {
50+
match x {
51+
Some(10) => path_a(),
52+
Some(11) => {
53+
cold_path();
54+
path_b()
55+
}
56+
Some(12) => {
57+
unsafe { core::intrinsics::unreachable() };
58+
path_c()
59+
}
60+
Some(13) => {
61+
cold_path();
62+
path_d()
63+
}
64+
_ => {
65+
cold_path();
66+
path_a()
67+
}
68+
}
69+
}
70+
71+
// CHECK-LABEL: @test(
72+
// CHECK: switch i32 %1, label %bb1 [
73+
// CHECK: i32 0, label %bb6
74+
// CHECK: i32 1, label %bb5
75+
// CHECK: i32 2, label %bb4
76+
// CHECK: i32 3, label %bb3
77+
// CHECK: ], !prof !3
78+
79+
// CHECK-LABEL: @test2(
80+
// CHECK: switch i32 %1, label %bb1 [
81+
// CHECK: i32 10, label %bb5
82+
// CHECK: i32 11, label %bb4
83+
// CHECK: i32 13, label %bb3
84+
// CHECK: ], !prof !5
85+
86+
// CHECK: !3 = !{!"branch_weights", i32 2000, i32 2000, i32 1, i32 2000, i32 1}
87+
// CHECK: !5 = !{!"branch_weights", i32 1, i32 2000, i32 1, i32 1}

0 commit comments

Comments
 (0)