Skip to content

Commit dde77f7

Browse files
authored
Rollup merge of #117042 - Zalathar:file-table, r=cjgillot
coverage: Emit the filenames section before encoding per-function mappings

When embedding coverage information in LLVM IR (and ultimately in the resulting binary), there are two main things that each CGU needs to emit:

- A single `__llvm_covmap` record containing a coverage header, which mostly consists of a list of filenames used by the CGU's coverage mappings.
- Several `__llvm_covfun` records, one for each instrumented function, each of which contains the hash of the list of filenames in the header.

There is a kind of loose cyclic dependency between the two: we need the hash of the file table before we can emit the covfun records, but we need to traverse all of the instrumented functions in order to build the file table.

The existing code works by processing the individual functions first. It lazily adds filenames to the file table, and stores the mostly-complete function records in a temporary list. After this it hashes the file table, emits the header (containing the file table), and then uses the hash to emit all of the function records.

This PR reverses that order: first we traverse all of the functions (without trying to prepare their function records) to build a *complete* file table, and then emit it immediately. At this point we have the file table hash, so we can then proceed to build and emit all of the function records, without needing to store them in an intermediate list.

---

Along the way, this PR makes some necessary changes that are also worthwhile in their own right:

- We split `FunctionCoverage` into distinct collector/finished phases, which neatly avoids some borrow-checker hassles when extracting a function's final expression/mapping data.
- We avoid having to re-sort a function's mappings when preparing the list of filenames that it uses.
2 parents e86f9b6 + 6af9fef commit dde77f7

File tree

7 files changed

+154
-106
lines changed

7 files changed

+154
-106
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3601,6 +3601,7 @@ version = "0.0.0"
36013601
dependencies = [
36023602
"bitflags 1.3.2",
36033603
"cstr",
3604+
"itertools",
36043605
"libc",
36053606
"measureme",
36063607
"object",

compiler/rustc_codegen_llvm/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ test = false
99
[dependencies]
1010
bitflags = "1.0"
1111
cstr = "0.2"
12+
itertools = "0.10.5"
1213
libc = "0.2"
1314
measureme = "10.0.0"
1415
object = { version = "0.32.0", default-features = false, features = [

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

+40-36
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
22

3+
use rustc_data_structures::captures::Captures;
34
use rustc_data_structures::fx::FxIndexSet;
45
use rustc_index::bit_set::BitSet;
56
use rustc_middle::mir::coverage::{
67
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
78
};
89
use rustc_middle::ty::Instance;
10+
use rustc_span::Symbol;
911

1012
/// Holds all of the coverage mapping data associated with a function instance,
1113
/// collected during traversal of `Coverage` statements in the function's MIR.
1214
#[derive(Debug)]
13-
pub struct FunctionCoverage<'tcx> {
15+
pub struct FunctionCoverageCollector<'tcx> {
1416
/// Coverage info that was attached to this function by the instrumentor.
1517
function_coverage_info: &'tcx FunctionCoverageInfo,
1618
is_used: bool,
@@ -26,7 +28,7 @@ pub struct FunctionCoverage<'tcx> {
2628
expressions_seen: BitSet<ExpressionId>,
2729
}
2830

29-
impl<'tcx> FunctionCoverage<'tcx> {
31+
impl<'tcx> FunctionCoverageCollector<'tcx> {
3032
/// Creates a new set of coverage data for a used (called) function.
3133
pub fn new(
3234
instance: Instance<'tcx>,
@@ -76,11 +78,6 @@ impl<'tcx> FunctionCoverage<'tcx> {
7678
}
7779
}
7880

79-
/// Returns true for a used (called) function, and false for an unused function.
80-
pub fn is_used(&self) -> bool {
81-
self.is_used
82-
}
83-
8481
/// Marks a counter ID as having been seen in a counter-increment statement.
8582
#[instrument(level = "debug", skip(self))]
8683
pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) {
@@ -165,72 +162,79 @@ impl<'tcx> FunctionCoverage<'tcx> {
165162
ZeroExpressions(zero_expressions)
166163
}
167164

165+
pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
166+
let zero_expressions = self.identify_zero_expressions();
167+
let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self;
168+
169+
FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions }
170+
}
171+
}
172+
173+
pub(crate) struct FunctionCoverage<'tcx> {
174+
function_coverage_info: &'tcx FunctionCoverageInfo,
175+
is_used: bool,
176+
177+
counters_seen: BitSet<CounterId>,
178+
zero_expressions: ZeroExpressions,
179+
}
180+
181+
impl<'tcx> FunctionCoverage<'tcx> {
182+
/// Returns true for a used (called) function, and false for an unused function.
183+
pub(crate) fn is_used(&self) -> bool {
184+
self.is_used
185+
}
186+
168187
/// Return the source hash, generated from the HIR node structure, and used to indicate whether
169188
/// or not the source code structure changed between different compilations.
170189
pub fn source_hash(&self) -> u64 {
171190
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
172191
}
173192

174-
/// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their
175-
/// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create
176-
/// `CounterMappingRegion`s.
177-
pub fn get_expressions_and_counter_regions(
178-
&self,
179-
) -> (Vec<CounterExpression>, impl Iterator<Item = (Counter, &CodeRegion)>) {
180-
let zero_expressions = self.identify_zero_expressions();
181-
182-
let counter_expressions = self.counter_expressions(&zero_expressions);
183-
// Expression IDs are indices into `self.expressions`, and on the LLVM
184-
// side they will be treated as indices into `counter_expressions`, so
185-
// the two vectors should correspond 1:1.
186-
assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len());
187-
188-
let counter_regions = self.counter_regions(zero_expressions);
189-
190-
(counter_expressions, counter_regions)
193+
/// Returns an iterator over all filenames used by this function's mappings.
194+
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
195+
self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name)
191196
}
192197

193198
/// Convert this function's coverage expression data into a form that can be
194199
/// passed through FFI to LLVM.
195-
fn counter_expressions(&self, zero_expressions: &ZeroExpressions) -> Vec<CounterExpression> {
200+
pub(crate) fn counter_expressions(
201+
&self,
202+
) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> {
196203
// We know that LLVM will optimize out any unused expressions before
197204
// producing the final coverage map, so there's no need to do the same
198205
// thing on the Rust side unless we're confident we can do much better.
199206
// (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
200207

201208
let counter_from_operand = |operand: CovTerm| match operand {
202-
CovTerm::Expression(id) if zero_expressions.contains(id) => Counter::ZERO,
209+
CovTerm::Expression(id) if self.zero_expressions.contains(id) => Counter::ZERO,
203210
_ => Counter::from_term(operand),
204211
};
205212

206-
self.function_coverage_info
207-
.expressions
208-
.iter()
209-
.map(|&Expression { lhs, op, rhs }| CounterExpression {
213+
self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| {
214+
CounterExpression {
210215
lhs: counter_from_operand(lhs),
211216
kind: match op {
212217
Op::Add => ExprKind::Add,
213218
Op::Subtract => ExprKind::Subtract,
214219
},
215220
rhs: counter_from_operand(rhs),
216-
})
217-
.collect::<Vec<_>>()
221+
}
222+
})
218223
}
219224

220225
/// Converts this function's coverage mappings into an intermediate form
221226
/// that will be used by `mapgen` when preparing for FFI.
222-
fn counter_regions(
227+
pub(crate) fn counter_regions(
223228
&self,
224-
zero_expressions: ZeroExpressions,
225-
) -> impl Iterator<Item = (Counter, &CodeRegion)> {
229+
) -> impl Iterator<Item = (Counter, &CodeRegion)> + ExactSizeIterator {
226230
// Historically, mappings were stored directly in counter/expression
227231
// statements in MIR, and MIR optimizations would sometimes remove them.
228232
// That's mostly no longer true, so now we detect cases where that would
229233
// have happened, and zero out the corresponding mappings here instead.
230234
let counter_for_term = move |term: CovTerm| {
231235
let force_to_zero = match term {
232236
CovTerm::Counter(id) => !self.counters_seen.contains(id),
233-
CovTerm::Expression(id) => zero_expressions.contains(id),
237+
CovTerm::Expression(id) => self.zero_expressions.contains(id),
234238
CovTerm::Zero => false,
235239
};
236240
if force_to_zero { Counter::ZERO } else { Counter::from_term(term) }

0 commit comments

Comments
 (0)