Skip to content

Commit f9e77f2

Browse files
committed
Auto merge of rust-lang#91604 - nikic:section-flags, r=nagisa
Use object crate for .rustc metadata generation We already use the object crate for generating uncompressed .rmeta metadata object files. This switches the generation of compressed .rustc object files to use the object crate as well. These have slightly different requirements in that .rmeta should be completely excluded from any final compilation artifacts, while .rustc should be part of shared objects, but not loaded into memory. The primary motivation for this change is rust-lang#90326: In LLVM 14, the current way of setting section flags (and in particular, preventing the setting of SHF_ALLOC) will no longer work. There are other ways we could work around this, but switching to the object crate seems like the most elegant, as we already use it for .rmeta, and as it makes this independent of the codegen backend. In particular, we don't need separate handling in codegen_llvm and codegen_gcc. codegen_cranelift should be able to reuse the implementation as well, though I have omitted that here, as it is not based on codegen_ssa. This change mostly extracts the existing code for .rmeta handling to allow using it for .rustc as well, and adjusts the codegen infrastructure to handle the metadata object file separately: We no longer create a backend-specific module for it, and directly produce the compiled module instead. This does not `fix` rust-lang#90326 by itself yet, as .llvmbc will need to be handled separately. r? `@nagisa`
2 parents 4459e72 + 9488cac commit f9e77f2

File tree

12 files changed

+246
-288
lines changed

12 files changed

+246
-288
lines changed

Cargo.lock

+1-1
Original file line numberDiff line numberDiff line change
@@ -3714,7 +3714,6 @@ dependencies = [
37143714
"rustc_span",
37153715
"rustc_target",
37163716
"smallvec",
3717-
"snap",
37183717
"tracing",
37193718
]
37203719

@@ -3749,6 +3748,7 @@ dependencies = [
37493748
"rustc_symbol_mangling",
37503749
"rustc_target",
37513750
"smallvec",
3751+
"snap",
37523752
"tempfile",
37533753
"tracing",
37543754
]

compiler/rustc_codegen_gcc/src/base.rs

-39
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@ use gccjit::{
77
GlobalKind,
88
};
99
use rustc_middle::dep_graph;
10-
use rustc_middle::middle::exported_symbols;
1110
use rustc_middle::ty::TyCtxt;
1211
use rustc_middle::mir::mono::Linkage;
1312
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
1413
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
1514
use rustc_codegen_ssa::mono_item::MonoItemExt;
1615
use rustc_codegen_ssa::traits::DebugInfoMethods;
17-
use rustc_metadata::EncodedMetadata;
1816
use rustc_session::config::DebugInfo;
1917
use rustc_span::Symbol;
2018

@@ -132,40 +130,3 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol) -> (Modul
132130

133131
(module, cost)
134132
}
135-
136-
pub fn write_compressed_metadata<'tcx>(tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut GccContext) {
137-
use snap::write::FrameEncoder;
138-
use std::io::Write;
139-
140-
// Historical note:
141-
//
142-
// When using link.exe it was seen that the section name `.note.rustc`
143-
// was getting shortened to `.note.ru`, and according to the PE and COFF
144-
// specification:
145-
//
146-
// > Executable images do not use a string table and do not support
147-
// > section names longer than 8 characters
148-
//
149-
// https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
150-
//
151-
// As a result, we choose a slightly shorter name! As to why
152-
// `.note.rustc` works on MinGW, see
153-
// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197
154-
let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" };
155-
156-
let context = &gcc_module.context;
157-
let mut compressed = rustc_metadata::METADATA_HEADER.to_vec();
158-
FrameEncoder::new(&mut compressed).write_all(&metadata.raw_data()).unwrap();
159-
160-
let name = exported_symbols::metadata_symbol_name(tcx);
161-
let typ = context.new_array_type(None, context.new_type::<u8>(), compressed.len() as i32);
162-
let global = context.new_global(None, GlobalKind::Exported, typ, name);
163-
global.global_set_initializer(&compressed);
164-
global.set_link_section(section_name);
165-
166-
// Also generate a .section directive to force no
167-
// flags, at least for ELF outputs, so that the
168-
// metadata doesn't get loaded into memory.
169-
let directive = format!(".section {}", section_name);
170-
context.add_top_level_asm(None, &directive);
171-
}

compiler/rustc_codegen_gcc/src/lib.rs

-5
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ extern crate rustc_session;
2222
extern crate rustc_span;
2323
extern crate rustc_symbol_mangling;
2424
extern crate rustc_target;
25-
extern crate snap;
2625

2726
// This prevents duplicating functions and statics that are already part of the host rustc process.
2827
#[allow(unused_extern_crates)]
@@ -128,10 +127,6 @@ impl ExtraBackendMethods for GccCodegenBackend {
128127
}
129128
}
130129

131-
fn write_compressed_metadata<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut Self::Module) {
132-
base::write_compressed_metadata(tcx, metadata, gcc_module)
133-
}
134-
135130
fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, mods: &mut Self::Module, module_name: &str, kind: AllocatorKind, has_alloc_error_handler: bool) {
136131
unsafe { allocator::codegen(tcx, mods, module_name, kind, has_alloc_error_handler) }
137132
}

compiler/rustc_codegen_llvm/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ bitflags = "1.0"
1212
cstr = "0.2"
1313
libc = "0.2"
1414
measureme = "10.0.0"
15-
snap = "1"
1615
tracing = "0.1"
1716
rustc_middle = { path = "../rustc_middle" }
1817
rustc-demangle = "0.1.21"

compiler/rustc_codegen_llvm/src/base.rs

+1-52
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,12 @@
99
//! int)` and `rec(x=int, y=int, z=int)` will have the same [`llvm::Type`].
1010
//!
1111
//! [`Ty`]: rustc_middle::ty::Ty
12-
//! [`val_ty`]: common::val_ty
12+
//! [`val_ty`]: crate::common::val_ty
1313
1414
use super::ModuleLlvm;
1515

1616
use crate::attributes;
1717
use crate::builder::Builder;
18-
use crate::common;
1918
use crate::context::CodegenCx;
2019
use crate::llvm;
2120
use crate::value::Value;
@@ -25,66 +24,16 @@ use rustc_codegen_ssa::mono_item::MonoItemExt;
2524
use rustc_codegen_ssa::traits::*;
2625
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
2726
use rustc_data_structures::small_c_str::SmallCStr;
28-
use rustc_metadata::EncodedMetadata;
2927
use rustc_middle::dep_graph;
3028
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
31-
use rustc_middle::middle::exported_symbols;
3229
use rustc_middle::mir::mono::{Linkage, Visibility};
3330
use rustc_middle::ty::TyCtxt;
3431
use rustc_session::config::DebugInfo;
3532
use rustc_span::symbol::Symbol;
3633
use rustc_target::spec::SanitizerSet;
3734

38-
use std::ffi::CString;
3935
use std::time::Instant;
4036

41-
pub fn write_compressed_metadata<'tcx>(
42-
tcx: TyCtxt<'tcx>,
43-
metadata: &EncodedMetadata,
44-
llvm_module: &mut ModuleLlvm,
45-
) {
46-
use snap::write::FrameEncoder;
47-
use std::io::Write;
48-
49-
// Historical note:
50-
//
51-
// When using link.exe it was seen that the section name `.note.rustc`
52-
// was getting shortened to `.note.ru`, and according to the PE and COFF
53-
// specification:
54-
//
55-
// > Executable images do not use a string table and do not support
56-
// > section names longer than 8 characters
57-
//
58-
// https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
59-
//
60-
// As a result, we choose a slightly shorter name! As to why
61-
// `.note.rustc` works on MinGW, see
62-
// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197
63-
let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" };
64-
65-
let (metadata_llcx, metadata_llmod) = (&*llvm_module.llcx, llvm_module.llmod());
66-
let mut compressed = rustc_metadata::METADATA_HEADER.to_vec();
67-
FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap();
68-
69-
let llmeta = common::bytes_in_context(metadata_llcx, &compressed);
70-
let llconst = common::struct_in_context(metadata_llcx, &[llmeta], false);
71-
let name = exported_symbols::metadata_symbol_name(tcx);
72-
let buf = CString::new(name).unwrap();
73-
let llglobal =
74-
unsafe { llvm::LLVMAddGlobal(metadata_llmod, common::val_ty(llconst), buf.as_ptr()) };
75-
unsafe {
76-
llvm::LLVMSetInitializer(llglobal, llconst);
77-
let name = SmallCStr::new(section_name);
78-
llvm::LLVMSetSection(llglobal, name.as_ptr());
79-
80-
// Also generate a .section directive to force no
81-
// flags, at least for ELF outputs, so that the
82-
// metadata doesn't get loaded into memory.
83-
let directive = format!(".section {}", section_name);
84-
llvm::LLVMSetModuleInlineAsm2(metadata_llmod, directive.as_ptr().cast(), directive.len())
85-
}
86-
}
87-
8837
pub struct ValueIter<'ll> {
8938
cur: Option<&'ll Value>,
9039
step: unsafe extern "C" fn(&'ll Value) -> Option<&'ll Value>,

compiler/rustc_codegen_llvm/src/lib.rs

-8
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,6 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
102102
ModuleLlvm::new_metadata(tcx, mod_name)
103103
}
104104

105-
fn write_compressed_metadata<'tcx>(
106-
&self,
107-
tcx: TyCtxt<'tcx>,
108-
metadata: &EncodedMetadata,
109-
llvm_module: &mut ModuleLlvm,
110-
) {
111-
base::write_compressed_metadata(tcx, metadata, llvm_module)
112-
}
113105
fn codegen_allocator<'tcx>(
114106
&self,
115107
tcx: TyCtxt<'tcx>,

compiler/rustc_codegen_ssa/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ libc = "0.2.50"
1515
jobserver = "0.1.22"
1616
tempfile = "3.2"
1717
pathdiff = "0.2.0"
18+
snap = "1"
1819
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
1920
regex = "1.4"
2021

compiler/rustc_codegen_ssa/src/back/link.rs

+2-135
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,21 @@ use rustc_session::utils::NativeLibKind;
1414
/// need out of the shared crate context before we get rid of it.
1515
use rustc_session::{filesearch, Session};
1616
use rustc_span::symbol::Symbol;
17-
use rustc_target::abi::Endian;
1817
use rustc_target::spec::crt_objects::{CrtObjects, CrtObjectsFallback};
1918
use rustc_target::spec::{LinkOutputKind, LinkerFlavor, LldFlavor, SplitDebuginfo};
2019
use rustc_target::spec::{PanicStrategy, RelocModel, RelroLevel, SanitizerSet, Target};
2120

2221
use super::archive::{find_library, ArchiveBuilder};
2322
use super::command::Command;
2423
use super::linker::{self, Linker};
24+
use super::metadata::create_rmeta_file;
2525
use super::rpath::{self, RPathConfig};
2626
use crate::{
2727
looks_like_rust_object_file, CodegenResults, CompiledModule, CrateInfo, NativeLib,
2828
METADATA_FILENAME,
2929
};
3030

3131
use cc::windows_registry;
32-
use object::elf;
33-
use object::write::Object;
34-
use object::{Architecture, BinaryFormat, Endianness, FileFlags, SectionFlags, SectionKind};
3532
use regex::Regex;
3633
use tempfile::Builder as TempFileBuilder;
3734

@@ -339,7 +336,7 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
339336
// metadata in rlib files is wrapped in a "dummy" object file for
340337
// the target platform so the rlib can be processed entirely by
341338
// normal linkers for the platform.
342-
let metadata = create_metadata_file(sess, codegen_results.metadata.raw_data());
339+
let metadata = create_rmeta_file(sess, codegen_results.metadata.raw_data());
343340
ab.add_file(&emit_metadata(sess, &metadata, tmpdir));
344341

345342
// After adding all files to the archive, we need to update the
@@ -358,136 +355,6 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
358355
}
359356
}
360357
return Ok(ab);
361-
362-
// For rlibs we "pack" rustc metadata into a dummy object file. When rustc
363-
// creates a dylib crate type it will pass `--whole-archive` (or the
364-
// platform equivalent) to include all object files from an rlib into the
365-
// final dylib itself. This causes linkers to iterate and try to include all
366-
// files located in an archive, so if metadata is stored in an archive then
367-
// it needs to be of a form that the linker will be able to process.
368-
//
369-
// Note, though, that we don't actually want this metadata to show up in any
370-
// final output of the compiler. Instead this is purely for rustc's own
371-
// metadata tracking purposes.
372-
//
373-
// With the above in mind, each "flavor" of object format gets special
374-
// handling here depending on the target:
375-
//
376-
// * MachO - macos-like targets will insert the metadata into a section that
377-
// is sort of fake dwarf debug info. Inspecting the source of the macos
378-
// linker this causes these sections to be skipped automatically because
379-
// it's not in an allowlist of otherwise well known dwarf section names to
380-
// go into the final artifact.
381-
//
382-
// * WebAssembly - we actually don't have any container format for this
383-
// target. WebAssembly doesn't support the `dylib` crate type anyway so
384-
// there's no need for us to support this at this time. Consequently the
385-
// metadata bytes are simply stored as-is into an rlib.
386-
//
387-
// * COFF - Windows-like targets create an object with a section that has
388-
// the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker
389-
// ever sees the section it doesn't process it and it's removed.
390-
//
391-
// * ELF - All other targets are similar to Windows in that there's a
392-
// `SHF_EXCLUDE` flag we can set on sections in an object file to get
393-
// automatically removed from the final output.
394-
//
395-
// Note that this metdata format is kept in sync with
396-
// `rustc_codegen_ssa/src/back/metadata.rs`.
397-
fn create_metadata_file(sess: &Session, metadata: &[u8]) -> Vec<u8> {
398-
let endianness = match sess.target.options.endian {
399-
Endian::Little => Endianness::Little,
400-
Endian::Big => Endianness::Big,
401-
};
402-
let architecture = match &sess.target.arch[..] {
403-
"arm" => Architecture::Arm,
404-
"aarch64" => Architecture::Aarch64,
405-
"x86" => Architecture::I386,
406-
"s390x" => Architecture::S390x,
407-
"mips" => Architecture::Mips,
408-
"mips64" => Architecture::Mips64,
409-
"x86_64" => {
410-
if sess.target.pointer_width == 32 {
411-
Architecture::X86_64_X32
412-
} else {
413-
Architecture::X86_64
414-
}
415-
}
416-
"powerpc" => Architecture::PowerPc,
417-
"powerpc64" => Architecture::PowerPc64,
418-
"riscv32" => Architecture::Riscv32,
419-
"riscv64" => Architecture::Riscv64,
420-
"sparc64" => Architecture::Sparc64,
421-
422-
// This is used to handle all "other" targets. This includes targets
423-
// in two categories:
424-
//
425-
// * Some targets don't have support in the `object` crate just yet
426-
// to write an object file. These targets are likely to get filled
427-
// out over time.
428-
//
429-
// * Targets like WebAssembly don't support dylibs, so the purpose
430-
// of putting metadata in object files, to support linking rlibs
431-
// into dylibs, is moot.
432-
//
433-
// In both of these cases it means that linking into dylibs will
434-
// not be supported by rustc. This doesn't matter for targets like
435-
// WebAssembly and for targets not supported by the `object` crate
436-
// yet it means that work will need to be done in the `object` crate
437-
// to add a case above.
438-
_ => return metadata.to_vec(),
439-
};
440-
441-
if sess.target.is_like_osx {
442-
let mut file = Object::new(BinaryFormat::MachO, architecture, endianness);
443-
444-
let section =
445-
file.add_section(b"__DWARF".to_vec(), b".rmeta".to_vec(), SectionKind::Debug);
446-
file.append_section_data(section, metadata, 1);
447-
file.write().unwrap()
448-
} else if sess.target.is_like_windows {
449-
const IMAGE_SCN_LNK_REMOVE: u32 = 0;
450-
let mut file = Object::new(BinaryFormat::Coff, architecture, endianness);
451-
452-
let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
453-
file.section_mut(section).flags =
454-
SectionFlags::Coff { characteristics: IMAGE_SCN_LNK_REMOVE };
455-
file.append_section_data(section, metadata, 1);
456-
file.write().unwrap()
457-
} else {
458-
const SHF_EXCLUDE: u64 = 0x80000000;
459-
let mut file = Object::new(BinaryFormat::Elf, architecture, endianness);
460-
461-
match &sess.target.arch[..] {
462-
// copied from `mipsel-linux-gnu-gcc foo.c -c` and
463-
// inspecting the resulting `e_flags` field.
464-
"mips" => {
465-
let e_flags = elf::EF_MIPS_ARCH_32R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
466-
file.flags = FileFlags::Elf { e_flags };
467-
}
468-
// copied from `mips64el-linux-gnuabi64-gcc foo.c -c`
469-
"mips64" => {
470-
let e_flags = elf::EF_MIPS_ARCH_64R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
471-
file.flags = FileFlags::Elf { e_flags };
472-
}
473-
474-
// copied from `riscv64-linux-gnu-gcc foo.c -c`, note though
475-
// that the `+d` target feature represents whether the double
476-
// float abi is enabled.
477-
"riscv64" if sess.target.options.features.contains("+d") => {
478-
let e_flags = elf::EF_RISCV_RVC | elf::EF_RISCV_FLOAT_ABI_DOUBLE;
479-
file.flags = FileFlags::Elf { e_flags };
480-
}
481-
482-
_ => {}
483-
}
484-
485-
let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
486-
file.section_mut(section).flags = SectionFlags::Elf { sh_flags: SHF_EXCLUDE };
487-
file.append_section_data(section, metadata, 1);
488-
file.write().unwrap()
489-
}
490-
}
491358
}
492359

493360
/// Extract all symbols defined in raw-dylib libraries, collated by library name.

0 commit comments

Comments
 (0)