Skip to content

Commit 4261d56

Browse files
authored
Add support for split dyld shared cache. (#398)
Fixes #358. This adds support for the dyld cache format that is used on macOS 12 and iOS 15. The cache is split over multiple files, with a "root" cache and one or more subcaches, for example:

```
/System/Library/dyld/dyld_shared_cache_x86_64
/System/Library/dyld/dyld_shared_cache_x86_64.1
/System/Library/dyld/dyld_shared_cache_x86_64.2
/System/Library/dyld/dyld_shared_cache_x86_64.3
```

Additionally, on iOS, there is a separate .symbols subcache, which contains local symbols.

Each file has a set of mappings. For each image in the cache, the segments of that image can be distributed over multiple files. For example, on macOS 12.0.1, the image for libsystem_malloc.dylib for the arm64e architecture has its __TEXT segment in the root cache and the __LINKEDIT segment in the .1 subcache - there's a single __LINKEDIT segment which is shared between all images across both files. The remaining libsystem_malloc.dylib segments are in the same file as the __TEXT segment.

The DyldCache API now requires the data for all subcaches to be supplied to the constructor. The parse_at methods have been removed and replaced with a parse_dyld_cache_image method.

With this patch, the following command outputs correct symbols for libsystem_malloc.dylib:

```
cargo run --release --bin objdump -- /System/Library/dyld/dyld_shared_cache_arm64e /usr/lib/system/libsystem_malloc.dylib
```

Support for local symbols is not implemented. But, as a first step, DyldCache::parse requires the .symbols subcache to be supplied (if the root cache expects one to be present) and checks that its UUID is correct. MachOFile doesn't do anything with ilocalsym and nlocalsym yet, and we don't yet have the struct definitions for dyld_cache_local_symbols_info and dyld_cache_local_symbols_entry.
1 parent 867084c commit 4261d56

File tree

10 files changed

+433
-97
lines changed

10 files changed

+433
-97
lines changed

Diff for: crates/examples/src/bin/dyldcachedump.rs

+41-1
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,34 @@ fn main() {
2222
continue;
2323
}
2424
};
25+
let subcache_files = open_subcaches_if_exist(&file_path);
2526
let file = match unsafe { memmap2::Mmap::map(&file) } {
2627
Ok(mmap) => mmap,
2728
Err(err) => {
2829
println!("Failed to map file '{}': {}", file_path, err,);
2930
continue;
3031
}
3132
};
32-
let cache = match DyldCache::<Endianness>::parse(&*file) {
33+
let subcache_files: Option<Vec<_>> = subcache_files
34+
.into_iter()
35+
.map(
36+
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
37+
Ok(mmap) => Some(mmap),
38+
Err(err) => {
39+
eprintln!("Failed to map file '{}': {}", file_path, err);
40+
None
41+
}
42+
},
43+
)
44+
.collect();
45+
let subcache_files: Vec<&[u8]> = match &subcache_files {
46+
Some(subcache_files) => subcache_files
47+
.iter()
48+
.map(|subcache_file| &**subcache_file)
49+
.collect(),
50+
None => continue,
51+
};
52+
let cache = match DyldCache::<Endianness>::parse(&*file, &subcache_files) {
3353
Ok(cache) => cache,
3454
Err(err) => {
3555
println!(
@@ -48,3 +68,23 @@ fn main() {
4868
}
4969
}
5070
}
71+
72+
// If the file is a dyld shared cache, and we're on macOS 12 or later,
73+
// then there will be one or more "subcache" files next to this file,
74+
// with the names filename.1, filename.2, ..., filename.symbols.
75+
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
76+
let mut files = Vec::new();
77+
for i in 1.. {
78+
let subcache_path = format!("{}.{}", path, i);
79+
match fs::File::open(&subcache_path) {
80+
Ok(subcache_file) => files.push(subcache_file),
81+
Err(_) => break,
82+
};
83+
}
84+
let symbols_subcache_path = format!("{}.symbols", path);
85+
if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) {
86+
files.push(subcache_file);
87+
};
88+
println!("Found {} subcache files", files.len());
89+
files
90+
}

Diff for: crates/examples/src/bin/objdump.rs

+44-1
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,58 @@ fn main() {
1818
process::exit(1);
1919
}
2020
};
21+
let extra_files = open_subcaches_if_exist(&file_path);
2122
let file = match unsafe { memmap2::Mmap::map(&file) } {
2223
Ok(mmap) => mmap,
2324
Err(err) => {
2425
eprintln!("Failed to map file '{}': {}", file_path, err,);
2526
process::exit(1);
2627
}
2728
};
29+
let extra_files: Vec<_> = extra_files
30+
.into_iter()
31+
.map(
32+
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
33+
Ok(mmap) => mmap,
34+
Err(err) => {
35+
eprintln!("Failed to map file '{}': {}", file_path, err,);
36+
process::exit(1);
37+
}
38+
},
39+
)
40+
.collect();
41+
let extra_file_data: Vec<&[u8]> = extra_files.iter().map(|f| &**f).collect();
2842

2943
let stdout = io::stdout();
3044
let stderr = io::stderr();
31-
objdump::print(&mut stdout.lock(), &mut stderr.lock(), &*file, member_names).unwrap();
45+
objdump::print(
46+
&mut stdout.lock(),
47+
&mut stderr.lock(),
48+
&*file,
49+
&extra_file_data,
50+
member_names,
51+
)
52+
.unwrap();
53+
}
54+
55+
// If the file is a dyld shared cache, and we're on macOS 12 or later,
56+
// then there will be one or more "subcache" files next to this file,
57+
// with the names filename.1, filename.2, ..., filename.symbols.
58+
// Read those files now, if they exist, even if we don't know that
59+
// we're dealing with a dyld shared cache. By the time we know what
60+
// we're dealing with, it's too late to read more files.
61+
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
62+
let mut files = Vec::new();
63+
for i in 1.. {
64+
let subcache_path = format!("{}.{}", path, i);
65+
match fs::File::open(&subcache_path) {
66+
Ok(subcache_file) => files.push(subcache_file),
67+
Err(_) => break,
68+
};
69+
}
70+
let symbols_subcache_path = format!("{}.symbols", path);
71+
if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) {
72+
files.push(subcache_file);
73+
};
74+
files
3275
}

Diff for: crates/examples/src/objdump.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ pub fn print<W: Write, E: Write>(
77
w: &mut W,
88
e: &mut E,
99
file: &[u8],
10+
extra_files: &[&[u8]],
1011
member_names: Vec<String>,
1112
) -> Result<()> {
1213
let mut member_names: Vec<_> = member_names.into_iter().map(|name| (name, false)).collect();
@@ -47,7 +48,7 @@ pub fn print<W: Write, E: Write>(
4748
Err(err) => writeln!(e, "Failed to parse Fat 64 data: {}", err)?,
4849
}
4950
}
50-
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file) {
51+
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file, extra_files) {
5152
writeln!(w, "Format: dyld cache {:?}-endian", cache.endianness())?;
5253
writeln!(w, "Architecture: {:?}", cache.architecture())?;
5354
for image in cache.images() {

Diff for: crates/examples/tests/testfiles.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ fn testfiles() {
2828
println!("File {}", path);
2929
let data = fs::read(&path).unwrap();
3030
fail |= testfile(path, &data, "objdump", |mut out, mut err, data| {
31-
objdump::print(&mut out, &mut err, data, vec![]).unwrap()
31+
objdump::print(&mut out, &mut err, data, &[], vec![]).unwrap()
3232
});
3333
fail |= testfile(path, &data, "readobj", readobj::print);
3434
println!();

Diff for: src/macho.rs

+62-9
Original file line numberDiff line numberDiff line change
@@ -284,26 +284,67 @@ pub const VM_PROT_EXECUTE: u32 = 0x04;
284284

285285
// Definitions from https://opensource.apple.com/source/dyld/dyld-210.2.3/launch-cache/dyld_cache_format.h.auto.html
286286

287-
/// The dyld cache header, containing only the fields which are present
288-
/// in all versions of dyld caches (dyld-95.3 and up).
289-
/// Many more fields exist in later dyld versions, but we currently do
290-
/// not need to parse those.
287+
/// The dyld cache header.
291288
/// Corresponds to struct dyld_cache_header from dyld_cache_format.h.
289+
/// This header has grown over time. Only the fields up to and including dyld_base_address
290+
/// are guaranteed to be present. For all other fields, check the header size before
291+
/// accessing the field. The header size is stored in mapping_offset; the mappings start
292+
/// right after the theader.
292293
#[derive(Debug, Clone, Copy)]
293294
#[repr(C)]
294295
pub struct DyldCacheHeader<E: Endian> {
295296
/// e.g. "dyld_v0 i386"
296297
pub magic: [u8; 16],
297298
/// file offset to first dyld_cache_mapping_info
298-
pub mapping_offset: U32<E>,
299+
pub mapping_offset: U32<E>, // offset: 0x10
299300
/// number of dyld_cache_mapping_info entries
300-
pub mapping_count: U32<E>,
301+
pub mapping_count: U32<E>, // offset: 0x14
301302
/// file offset to first dyld_cache_image_info
302-
pub images_offset: U32<E>,
303+
pub images_offset: U32<E>, // offset: 0x18
303304
/// number of dyld_cache_image_info entries
304-
pub images_count: U32<E>,
305+
pub images_count: U32<E>, // offset: 0x1c
305306
/// base address of dyld when cache was built
306-
pub dyld_base_address: U64<E>,
307+
pub dyld_base_address: U64<E>, // offset: 0x20
308+
///
309+
reserved1: [u8; 32], // offset: 0x28
310+
/// file offset of where local symbols are stored
311+
pub local_symbols_offset: U64<E>, // offset: 0x48
312+
/// size of local symbols information
313+
pub local_symbols_size: U64<E>, // offset: 0x50
314+
/// unique value for each shared cache file
315+
pub uuid: [u8; 16], // offset: 0x58
316+
///
317+
reserved2: [u8; 32], // offset: 0x68
318+
///
319+
reserved3: [u8; 32], // offset: 0x88
320+
///
321+
reserved4: [u8; 32], // offset: 0xa8
322+
///
323+
reserved5: [u8; 32], // offset: 0xc8
324+
///
325+
reserved6: [u8; 32], // offset: 0xe8
326+
///
327+
reserved7: [u8; 32], // offset: 0x108
328+
///
329+
reserved8: [u8; 32], // offset: 0x128
330+
///
331+
reserved9: [u8; 32], // offset: 0x148
332+
///
333+
reserved10: [u8; 32], // offset: 0x168
334+
/// file offset to first dyld_subcache_info
335+
pub subcaches_offset: U32<E>, // offset: 0x188
336+
/// number of dyld_subcache_info entries
337+
pub subcaches_count: U32<E>, // offset: 0x18c
338+
/// the UUID of the .symbols subcache
339+
pub symbols_subcache_uuid: [u8; 16], // offset: 0x190
340+
///
341+
reserved11: [u8; 32], // offset: 0x1a0
342+
/// file offset to first dyld_cache_image_info
343+
/// Use this instead of images_offset if mapping_offset is at least 0x1c4.
344+
pub images_across_all_subcaches_offset: U32<E>, // offset: 0x1c0
345+
/// number of dyld_cache_image_info entries
346+
/// Use this instead of images_count if mapping_offset is at least 0x1c4.
347+
pub images_across_all_subcaches_count: U32<E>, // offset: 0x1c4
307348
}
308349

309350
/// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h.
@@ -338,6 +379,17 @@ pub struct DyldCacheImageInfo<E: Endian> {
338379
pub pad: U32<E>,
339380
}
340381

382+
/// Corresponds to a struct whose source code has not been published as of Nov 2021.
383+
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
384+
#[derive(Debug, Clone, Copy)]
385+
#[repr(C)]
386+
pub struct DyldSubCacheInfo<E: Endian> {
387+
/// The UUID of this subcache.
388+
pub uuid: [u8; 16],
389+
/// The size of this subcache plus all previous subcaches.
390+
pub cumulative_size: U64<E>,
391+
}
392+
341393
// Definitions from "/usr/include/mach-o/loader.h".
342394

343395
/*
@@ -3199,6 +3251,7 @@ unsafe_impl_endian_pod!(
31993251
DyldCacheHeader,
32003252
DyldCacheMappingInfo,
32013253
DyldCacheImageInfo,
3254+
DyldSubCacheInfo,
32023255
MachHeader32,
32033256
MachHeader64,
32043257
LoadCommand,

Diff for: src/read/any.rs

+14-16
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::read::{
2020
SymbolMapName, SymbolScope, SymbolSection,
2121
};
2222
#[allow(unused_imports)]
23-
use crate::Endianness;
23+
use crate::{AddressSize, Endian, Endianness};
2424

2525
/// Evaluate an expression on the contents of a file format enum.
2626
///
@@ -220,23 +220,21 @@ impl<'data, R: ReadRef<'data>> File<'data, R> {
220220
Ok(File { inner })
221221
}
222222

223-
/// Parse the raw file data at an arbitrary offset inside the input data.
224-
///
225-
/// Currently, this is only supported for Mach-O images.
226-
/// This can be used for parsing Mach-O images inside the dyld shared cache,
227-
/// where multiple images, located at different offsets, share the same address
228-
/// space.
229-
pub fn parse_at(data: R, offset: u64) -> Result<Self> {
230-
let _inner = match FileKind::parse_at(data, offset)? {
231-
#[cfg(feature = "macho")]
232-
FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?),
233-
#[cfg(feature = "macho")]
234-
FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?),
235-
#[allow(unreachable_patterns)]
223+
/// Parse a Mach-O image from the dyld shared cache.
224+
#[cfg(feature = "macho")]
225+
pub fn parse_dyld_cache_image<'cache, E: Endian>(
226+
image: &macho::DyldCacheImage<'data, 'cache, E, R>,
227+
) -> Result<Self> {
228+
let inner = match image.cache.architecture().address_size() {
229+
Some(AddressSize::U64) => {
230+
FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?)
231+
}
232+
Some(AddressSize::U32) => {
233+
FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?)
234+
}
236235
_ => return Err(Error("Unsupported file format")),
237236
};
238-
#[allow(unreachable_code)]
239-
Ok(File { inner: _inner })
237+
Ok(File { inner })
240238
}
241239

242240
/// Return the file format.

0 commit comments

Comments
 (0)