1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
use super::{Box, Context, Mapping, Path, Stash, Vec};
use core::convert::TryInto;
use object::macho;
use object::read::macho::{MachHeader, Nlist, Section, Segment as _};
use object::{Bytes, NativeEndian};

#[cfg(target_pointer_width = "32")]
type Mach = object::macho::MachHeader32<NativeEndian>;
#[cfg(target_pointer_width = "64")]
type Mach = object::macho::MachHeader64<NativeEndian>;
type MachSegment = <Mach as MachHeader>::Segment;
type MachSection = <Mach as MachHeader>::Section;
type MachNlist = <Mach as MachHeader>::Nlist;

impl Mapping {
    // The loading path for macOS is so different we just have a completely
    // different implementation of the function here. On macOS we need to go
    // probing the filesystem for a bunch of files.
    pub fn new(path: &Path) -> Option<Mapping> {
        // First up we need to load the unique UUID which is stored in the macho
        // header of the file we're reading, specified at `path`.
        let map = super::mmap(path)?;
        let (macho, data) = find_header(&map)?;
        let endian = macho.endian().ok()?;
        let uuid = macho.uuid(endian, data, 0).ok()?;

        // Next we need to look for a `*.dSYM` file. For now we just probe the
        // containing directory and look around for something that matches
        // `*.dSYM`. Once it's found we root through the dwarf resources that it
        // contains and try to find a macho file which has a matching UUID as
        // the one of our own file. If we find a match that's the dwarf file we
        // want to return.
        if let Some(uuid) = uuid {
            if let Some(parent) = path.parent() {
                if let Some(mapping) = Mapping::load_dsym(parent, uuid) {
                    return Some(mapping);
                }
            }
        }

        // Looks like nothing matched our UUID, so let's at least return our own
        // file. This should have the symbol table for at least some
        // symbolication purposes.
        Mapping::mk(map, |data, stash| {
            let (macho, data) = find_header(data)?;
            let endian = macho.endian().ok()?;
            let obj = Object::parse(macho, endian, data)?;
            Context::new(stash, obj, None)
        })
    }

    fn load_dsym(dir: &Path, uuid: [u8; 16]) -> Option<Mapping> {
        for entry in dir.read_dir().ok()? {
            let entry = entry.ok()?;
            let filename = match entry.file_name().into_string() {
                Ok(name) => name,
                Err(_) => continue,
            };
            if !filename.ends_with(".dSYM") {
                continue;
            }
            let candidates = entry.path().join("Contents/Resources/DWARF");
            if let Some(mapping) = Mapping::try_dsym_candidate(&candidates, uuid) {
                return Some(mapping);
            }
        }
        None
    }

    fn try_dsym_candidate(dir: &Path, uuid: [u8; 16]) -> Option<Mapping> {
        // Look for files in the `DWARF` directory which have a matching uuid to
        // the original object file. If we find one then we found the debug
        // information.
        for entry in dir.read_dir().ok()? {
            let entry = entry.ok()?;
            let map = super::mmap(&entry.path())?;
            let candidate = Mapping::mk(map, |data, stash| {
                let (macho, data) = find_header(data)?;
                let endian = macho.endian().ok()?;
                let entry_uuid = macho.uuid(endian, data, 0).ok()??;
                if entry_uuid != uuid {
                    return None;
                }
                let obj = Object::parse(macho, endian, data)?;
                Context::new(stash, obj, None)
            });
            if let Some(candidate) = candidate {
                return Some(candidate);
            }
        }

        None
    }
}

fn find_header(data: &'_ [u8]) -> Option<(&'_ Mach, &'_ [u8])> {
    use object::endian::BigEndian;

    let desired_cpu = || {
        if cfg!(target_arch = "x86") {
            Some(macho::CPU_TYPE_X86)
        } else if cfg!(target_arch = "x86_64") {
            Some(macho::CPU_TYPE_X86_64)
        } else if cfg!(target_arch = "arm") {
            Some(macho::CPU_TYPE_ARM)
        } else if cfg!(target_arch = "aarch64") {
            Some(macho::CPU_TYPE_ARM64)
        } else {
            None
        }
    };

    let mut data = Bytes(data);
    match data
        .clone()
        .read::<object::endian::U32<NativeEndian>>()
        .ok()?
        .get(NativeEndian)
    {
        macho::MH_MAGIC_64 | macho::MH_CIGAM_64 | macho::MH_MAGIC | macho::MH_CIGAM => {}

        macho::FAT_MAGIC | macho::FAT_CIGAM => {
            let mut header_data = data;
            let endian = BigEndian;
            let header = header_data.read::<macho::FatHeader>().ok()?;
            let nfat = header.nfat_arch.get(endian);
            let arch = (0..nfat)
                .filter_map(|_| header_data.read::<macho::FatArch32>().ok())
                .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?;
            let offset = arch.offset.get(endian);
            let size = arch.size.get(endian);
            data = data
                .read_bytes_at(offset.try_into().ok()?, size.try_into().ok()?)
                .ok()?;
        }

        macho::FAT_MAGIC_64 | macho::FAT_CIGAM_64 => {
            let mut header_data = data;
            let endian = BigEndian;
            let header = header_data.read::<macho::FatHeader>().ok()?;
            let nfat = header.nfat_arch.get(endian);
            let arch = (0..nfat)
                .filter_map(|_| header_data.read::<macho::FatArch64>().ok())
                .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?;
            let offset = arch.offset.get(endian);
            let size = arch.size.get(endian);
            data = data
                .read_bytes_at(offset.try_into().ok()?, size.try_into().ok()?)
                .ok()?;
        }

        _ => return None,
    }

    Mach::parse(data.0, 0).ok().map(|h| (h, data.0))
}

// This is used both for executables/libraries and source object files.
pub struct Object<'a> {
    endian: NativeEndian,
    data: &'a [u8],
    dwarf: Option<&'a [MachSection]>,
    syms: Vec<(&'a [u8], u64)>,
    syms_sort_by_name: bool,
    // Only set for executables/libraries, and not the source object files.
    object_map: Option<object::ObjectMap<'a>>,
    // The outer Option is for lazy loading, and the inner Option allows load errors to be cached.
    object_mappings: Box<[Option<Option<Mapping>>]>,
}

impl<'a> Object<'a> {
    fn parse(mach: &'a Mach, endian: NativeEndian, data: &'a [u8]) -> Option<Object<'a>> {
        let is_object = mach.filetype(endian) == object::macho::MH_OBJECT;
        let mut dwarf = None;
        let mut syms = Vec::new();
        let mut syms_sort_by_name = false;
        let mut commands = mach.load_commands(endian, data, 0).ok()?;
        let mut object_map = None;
        let mut object_mappings = Vec::new();
        while let Ok(Some(command)) = commands.next() {
            if let Some((segment, section_data)) = MachSegment::from_command(command).ok()? {
                // Object files should have all sections in a single unnamed segment load command.
                if segment.name() == b"__DWARF" || (is_object && segment.name() == b"") {
                    dwarf = segment.sections(endian, section_data).ok();
                }
            } else if let Some(symtab) = command.symtab().ok()? {
                let symbols = symtab.symbols::<Mach, _>(endian, data).ok()?;
                syms = symbols
                    .iter()
                    .filter_map(|nlist: &MachNlist| {
                        let name = nlist.name(endian, symbols.strings()).ok()?;
                        if name.len() > 0 && nlist.is_definition() {
                            Some((name, u64::from(nlist.n_value(endian))))
                        } else {
                            None
                        }
                    })
                    .collect();
                if is_object {
                    // We never search object file symbols by address.
                    // Instead, we already know the symbol name from the executable, and we
                    // need to search by name to find the matching symbol in the object file.
                    syms.sort_unstable_by_key(|(name, _)| *name);
                    syms_sort_by_name = true;
                } else {
                    syms.sort_unstable_by_key(|(_, addr)| *addr);
                    let map = symbols.object_map(endian);
                    object_mappings.resize_with(map.objects().len(), || None);
                    object_map = Some(map);
                }
            }
        }

        Some(Object {
            endian,
            data,
            dwarf,
            syms,
            syms_sort_by_name,
            object_map,
            object_mappings: object_mappings.into_boxed_slice(),
        })
    }

    pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> {
        let name = name.as_bytes();
        let dwarf = self.dwarf?;
        let section = dwarf.into_iter().find(|section| {
            let section_name = section.name();
            section_name == name || {
                section_name.starts_with(b"__")
                    && name.starts_with(b".")
                    && &section_name[2..] == &name[1..]
            }
        })?;
        Some(section.data(self.endian, self.data).ok()?)
    }

    pub fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> {
        debug_assert!(!self.syms_sort_by_name);
        let i = match self.syms.binary_search_by_key(&addr, |(_, addr)| *addr) {
            Ok(i) => i,
            Err(i) => i.checked_sub(1)?,
        };
        let (sym, _addr) = self.syms.get(i)?;
        Some(sym)
    }

    /// Try to load a context for an object file.
    ///
    /// If dsymutil was not run, then the DWARF may be found in the source object files.
    pub(super) fn search_object_map<'b>(&'b mut self, addr: u64) -> Option<(&Context<'b>, u64)> {
        // `object_map` contains a map from addresses to symbols and object paths.
        // Look up the address and get a mapping for the object.
        let object_map = self.object_map.as_ref()?;
        let symbol = object_map.get(addr)?;
        let object_index = symbol.object_index();
        let mapping = self.object_mappings.get_mut(object_index)?;
        if mapping.is_none() {
            // No cached mapping, so create it.
            *mapping = Some(object_mapping(object_map.objects().get(object_index)?));
        }
        let cx: &'b Context<'static> = &mapping.as_ref()?.as_ref()?.cx;
        // Don't leak the `'static` lifetime, make sure it's scoped to just ourselves.
        let cx = unsafe { core::mem::transmute::<&'b Context<'static>, &'b Context<'b>>(cx) };

        // We must translate the address in order to be able to look it up
        // in the DWARF in the object file.
        debug_assert!(cx.object.syms.is_empty() || cx.object.syms_sort_by_name);
        let i = cx
            .object
            .syms
            .binary_search_by_key(&symbol.name(), |(name, _)| *name)
            .ok()?;
        let object_symbol = cx.object.syms.get(i)?;
        let object_addr = addr
            .wrapping_sub(symbol.address())
            .wrapping_add(object_symbol.1);
        Some((cx, object_addr))
    }
}

fn object_mapping(path: &[u8]) -> Option<Mapping> {
    use super::mystd::ffi::OsStr;
    use super::mystd::os::unix::prelude::*;

    let map;

    // `N_OSO` symbol names can be either `/path/to/object.o` or `/path/to/archive.a(object.o)`.
    let member_name = if let Some((archive_path, member_name)) = split_archive_path(path) {
        map = super::mmap(Path::new(OsStr::from_bytes(archive_path)))?;
        Some(member_name)
    } else {
        map = super::mmap(Path::new(OsStr::from_bytes(path)))?;
        None
    };
    Mapping::mk(map, |data, stash| {
        let data = match member_name {
            Some(member_name) => {
                let archive = object::read::archive::ArchiveFile::parse(data).ok()?;
                let member = archive
                    .members()
                    .filter_map(Result::ok)
                    .find(|m| m.name() == member_name)?;
                member.data(data).ok()?
            }
            None => data,
        };
        let (macho, data) = find_header(data)?;
        let endian = macho.endian().ok()?;
        let obj = Object::parse(macho, endian, data)?;
        Context::new(stash, obj, None)
    })
}

fn split_archive_path(path: &[u8]) -> Option<(&[u8], &[u8])> {
    let (last, path) = path.split_last()?;
    if *last != b')' {
        return None;
    }
    let index = path.iter().position(|&x| x == b'(')?;
    let (archive, rest) = path.split_at(index);
    Some((archive, &rest[1..]))
}