1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
//! macOS-specific handling of exceptions
//!
//! Unlike other Unix platforms macOS here uses mach ports to handle exceptions
//! instead of signals. While macOS platforms could use signals (and
//! historically they did!) this is incompatible when Wasmtime is linked into a
//! project that is otherwise using mach ports for catching exceptions. This
//! came up in #2456 notably when a project like Breakpad is integrated to blanket
//! catch exceptions and report them.
//!
//! Mach ports are somewhat obscure and not really heavily used in a ton of
//! places. Needless to say the original author of this file worked with mach
//! ports for the first time when writing this file. As such the exact specifics
//! here may not be super well documented. This file is 100% lifted from
//! SpiderMonkey and then adapted for Wasmtime's purposes. Credit for almost
//! all of this file goes to SpiderMonkey for figuring out all the fiddly bits.
//! See also
//! <https://searchfox.org/mozilla-central/source/js/src/wasm/WasmSignalHandlers.cpp>
//! for the original code.
//!
//! The high-level overview is that when using mach ports a thread is blocked
//! when it generates an exception and then a message can be read from the
//! port. This means that, unlike signals, threads can't fix their own traps.
//! Instead a helper thread is spun up to service exception messages. This is
//! also in conflict with Wasmtime's exception handling currently which is to
//! use a thread-local to store information about how to unwind. Additionally
//! this requires that the check of whether a pc is a wasm trap or not is a
//! global check rather than a per-thread check. This necessitates the existence
//! of `GlobalModuleRegistry` in the `wasmtime` crate.
//!
//! Otherwise this file heavily uses the `mach` Rust crate for type and
//! function declarations. Many bits and pieces are copied or translated from
//! the SpiderMonkey implementation and it should pass all the tests!

#![allow(non_snake_case)]

use crate::traphandlers::{tls, wasmtime_longjmp};
use mach::exception_types::*;
use mach::kern_return::*;
use mach::mach_init::*;
use mach::mach_port::*;
use mach::message::*;
use mach::port::*;
use mach::thread_act::*;
use mach::traps::*;
use std::mem;
use std::thread;

/// Other `mach` declarations awaiting <https://github.com/fitzgen/mach/pull/64>
/// to be merged.
///
/// Everything in this module is a hand-written FFI declaration (struct
/// layouts, constants and `extern "C"` functions) that the rest of this file
/// needs but that is not pulled in from the `mach` crate.
mod mach_addons {
    // The names below intentionally mirror the C spellings, hence the lint
    // allowances.
    #![allow(non_camel_case_types)]
    #![allow(non_upper_case_globals)]
    #![allow(dead_code)]

    use mach::{
        exception_types::*, kern_return::*, mach_types::*, message::*, port::*, thread_status::*,
    };
    use std::mem;

    /// Layout of the `NDR_record` symbol imported below. The fields are never
    /// inspected in this file; the record is only copied wholesale into the
    /// reply message sent by `handler_thread`.
    #[repr(C)]
    #[derive(Copy, Clone, Debug)]
    #[allow(dead_code)]
    pub struct NDR_record_t {
        mig_vers: libc::c_uchar,
        if_vers: libc::c_uchar,
        reserved1: libc::c_uchar,
        mig_encoding: libc::c_uchar,
        int_rep: libc::c_uchar,
        char_rep: libc::c_uchar,
        float_rep: libc::c_uchar,
        reserved32: libc::c_uchar,
    }

    extern "C" {
        /// Externally-defined NDR record; copied verbatim into replies.
        pub static NDR_record: NDR_record_t;
    }

    /// Layout of an incoming exception message as received by
    /// `handler_thread` (it forms the `body` of `ExceptionRequest`).
    ///
    /// Only `Head`, `thread` and `exception` are read in this file.
    ///
    /// NOTE(review): the corresponding C definition is presumably declared
    /// under 4-byte packing, whereas plain `repr(C)` here gives `code` its
    /// natural 8-byte alignment. That should only affect `code`, which is not
    /// read in this file -- confirm before starting to use it.
    #[repr(C)]
    #[allow(dead_code)]
    #[derive(Copy, Clone, Debug)]
    pub struct __Request__exception_raise_t {
        pub Head: mach_msg_header_t,
        /* start of the kernel processed data */
        pub msgh_body: mach_msg_body_t,
        // Port descriptor for the thread that raised the exception.
        pub thread: mach_msg_port_descriptor_t,
        // Port descriptor for the task owning that thread.
        pub task: mach_msg_port_descriptor_t,
        /* end of the kernel processed data */
        pub NDR: NDR_record_t,
        pub exception: exception_type_t,
        pub codeCnt: mach_msg_type_number_t,
        pub code: [i64; 2],
    }

    /// Layout of the reply sent back after an exception message has been
    /// processed. `RetCode` carries the verdict (`KERN_SUCCESS` or
    /// `KERN_FAILURE` in `handler_thread`).
    #[repr(C)]
    #[allow(dead_code)]
    #[derive(Copy, Clone, Debug)]
    pub struct __Reply__exception_raise_t {
        pub Head: mach_msg_header_t,
        pub NDR: NDR_record_t,
        pub RetCode: kern_return_t,
    }

    /// Register state of an AArch64 thread, used with the
    /// `ARM_THREAD_STATE64` flavor.
    #[repr(C)]
    #[derive(Copy, Clone, Debug, Default, Hash, PartialOrd, PartialEq, Eq, Ord)]
    pub struct arm_thread_state64_t {
        pub __x: [u64; 29],
        pub __fp: u64, // frame pointer x29
        pub __lr: u64, // link register x30
        pub __sp: u64, // stack pointer x31
        pub __pc: u64,
        pub __cpsr: u32,
        pub __pad: u32,
    }

    impl arm_thread_state64_t {
        /// Size of this structure expressed in 32-bit words, which is the
        /// unit used for the state count passed to `thread_get_state` and
        /// `thread_set_state`.
        pub fn count() -> mach_msg_type_number_t {
            (mem::size_of::<Self>() / mem::size_of::<u32>()) as mach_msg_type_number_t
        }
    }

    /// Thread-state flavor selecting `arm_thread_state64_t`.
    pub static ARM_THREAD_STATE64: thread_state_flavor_t = 6;

    // `THREAD_STATE_NONE` has a different numeric value per architecture, so
    // exactly one of the two definitions below is compiled in.
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    pub static THREAD_STATE_NONE: thread_state_flavor_t = 13;
    #[cfg(target_arch = "aarch64")]
    pub static THREAD_STATE_NONE: thread_state_flavor_t = 5;

    extern "C" {
        /// Overwrites the register state of `target_act` with `new_state`,
        /// interpreted according to `flavor`; `new_stateCnt` is the length of
        /// the state (see `arm_thread_state64_t::count` for the unit used by
        /// this file).
        pub fn thread_set_state(
            target_act: thread_port_t,
            flavor: thread_state_flavor_t,
            new_state: thread_state_t,
            new_stateCnt: mach_msg_type_number_t,
        ) -> kern_return_t;

        /// Registers `new_port` as the exception port of `thread` for the
        /// exception kinds selected by `exception_mask`.
        pub fn thread_set_exception_ports(
            thread: thread_port_t,
            exception_mask: exception_mask_t,
            new_port: mach_port_t,
            behavior: libc::c_uint,
            new_flavor: thread_state_flavor_t,
        ) -> kern_return_t;
    }
}

use mach_addons::*;

/// Uninhabited placeholder type, used only as the argument type of
/// `SignalHandler` below. Since it has no variants no value of this type can
/// ever be constructed.
pub enum Void {}
/// For now this is basically unused, we don't expose this any more for
/// Wasmtime on macOS.
///
/// The handler takes a `Void` argument, which cannot be constructed, so a
/// handler of this type can never actually be called.
pub type SignalHandler<'a> = dyn Fn(Void) -> bool + Send + Sync + 'a;

/// Process-global port that we use to route thread-level exceptions to.
///
/// Starts out as `MACH_PORT_NULL` and is allocated by `platform_init`.
/// `lazy_per_thread_init` asserts that it has been allocated before
/// registering it for a thread, and `handler_thread` receives from it.
static mut WASMTIME_PORT: mach_port_name_t = MACH_PORT_NULL;

/// One-time, process-wide setup of the mach-port based trap handling.
///
/// Allocates `WASMTIME_PORT`, gives it a send right, and starts the
/// background thread that services exception messages arriving on it.
///
/// # Panics
///
/// Panics if the port cannot be allocated or the send right cannot be
/// inserted.
///
/// # Safety
///
/// This writes to the `static mut` `WASMTIME_PORT` without synchronization,
/// so the caller must ensure nothing else accesses it concurrently.
pub unsafe fn platform_init() {
    let task = mach_task_self();

    // Create the receive side of the port that exceptions get delivered to...
    let status = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &mut WASMTIME_PORT);
    assert_eq!(status, KERN_SUCCESS, "failed to allocate port");

    // ... and attach a send right to it so that messages can actually be
    // sent to the port.
    let status = mach_port_insert_right(
        task,
        WASMTIME_PORT,
        WASMTIME_PORT,
        MACH_MSG_TYPE_MAKE_SEND,
    );
    assert_eq!(status, KERN_SUCCESS, "failed to insert right");

    // The servicing thread runs for the rest of the process lifetime. Its
    // join handle is dropped on purpose, which detaches the thread.
    thread::spawn(|| handler_thread());
}

// This is largely just copied from SpiderMonkey.
//
// Receive buffer for a single exception message: the request proper followed
// by room for a message trailer. `handler_thread` passes the size of this
// whole structure as the receive limit to `mach_msg`.
#[repr(C)]
#[allow(dead_code)]
struct ExceptionRequest {
    // The exception message itself.
    body: __Request__exception_raise_t,
    // Space for the trailer following the message; never read in this file.
    trailer: mach_msg_trailer_t,
}

/// Body of the background thread that services exception messages arriving on
/// `WASMTIME_PORT`.
///
/// Each loop iteration blocks until a message is received, hands it to
/// `handle_exception`, and then replies with `KERN_SUCCESS` (the exception was
/// handled) or `KERN_FAILURE` (it was not). This function never returns.
///
/// The process is aborted if receiving a message fails or if a message with
/// an unexpected id shows up. A failure to *send* the reply is reported on
/// stderr but does not abort.
///
/// # Safety
///
/// `WASMTIME_PORT` must have been initialized (see `platform_init`) before
/// this runs, and every message arriving on it must have the layout of
/// `ExceptionRequest`.
unsafe fn handler_thread() {
    // Taken from mach_exc in /usr/include/mach/mach_exc.defs.
    const EXCEPTION_MSG_ID: mach_msg_id_t = 2405;

    loop {
        // Block this thread reading a message from our port. This will block
        // until some thread throws an exception. Note that messages are all
        // expected to be exceptions here.
        let mut request: ExceptionRequest = mem::zeroed();
        let kret = mach_msg(
            &mut request.body.Head,
            MACH_RCV_MSG,
            0,
            mem::size_of_val(&request) as u32,
            WASMTIME_PORT,
            MACH_MSG_TIMEOUT_NONE,
            MACH_PORT_NULL,
        );
        if kret != KERN_SUCCESS {
            eprintln!("mach_msg failed with {} ({0:x})", kret);
            libc::abort();
        }
        if request.body.Head.msgh_id != EXCEPTION_MSG_ID {
            eprintln!("unexpected msg header id {}", request.body.Head.msgh_id);
            libc::abort();
        }

        // Attempt to handle the exception below which will process the state
        // of the request.
        //
        // We unconditionally need to send a message back on our port after
        // this exception is received, and our reply code here dictates whether
        // the thread crashes or whether we continue execution of the thread.
        let reply_code = if handle_exception(&mut request) {
            KERN_SUCCESS
        } else {
            KERN_FAILURE
        };

        // This magic incantation to send a reply back to the kernel was
        // derived from the exc_server generated by
        // 'mig -v /usr/include/mach/mach_exc.defs'.
        let mut reply: __Reply__exception_raise_t = mem::zeroed();
        reply.Head.msgh_bits =
            MACH_MSGH_BITS(request.body.Head.msgh_bits & MACH_MSGH_BITS_REMOTE_MASK, 0);
        reply.Head.msgh_size = mem::size_of_val(&reply) as u32;
        reply.Head.msgh_remote_port = request.body.Head.msgh_remote_port;
        reply.Head.msgh_local_port = MACH_PORT_NULL;
        // The reply id is the request id plus 100.
        reply.Head.msgh_id = request.body.Head.msgh_id + 100;
        reply.NDR = NDR_record;
        reply.RetCode = reply_code;
        let kret = mach_msg(
            &mut reply.Head,
            MACH_SEND_MSG,
            mem::size_of_val(&reply) as u32,
            0,
            MACH_PORT_NULL,
            MACH_MSG_TIMEOUT_NONE,
            MACH_PORT_NULL,
        );

        // Don't silently drop a failed reply: without it the thread that
        // raised the exception never learns the verdict. There is nothing
        // useful to do to recover here, and other threads may be perfectly
        // fine, so report the failure and keep servicing further exceptions
        // rather than aborting.
        if kret != KERN_SUCCESS {
            eprintln!("failed to send exception reply: {} ({0:x})", kret);
        }
    }
}

/// Attempts to handle a single exception message on behalf of the thread that
/// raised it.
///
/// Returns `true` if the exception was identified as a wasm trap and the
/// originating thread's registers were successfully rewritten so that it
/// resumes in `unwind`. Returns `false` in every other case: an exception
/// kind we don't handle, a failure to read or write the thread state, or a
/// program counter that is not in wasm code.
///
/// # Safety
///
/// `request` must be an exception message received by `handler_thread`. On
/// x86_64 this may write to the stack of the thread that raised the
/// exception.
unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
    // First make sure that this exception is one that we actually expect to
    // get raised by wasm code. All other exceptions we safely ignore.
    match request.body.exception as u32 {
        EXC_BAD_ACCESS | EXC_BAD_INSTRUCTION => {}
        _ => return false,
    }

    // Depending on the current architecture various bits and pieces of this
    // will change. This is expected to get filled out for other macos
    // platforms as necessary.
    //
    // The items this needs to define are:
    //
    // * `ThreadState` - a structure read via `thread_get_state` to learn about
    //   the register state of the thread that trapped.
    // * `thread_state_flavor` - used to read and write `ThreadState`
    // * `get_pc_and_fp` - a function from `&ThreadState` to the current
    //   program counter and frame pointer; the program counter is used to
    //   test if it's an address we're catching wasm traps for.
    // * `resume` - a function used to modify `ThreadState` to resume in the
    //   target thread in the `unwind` function below, passing the two
    //   parameters as the first two arguments.
    // * `thread_state` - a fresh instance of `ThreadState` to read into
    //
    // The matching `thread_state_count`, passed to `thread_get_state` and
    // `thread_set_state`, is computed after this block from
    // `ThreadState::count()`.
    cfg_if::cfg_if! {
        if #[cfg(target_arch = "x86_64")] {
            use mach::structs::x86_thread_state64_t;
            use mach::thread_status::x86_THREAD_STATE64;

            type ThreadState = x86_thread_state64_t;

            let thread_state_flavor = x86_THREAD_STATE64;

            let get_pc_and_fp = |state: &ThreadState| (
                state.__rip as *const u8,
                state.__rbp as usize,
            );

            let resume = |state: &mut ThreadState, pc: usize, fp: usize| {
                // The x86_64 ABI requires a 16-byte stack alignment for
                // functions, so typically we'll be 16-byte aligned. In this
                // case we simulate a `call` instruction by decrementing the
                // stack pointer and pushing the "return" address which in this
                // case is the faulting address. This should help the native
                // unwinder figure out how to find the precisely trapping
                // function.
                //
                // Note, however, that if the stack is not 16-byte aligned then
                // we don't do anything. Currently this only arises due to
                // `ud2` in the prologue of functions when performing the
                // initial stack check. In the old backend no stack manipulation
                // happens until after the stack check passes, so if the stack
                // check fails (hence we're running in this handler) then the
                // stack is not 16-byte aligned due to the previous return
                // address pushed by `call`. In this scenario we just blow away
                // the stack frame by overwriting %rip. This technically loses
                // the precise frame that was interrupted, but that's probably
                // not the end of the world anyway.
                if state.__rsp % 16 == 0 {
                    state.__rsp -= 8;
                    // This writes directly to the trapping thread's stack.
                    *(state.__rsp as *mut u64) = state.__rip;
                }
                // Resume in `unwind(pc, fp)`: %rdi and %rsi carry the first
                // two arguments.
                state.__rip = unwind as u64;
                state.__rdi = pc as u64;
                state.__rsi = fp as u64;
            };
            let mut thread_state = ThreadState::new();
        } else if #[cfg(target_arch = "aarch64")] {
            type ThreadState = arm_thread_state64_t;

            let thread_state_flavor = ARM_THREAD_STATE64;

            let get_pc_and_fp = |state: &ThreadState| (
                state.__pc as *const u8,
                state.__fp as usize,
            );

            let resume = |state: &mut ThreadState, pc: usize, fp: usize| {
                // Clobber LR with the faulting PC, so unwinding resumes at the
                // faulting instruction. The previous value of LR has been saved
                // by the callee (in Cranelift generated code), so no need to
                // stash it.
                state.__lr = pc as u64;

                // Fill in the arguments to unwind here, and set PC to it, so
                // it looks like a call to unwind.
                state.__x[0] = pc as u64;
                state.__x[1] = fp as u64;
                state.__pc = unwind as u64;
            };
            let mut thread_state = mem::zeroed::<ThreadState>();
        } else {
            compile_error!("unsupported target architecture");
        }
    }

    // First up read our origin thread's state into the area defined above.
    // `thread_state_count` is an in/out parameter of `thread_get_state` and
    // the resulting value is reused for `thread_set_state` below.
    let origin_thread = request.body.thread.name;
    let mut thread_state_count = ThreadState::count();
    let kret = thread_get_state(
        origin_thread,
        thread_state_flavor,
        &mut thread_state as *mut ThreadState as *mut u32,
        &mut thread_state_count,
    );
    if kret != KERN_SUCCESS {
        return false;
    }

    // Determine whether the faulting program counter is a wasm trap by asking
    // the `IS_WASM_PC` check in the parent module. This check is not tied to
    // the trapping thread, which matters because we're running on the
    // exception handling thread here, not on the thread that trapped.
    //
    // If the program counter isn't recognized then this isn't our exception
    // to handle and we report that to the caller.
    let (pc, fp) = get_pc_and_fp(&thread_state);
    if !super::IS_WASM_PC(pc as usize) {
        return false;
    }

    // We have determined that this is a wasm trap and we need to actually
    // force the thread itself to trap. The thread's register state is
    // configured to resume in the `unwind` function below, we update the
    // thread's register state, and then we're off to the races.
    resume(&mut thread_state, pc as usize, fp);
    let kret = thread_set_state(
        origin_thread,
        thread_state_flavor,
        &mut thread_state as *mut ThreadState as *mut u32,
        thread_state_count,
    );
    kret == KERN_SUCCESS
}

/// Landing pad that trapped threads are redirected into.
///
/// Nothing calls this function directly. Instead `handle_exception` rewrites
/// the register state of a thread that trapped in wasm so that the thread
/// resumes here, with the faulting program counter and frame pointer as the
/// two arguments.
///
/// Running on the trapped thread itself, this records the trap location in
/// the thread-local call state and then `longjmp`s back to the point that
/// entered wasm. It never returns.
unsafe extern "C" fn unwind(wasm_pc: *const u8, wasm_fp: usize) -> ! {
    let target = tls::with(|call_state| {
        // A thread can only end up here while executing wasm, in which case
        // its thread-local call state must be present.
        let call_state = call_state.unwrap();
        call_state.set_jit_trap(wasm_pc, wasm_fp);
        call_state.jmp_buf.get()
    });
    debug_assert!(!target.is_null());
    wasmtime_longjmp(target);
}

/// Registers `WASMTIME_PORT` as the exception port of the calling thread.
///
/// Exceptions on macOS can be delivered to either thread-level or task-level
/// exception ports, and wasmtime uses thread-level ports. Consequently every
/// thread that can generate an exception of interest has to register
/// `WASMTIME_PORT` as its own exception port, which is what this function
/// does.
///
/// Thread-level ports were chosen because a task-level (process-wide) port
/// would require forwarding the exceptions we're not interested in to the
/// previously registered port, and at this time the author isn't sure how to
/// even do that. SpiderMonkey calls this forwarding "dark magic" as well, and
/// since SpiderMonkey also goes with thread-level ports that's hopefully good
/// enough for wasmtime too.
///
/// This choice should also play well with other handlers: unhandled
/// thread-level exceptions get automatically forwarded to the task-level
/// port, which is where we'd expect things like breakpad/crashpad exception
/// handlers to be registered.
///
/// # Panics
///
/// Panics if `WASMTIME_PORT` has not been allocated yet or if the exception
/// port cannot be set.
#[cold]
pub fn lazy_per_thread_init() {
    unsafe {
        assert!(WASMTIME_PORT != MACH_PORT_NULL);

        // Only these two kinds of exception are routed to our port, and they
        // are delivered with the default behavior plus mach exception codes.
        let mask = EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION;
        let behavior = EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES;

        let current = mach_thread_self();
        let status = thread_set_exception_ports(
            current,
            mask,
            WASMTIME_PORT,
            behavior,
            mach_addons::THREAD_STATE_NONE,
        );
        // Release the thread port obtained above before checking the result
        // so that it is released even if the assertion below fails.
        mach_port_deallocate(mach_task_self(), current);
        assert_eq!(status, KERN_SUCCESS, "failed to set thread exception port");
    }
}