-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathlib.rs
More file actions
448 lines (408 loc) · 17.1 KB
/
lib.rs
File metadata and controls
448 lines (408 loc) · 17.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
//! Wizer: the WebAssembly pre-initializer!
//!
//! See the [`Wizer`] struct for details.
#![deny(missing_docs)]
#![cfg_attr(docsrs, feature(doc_cfg))]
mod info;
mod instrument;
mod parse;
mod rewrite;
mod snapshot;
#[cfg(feature = "wasmtime")]
mod wasmtime;
#[cfg(feature = "wasmtime")]
pub use wasmtime::*;
#[cfg(feature = "component-model")]
mod component;
#[cfg(feature = "component-model")]
pub use component::*;
#[cfg(not(feature = "rayon"))]
mod rayoff;
pub use crate::info::ModuleContext;
pub use crate::snapshot::SnapshotVal;
use ::wasmtime::{Result, bail, error::Context as _};
use std::collections::{HashMap, HashSet};
pub use wasmparser::ValType;
const DEFAULT_KEEP_INIT_FUNC: bool = false;
const DEFAULT_KEEP_INSTRUMENTATION: bool = false;
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
/// will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
/// allow the Wasm module to mutate tables, and we would need to be able to
/// snapshot the new table state, but funcrefs and externrefs don't have
/// identity and aren't comparable in the Wasm spec, which makes snapshotting
/// difficult.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
/// The Wasm export name of the function that should be executed to
/// initialize the Wasm module.
#[cfg_attr(
feature = "clap",
arg(short = 'f', long, default_value = "wizer-initialize")
)]
init_func: String,
/// Any function renamings to perform.
///
/// A renaming specification `dst=src` renames a function export `src` to
/// `dst`, overwriting any previous `dst` export.
///
/// Multiple renamings can be specified. It is an error to specify more than
/// one source to rename to a destination name, or to specify more than one
/// renaming destination for one source.
///
/// This option can be used, for example, to replace a `_start` entry point
/// in an initialized module with an alternate entry point.
///
/// When module linking is enabled, these renames are only applied to the
/// outermost module.
#[cfg_attr(
feature = "clap",
arg(
short = 'r',
long = "rename-func",
alias = "func-rename",
value_name = "dst=src",
value_parser = parse_rename,
),
)]
func_renames: Vec<(String, String)>,
/// After initialization, should the Wasm module still export the
/// initialization function?
///
/// This is `false` by default, meaning that the initialization function is
/// no longer exported from the Wasm module.
#[cfg_attr(
feature = "clap",
arg(long, require_equals = true, value_name = "true|false")
)]
keep_init_func: Option<Option<bool>>,
/// After initialization, should the Wasm module still contain the
/// instrumentation needed for further snapshotting?
///
/// This is `false` by default, meaning that instrumentation is stripped
/// from the output module. Set to `true` to enable incremental
/// snapshotting (savestate) workflows.
#[cfg_attr(
feature = "clap",
arg(long, require_equals = true, value_name = "true|false")
)]
keep_instrumentation: Option<Option<bool>>,
}
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
let parts: Vec<&str> = s.splitn(2, '=').collect();
if parts.len() != 2 {
bail!("must contain exactly one equals character ('=')");
}
Ok((parts[0].into(), parts[1].into()))
}
#[derive(Default)]
struct FuncRenames {
/// For a given export name that we encounter in the original module, a map
/// to a new name, if any, to emit in the output module.
rename_src_to_dst: HashMap<String, String>,
/// A set of export names that we ignore in the original module (because
/// they are overwritten by renamings).
rename_dsts: HashSet<String>,
}
impl FuncRenames {
fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
let mut ret = FuncRenames {
rename_src_to_dst: HashMap::new(),
rename_dsts: HashSet::new(),
};
if renames.is_empty() {
return Ok(ret);
}
for (dst, src) in renames {
if ret.rename_dsts.contains(dst) {
bail!("Duplicated function rename dst {dst}");
}
if ret.rename_src_to_dst.contains_key(src) {
bail!("Duplicated function rename src {src}");
}
ret.rename_dsts.insert(dst.clone());
ret.rename_src_to_dst.insert(src.clone(), dst.clone());
}
Ok(ret)
}
}
impl Wizer {
/// Construct a new `Wizer` builder.
pub fn new() -> Self {
Wizer {
init_func: "wizer-initialize".to_string(),
func_renames: vec![],
keep_init_func: None,
keep_instrumentation: None,
}
}
/// The export name of the initializer function.
///
/// Defaults to `"wizer-initialize"`.
pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
self.init_func = init_func.into();
self
}
/// Returns the initialization function that will be run for wizer.
pub fn get_init_func(&self) -> &str {
&self.init_func
}
/// Add a function rename to perform.
pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
self.func_renames
.push((new_name.to_string(), old_name.to_string()));
self
}
/// After initialization, should the Wasm module still export the
/// initialization function?
///
/// This is `false` by default, meaning that the initialization function is
/// no longer exported from the Wasm module.
pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
self.keep_init_func = Some(Some(keep));
self
}
/// After initialization, should the Wasm module still contain the
/// instrumentation needed for further snapshotting?
///
/// This is `false` by default, meaning that instrumentation is stripped
/// from the output module. Set to `true` to enable incremental
/// snapshotting (savestate) workflows.
pub fn keep_instrumentation(&mut self, keep: bool) -> &mut Self {
self.keep_instrumentation = Some(Some(keep));
self
}
/// First half of [`Self::run`] which instruments the provided `wasm` and
/// produces a new wasm module which should be run by a runtime.
///
/// After the returned wasm is executed the context returned here and the
/// state of the instance should be passed to [`Self::snapshot`].
pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
// Make sure we're given valid Wasm from the get go.
self.wasm_validate(&wasm)?;
let mut cx = parse::parse(wasm, false)?;
// When wizening core modules directly some imports aren't supported,
// so check for those here.
for import in cx.imports() {
match import.ty {
wasmparser::TypeRef::Global(_) => {
bail!("imported globals are not supported")
}
wasmparser::TypeRef::Table(_) => {
bail!("imported tables are not supported")
}
wasmparser::TypeRef::Memory(_) => {
bail!("imported memories are not supported")
}
wasmparser::TypeRef::Func(_) => {}
wasmparser::TypeRef::FuncExact(_) => {}
wasmparser::TypeRef::Tag(_) => {}
}
}
let instrumented_wasm = instrument::instrument(&mut cx);
self.debug_assert_valid_wasm(&instrumented_wasm);
Ok((cx, instrumented_wasm))
}
/// Parse a previously instrumented Wasm module, returning its
/// [`ModuleContext`].
///
/// This is used in incremental snapshotting workflows where the module
/// was already instrumented by a prior call to [`Self::instrument`] or
/// [`Self::snapshot`] (with [`Self::keep_instrumentation`] enabled). The returned context can be
/// passed to [`Self::snapshot`] to produce a new snapshot.
pub fn parse_instrumented<'a>(&self, wasm: &'a [u8]) -> Result<ModuleContext<'a>> {
self.wasm_validate(wasm)?;
parse::parse(wasm, true)
}
/// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
/// by [`Self::instrument`] and the state of the `instance` after it has
/// possibly executed its initialization function.
///
/// This returns a new WebAssembly binary which has all state
/// pre-initialized.
pub async fn snapshot(
&self,
mut cx: ModuleContext<'_>,
instance: &mut impl InstanceState,
) -> Result<Vec<u8>> {
// Parse rename spec.
let renames = FuncRenames::parse(&self.func_renames)?;
let snapshot = snapshot::snapshot(&cx, instance).await;
let rewritten_wasm = self.rewrite(
&mut cx,
&snapshot,
&renames,
true,
self.get_keep_instrumentation(),
);
self.debug_assert_valid_wasm(&rewritten_wasm);
Ok(rewritten_wasm)
}
fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
if !cfg!(debug_assertions) {
return;
}
if let Err(error) = self.wasm_validate(&wasm) {
#[cfg(feature = "wasmprinter")]
let wat = wasmprinter::print_bytes(&wasm)
.unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
#[cfg(not(feature = "wasmprinter"))]
let wat = "`wasmprinter` cargo feature is not enabled".to_string();
panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
}
}
fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
log::debug!("Validating input Wasm");
wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
.validate_all(wasm)
.context("wasm validation failed")?;
for payload in wasmparser::Parser::new(0).parse_all(wasm) {
match payload? {
wasmparser::Payload::CodeSectionEntry(code) => {
let mut ops = code.get_operators_reader()?;
while !ops.eof() {
match ops.read()? {
// Table mutations aren't allowed as wizer has no
// way to record a snapshot of a table at this time.
// The only table mutations allowed are those from
// active element segments which can be
// deterministically replayed, so disallow all other
// forms of mutating a table.
//
// Ideally Wizer could take a snapshot of a table
// post-instantiation and then ensure that after
// running initialization the table didn't get
// mutated, allowing these instructions, but that's
// also not possible at this time.
wasmparser::Operator::TableCopy { .. } => {
bail!("unsupported `table.copy` instruction")
}
wasmparser::Operator::TableInit { .. } => {
bail!("unsupported `table.init` instruction")
}
wasmparser::Operator::TableSet { .. } => {
bail!("unsupported `table.set` instruction")
}
wasmparser::Operator::TableGrow { .. } => {
bail!("unsupported `table.grow` instruction")
}
wasmparser::Operator::TableFill { .. } => {
bail!("unsupported `table.fill` instruction")
}
// Wizer has no way of dynamically determining which
// element or data segments were dropped during
// execution so instead disallow these instructions
// entirely. Like above it'd be nice to allow them
// but just forbid their execution during the
// initialization function, but that can't be done
// easily at this time.
wasmparser::Operator::ElemDrop { .. } => {
bail!("unsupported `elem.drop` instruction")
}
wasmparser::Operator::DataDrop { .. } => {
bail!("unsupported `data.drop` instruction")
}
// Wizer can't snapshot GC references, so disallow
// any mutation of GC references. This prevents, for
// example, reading something from a table and then
// mutating it.
wasmparser::Operator::StructSet { .. } => {
bail!("unsupported `struct.set` instruction")
}
wasmparser::Operator::ArraySet { .. } => {
bail!("unsupported `array.set` instruction")
}
wasmparser::Operator::ArrayFill { .. } => {
bail!("unsupported `array.fill` instruction")
}
wasmparser::Operator::ArrayCopy { .. } => {
bail!("unsupported `array.copy` instruction")
}
wasmparser::Operator::ArrayInitData { .. } => {
bail!("unsupported `array.init_data` instruction")
}
wasmparser::Operator::ArrayInitElem { .. } => {
bail!("unsupported `array.init_elem` instruction")
}
_ => continue,
}
}
}
wasmparser::Payload::GlobalSection(globals) => {
for g in globals {
let g = g?.ty;
if !g.mutable {
continue;
}
match g.content_type {
wasmparser::ValType::I32
| wasmparser::ValType::I64
| wasmparser::ValType::F32
| wasmparser::ValType::F64
| wasmparser::ValType::V128 => {}
wasmparser::ValType::Ref(_) => {
bail!("unsupported mutable global containing a reference type")
}
}
}
}
_ => {}
}
}
Ok(())
}
fn get_keep_init_func(&self) -> bool {
match self.keep_init_func {
Some(keep) => keep.unwrap_or(true),
None => DEFAULT_KEEP_INIT_FUNC,
}
}
fn get_keep_instrumentation(&self) -> bool {
match self.keep_instrumentation {
Some(keep) => keep.unwrap_or(true),
None => DEFAULT_KEEP_INSTRUMENTATION,
}
}
}
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
pub trait InstanceState {
/// Loads the global specified by `name`, returning a `SnapshotVal`.
///
/// # Panics
///
/// This function panics if `name` isn't an exported global or if the type
/// of the global doesn't fit in `SnapshotVal`.
fn global_get(
&mut self,
name: &str,
type_hint: ValType,
) -> impl Future<Output = SnapshotVal> + Send;
/// Loads the contents of the memory specified by `name`, returning the
/// entier contents as a `Vec<u8>`.
///
/// # Panics
///
/// This function panics if `name` isn't an exported memory.
fn memory_contents(
&mut self,
name: &str,
contents: impl FnOnce(&[u8]) + Send,
) -> impl Future<Output = ()> + Send;
}