1022: Add caching support for Singlepass backend. r=losfair a=losfair

This PR adds caching support for the Singlepass backend.

- [x] Implementation
- [x] AArch64 test
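
With this change, a Singlepass-compiled module can be serialized and reloaded like modules from the other backends. A minimal sketch of the intended flow, assuming the 0.11-era runtime API (`compile_with`, `Module::cache`, `Artifact::serialize`/`deserialize`, and `load_cache_with` are recalled from that era and may not match exactly):

```rust
use wasmer_runtime::{compile_with, imports};
use wasmer_runtime_core::cache::Artifact;
use wasmer_singlepass_backend::SinglePassCompiler;

fn cache_roundtrip(wasm_bytes: &[u8]) {
    // Compile with the Singlepass backend and serialize the cache artifact.
    let module = compile_with(wasm_bytes, &SinglePassCompiler::new()).expect("compile");
    let bytes = module.cache().expect("cache").serialize().expect("serialize");

    // Later (possibly in another process): rebuild the module from the cached
    // bytes without recompiling. Loading is unsafe because the bytes are
    // trusted to be a valid machine-code image.
    let artifact = Artifact::deserialize(&bytes).expect("deserialize");
    let module = unsafe {
        wasmer_runtime_core::load_cache_with(artifact, &SinglePassCompiler::new())
            .expect("load from cache")
    };
    let _instance = module.instantiate(&imports! {}).expect("instantiate");
}
```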

Co-authored-by: losfair <zhy20000919@hotmail.com>
Co-authored-by: Heyang Zhou <zhy20000919@hotmail.com>
bors[bot] 2019-12-02 17:56:37 +00:00 committed by GitHub
commit d639748a20
8 changed files with 146 additions and 60 deletions


@ -4,6 +4,7 @@
- [#1006](https://github.com/wasmerio/wasmer/pull/1006) Fix minor panic issue when `wasmer::compile_with` called with llvm backend
- [#1009](https://github.com/wasmerio/wasmer/pull/1009) Enable LLVM verifier for all tests, add new llvm-backend-tests crate.
- [#1022](https://github.com/wasmerio/wasmer/pull/1022) Add caching support for Singlepass backend.
- [#1004](https://github.com/wasmerio/wasmer/pull/1004) Add the Auto backend to enable to adapt backend usage depending on wasm file executed.
## 0.11.0 - 2019-11-22

Cargo.lock

@ -2332,12 +2332,15 @@ dependencies = [
name = "wasmer-singlepass-backend"
version = "0.11.0"
dependencies = [
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"dynasm 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"dynasmrt 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
"nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)",
"wasmer-runtime-core 0.11.0",
]


@ -4,11 +4,11 @@
| &nbsp; | Singlepass | Cranelift | LLVM |
| - | :-: | :-: | :-: |
| Caching | | ✅ | ✅ |
| Caching | ✅ | ✅ | ✅ |
| Emscripten | ✅ | ✅ | ✅ |
| Metering | ✅ | ⬜ | ✅ |
| Multi-value return | ⬜ | ⬜ | ⬜ |
| OSR | 🔄 | ❓ | ❓ |
| OSR | 🔄 | ⬜ | 🔄 |
| SIMD | ⬜ | ⬜ | ✅ |
| WASI | ✅ | ✅ | ✅ |
| WASMER_BACKTRACE | ✅ | ⬜ | ⬜ |


@ -7,11 +7,11 @@ use std::collections::BTreeMap;
use std::ops::Bound::{Included, Unbounded};
/// An index to a register
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct RegisterIndex(pub usize);
/// A kind of wasm or constant value
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum WasmAbstractValue {
/// A wasm runtime value
Runtime,
@ -20,7 +20,7 @@ pub enum WasmAbstractValue {
}
/// A container for the state of a running wasm instance.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MachineState {
/// Stack values.
pub stack_values: Vec<MachineValue>,
@ -37,7 +37,7 @@ pub struct MachineState {
}
/// A diff of two `MachineState`s.
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct MachineStateDiff {
/// Last.
pub last: Option<usize>,
@ -63,7 +63,7 @@ pub struct MachineStateDiff {
}
/// A kind of machine value.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum MachineValue {
/// Undefined.
Undefined,
@ -86,7 +86,7 @@ pub enum MachineValue {
}
/// A map of function states.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FunctionStateMap {
/// Initial.
pub initial: MachineState,
@ -111,7 +111,7 @@ pub struct FunctionStateMap {
}
/// A kind of suspend offset.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum SuspendOffset {
/// A loop.
Loop(usize),
@ -122,7 +122,7 @@ pub enum SuspendOffset {
}
/// Info for an offset.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OffsetInfo {
/// End offset.
pub end_offset: usize, // excluded bound
@ -133,7 +133,7 @@ pub struct OffsetInfo {
}
/// A map of module state.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ModuleStateMap {
/// Local functions.
pub local_functions: BTreeMap<usize, FunctionStateMap>,
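
These `Serialize`/`Deserialize` derives are what let `ModuleStateMap` (and everything it contains, down to `MachineValue` and `RegisterIndex`) be embedded in the new `CacheImage` and round-tripped through `bincode`. A self-contained sketch of that round trip, using a hypothetical stand-in struct and serde's `derive` feature rather than the crate-level `#[macro_use] extern crate serde_derive` the PR uses:

```rust
use serde::{Deserialize, Serialize};

// `Demo` is a stand-in for types like `MachineState` or `ModuleStateMap`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct Demo {
    stack_values: Vec<u64>,
    last: Option<usize>,
}

fn main() {
    let original = Demo { stack_values: vec![1, 2, 3], last: Some(7) };
    let bytes = bincode::serialize(&original).expect("serialize");
    let restored: Demo = bincode::deserialize(&bytes).expect("deserialize");
    assert_eq!(original, restored);
}
```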


@ -129,7 +129,7 @@ impl Cache for FileSystemCache {
}
}
#[cfg(all(test, not(feature = "singlepass")))]
#[cfg(test)]
mod tests {
use super::*;


@ -19,3 +19,6 @@ byteorder = "1.3"
nix = "0.15"
libc = "0.2.60"
smallvec = "0.6"
serde = "1.0"
serde_derive = "1.0"
bincode = "1.2"


@ -22,8 +22,10 @@ use std::{
};
use wasmer_runtime_core::{
backend::{
get_inline_breakpoint_size, sys::Memory, Architecture, Backend, CacheGen, CompilerConfig,
MemoryBoundCheckMode, RunnableModule, Token,
get_inline_breakpoint_size,
sys::{Memory, Protect},
Architecture, Backend, CacheGen, CompilerConfig, MemoryBoundCheckMode, RunnableModule,
Token,
},
cache::{Artifact, Error as CacheError},
codegen::*,
@ -229,8 +231,6 @@ unsafe impl Sync for FuncPtr {}
pub struct X64ExecutionContext {
#[allow(dead_code)]
code: CodeMemory,
#[allow(dead_code)]
functions: Vec<X64FunctionCode>,
function_pointers: Vec<FuncPtr>,
function_offsets: Vec<AssemblyOffset>,
signatures: Arc<Map<SigIndex, FuncSig>>,
@ -239,6 +239,28 @@ pub struct X64ExecutionContext {
msm: ModuleStateMap,
}
/// On-disk cache format.
/// Offsets are relative to the start of the executable image.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CacheImage {
/// The executable image.
code: Vec<u8>,
/// Offsets to the start of each function. Including trampoline, if any.
/// Trampolines are only present on AArch64.
/// On x86-64, `function_pointers` are identical to `function_offsets`.
function_pointers: Vec<usize>,
/// Offsets to the start of each function after trampoline.
function_offsets: Vec<usize>,
/// Number of imported functions.
func_import_count: usize,
/// Module state map.
msm: ModuleStateMap,
}
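
Storing image-relative offsets rather than raw pointers matters because the code buffer will almost certainly be mapped at a different address when the cache is loaded; `from_cache` below rebuilds the absolute entry points from the new base. A simplified, hypothetical restatement of that reconstruction:

```rust
/// Hypothetical helper mirroring what `from_cache` does: turn image-relative
/// function offsets back into absolute entry points for a freshly mapped
/// code buffer.
fn entry_points(code_base: *const u8, function_offsets: &[usize]) -> Vec<*const u8> {
    function_offsets
        .iter()
        .map(|&off| unsafe { code_base.add(off) })
        .collect()
}
```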
#[derive(Debug)]
pub struct ControlFrame {
pub label: DynamicLabel,
@ -257,6 +279,25 @@ pub enum IfElseState {
Else,
}
pub struct SinglepassCache {
buffer: Arc<[u8]>,
}
impl CacheGen for SinglepassCache {
fn generate_cache(&self) -> Result<(Box<[u8]>, Memory), CacheError> {
let mut memory = Memory::with_size_protect(self.buffer.len(), Protect::ReadWrite)
.map_err(CacheError::SerializeError)?;
let buffer = &*self.buffer;
unsafe {
memory.as_slice_mut()[..buffer.len()].copy_from_slice(buffer);
}
Ok(([].as_ref().into(), memory))
}
}
impl RunnableModule for X64ExecutionContext {
fn get_func(
&self,
@ -677,29 +718,41 @@ impl ModuleCodeGenerator<X64FunctionCode, X64ExecutionContext, CodegenError>
.map(|x| (x.offset, x.fsm.clone()))
.collect();
struct Placeholder;
impl CacheGen for Placeholder {
fn generate_cache(&self) -> Result<(Box<[u8]>, Memory), CacheError> {
Err(CacheError::Unknown(
"the singlepass backend doesn't support caching yet".to_string(),
))
}
}
let msm = ModuleStateMap {
local_functions: local_function_maps,
total_size,
};
let cache_image = CacheImage {
code: output.to_vec(),
function_pointers: out_labels
.iter()
.map(|x| {
(x.0 as usize)
.checked_sub(output.as_ptr() as usize)
.unwrap()
})
.collect(),
function_offsets: out_offsets.iter().map(|x| x.0 as usize).collect(),
func_import_count: self.func_import_count,
msm: msm.clone(),
};
let cache = SinglepassCache {
buffer: Arc::from(bincode::serialize(&cache_image).unwrap().into_boxed_slice()),
};
Ok((
X64ExecutionContext {
code: output,
functions: self.functions,
signatures: self.signatures.as_ref().unwrap().clone(),
breakpoints: breakpoints,
func_import_count: self.func_import_count,
function_pointers: out_labels,
function_offsets: out_offsets,
msm: ModuleStateMap {
local_functions: local_function_maps,
total_size,
},
msm: msm,
},
Box::new(Placeholder),
Box::new(cache),
))
}
@ -771,10 +824,45 @@ impl ModuleCodeGenerator<X64FunctionCode, X64ExecutionContext, CodegenError>
}));
Ok(())
}
unsafe fn from_cache(_artifact: Artifact, _: Token) -> Result<ModuleInner, CacheError> {
Err(CacheError::Unknown(
"the singlepass compiler API doesn't support caching yet".to_string(),
))
unsafe fn from_cache(artifact: Artifact, _: Token) -> Result<ModuleInner, CacheError> {
let (info, _, memory) = artifact.consume();
let cache_image: CacheImage = bincode::deserialize(memory.as_slice())
.map_err(|x| CacheError::DeserializeError(format!("{:?}", x)))?;
let mut code_mem = CodeMemory::new(cache_image.code.len());
code_mem[0..cache_image.code.len()].copy_from_slice(&cache_image.code);
code_mem.make_executable();
let function_pointers: Vec<FuncPtr> = cache_image
.function_pointers
.iter()
.map(|&x| FuncPtr(code_mem.as_ptr().offset(x as isize) as *const FuncPtrInner))
.collect();
let function_offsets: Vec<AssemblyOffset> = cache_image
.function_offsets
.iter()
.cloned()
.map(AssemblyOffset)
.collect();
let ec = X64ExecutionContext {
code: code_mem,
function_pointers,
function_offsets,
signatures: Arc::new(info.signatures.clone()),
breakpoints: Arc::new(HashMap::new()),
func_import_count: cache_image.func_import_count,
msm: cache_image.msm,
};
Ok(ModuleInner {
runnable_module: Box::new(ec),
cache_gen: Box::new(SinglepassCache {
buffer: Arc::from(memory.as_slice().to_vec().into_boxed_slice()),
}),
info,
})
}
}
@ -3397,7 +3485,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
static CANONICAL_NAN: u128 = 0x7FC0_0000;
a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1));
a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2));
a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
@ -3416,10 +3503,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float canonical nan
a.emit_mov(
Size::S64,
Location::Imm64((&CANONICAL_NAN as *const u128) as u64),
Location::Imm32(0x7FC0_0000), // Canonical NaN
Location::GPR(tmpg1),
);
a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2));
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::XMM(x) => {
@ -3509,8 +3596,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
static NEG_ZERO: u128 = 0x8000_0000;
static CANONICAL_NAN: u128 = 0x7FC0_0000;
a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1));
a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2));
a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
@ -3524,14 +3609,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float -0.0
a.emit_mov(
Size::S64,
Location::Imm64((&NEG_ZERO as *const u128) as u64),
Location::Imm32(0x8000_0000), // Negative zero
Location::GPR(tmpg1),
);
a.emit_mov(
Size::S64,
Location::Memory(tmpg1, 0),
Location::XMM(tmp_xmm2),
);
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp_xmm2));
a.emit_label(label2);
a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
@ -3539,10 +3620,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float canonical nan
a.emit_mov(
Size::S64,
Location::Imm64((&CANONICAL_NAN as *const u128) as u64),
Location::Imm32(0x7FC0_0000), // Canonical NaN
Location::GPR(tmpg1),
);
a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2));
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::XMM(x) => {
@ -3821,7 +3902,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000;
a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1));
a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2));
a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
@ -3840,10 +3920,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float canonical nan
a.emit_mov(
Size::S64,
Location::Imm64((&CANONICAL_NAN as *const u128) as u64),
Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
Location::GPR(tmpg1),
);
a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2));
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::XMM(x) => {
@ -3933,8 +4013,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
static NEG_ZERO: u128 = 0x8000_0000_0000_0000;
static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000;
a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1));
a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2));
a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
@ -3948,14 +4026,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float -0.0
a.emit_mov(
Size::S64,
Location::Imm64((&NEG_ZERO as *const u128) as u64),
Location::Imm64(0x8000_0000_0000_0000), // Negative zero
Location::GPR(tmpg1),
);
a.emit_mov(
Size::S64,
Location::Memory(tmpg1, 0),
Location::XMM(tmp_xmm2),
);
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp_xmm2));
a.emit_label(label2);
a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
@ -3963,10 +4037,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
// load float canonical nan
a.emit_mov(
Size::S64,
Location::Imm64((&CANONICAL_NAN as *const u128) as u64),
Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
Location::GPR(tmpg1),
);
a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2));
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::XMM(x) => {
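
For reference, the immediates substituted in these hunks are the standard IEEE 754 canonical-NaN and negative-zero bit patterns; a quick standalone check in plain Rust (not part of the diff):

```rust
fn main() {
    // f32: canonical (quiet) NaN and negative zero.
    assert!(f32::from_bits(0x7FC0_0000).is_nan());
    assert!(f32::from_bits(0x8000_0000).is_sign_negative());
    assert_eq!(f32::from_bits(0x8000_0000), -0.0_f32);

    // f64: canonical (quiet) NaN and negative zero.
    assert!(f64::from_bits(0x7FF8_0000_0000_0000).is_nan());
    assert!(f64::from_bits(0x8000_0000_0000_0000).is_sign_negative());
    assert_eq!(f64::from_bits(0x8000_0000_0000_0000), -0.0_f64);
}
```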


@ -20,6 +20,11 @@ compile_error!("This crate doesn't yet support compiling on operating systems ot
extern crate dynasmrt;
extern crate serde;
#[macro_use]
extern crate serde_derive;
#[macro_use]
extern crate dynasm;