Optimize locals.

This commit is contained in:
losfair 2019-04-02 20:50:56 +08:00
parent 4d6bbed905
commit b74d8bc521
3 changed files with 139 additions and 34 deletions

View File

@ -138,7 +138,6 @@ pub struct X64FunctionCode {
br_table_data: Option<Vec<Vec<usize>>>,
returns: Vec<WpType>,
locals: Vec<Location>,
vmctx_location: Option<Location>,
num_params: usize,
num_locals: usize,
value_stack: Vec<(Location, LocalOrTemp)>,
@ -320,7 +319,6 @@ impl ModuleCodeGenerator<X64FunctionCode, X64ExecutionContext, X64RuntimeResolve
locals: vec![],
num_params: 0,
num_locals: 0,
vmctx_location: None,
value_stack: vec! [],
control_stack: vec! [],
machine: Machine::new(),
@ -805,20 +803,6 @@ impl X64FunctionCode {
f(a, Size::S64, Location::GPR(GPR::RCX), ret);
value_stack.push((ret, LocalOrTemp::Temp));
}
fn get_param_location(
    idx: usize
) -> Location {
    // System V AMD64 calling convention: integer arguments 0..=5 travel in
    // registers; any argument past the sixth lives in the caller's frame,
    // above the saved RBP and the return address (hence the +16 base).
    if idx >= 6 {
        return Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32);
    }
    match idx {
        0 => Location::GPR(GPR::RDI),
        1 => Location::GPR(GPR::RSI),
        2 => Location::GPR(GPR::RDX),
        3 => Location::GPR(GPR::RCX),
        4 => Location::GPR(GPR::R8),
        5 => Location::GPR(GPR::R9),
        // Unreachable: the guard above already routed idx >= 6 to memory.
        _ => unreachable!(),
    }
}
}
impl FunctionCodeGenerator for X64FunctionCode {
@ -842,22 +826,8 @@ impl FunctionCodeGenerator for X64FunctionCode {
let a = self.assembler.as_mut().unwrap();
a.emit_push(Size::S64, Location::GPR(GPR::RBP));
a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RBP));
let locations = self.machine.acquire_stack_locations(a, 1 + self.num_locals, false);
self.vmctx_location = Some(locations[0]);
self.locals = locations[1..].to_vec();
a.emit_mov(Size::S64, Self::get_param_location(0), self.vmctx_location.unwrap());
for i in 0..self.num_params {
Self::emit_relaxed_binop(
a, &mut self.machine, Assembler::emit_mov,
Size::S64, Self::get_param_location(i + 1), self.locals[i],
);
}
for i in self.num_params..self.num_locals {
a.emit_mov(Size::S32, Location::Imm32(0), self.locals[i]);
}
self.locals = self.machine.init_locals(a, self.num_locals, self.num_params);
self.control_stack.push(ControlFrame {
label: a.get_label(),
@ -1181,7 +1151,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
let mut call_movs: Vec<(Location, GPR)> = vec![];
for i in (0..param_types.len()).rev() {
let loc = Self::get_param_location(1 + i);
let loc = Machine::get_param_location(1 + i);
match loc {
Location::GPR(x) => {
call_movs.push((params[i].0, x));
@ -1201,7 +1171,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
}
}
a.emit_mov(Size::S64, Location::Memory(GPR::RBP, -8), Self::get_param_location(0)); // vmctx
a.emit_mov(Size::S64, Location::GPR(Machine::get_vmctx_reg()), Machine::get_param_location(0)); // vmctx
a.emit_call_label(label);
if stack_offset > 0 {
@ -1415,6 +1385,7 @@ impl FunctionCodeGenerator for X64FunctionCode {
if self.control_stack.len() == 0 {
a.emit_label(frame.label);
self.machine.finalize_locals(a, &self.locals);
a.emit_mov(Size::S64, Location::GPR(GPR::RBP), Location::GPR(GPR::RSP));
a.emit_pop(Size::S64, Location::GPR(GPR::RBP));
a.emit_ret();

View File

@ -79,6 +79,7 @@ pub trait Emitter {
fn emit_label(&mut self, label: Self::Label);
fn emit_mov(&mut self, sz: Size, src: Location, dst: Location);
fn emit_lea(&mut self, sz: Size, src: Location, dst: Location);
fn emit_xor(&mut self, sz: Size, src: Location, dst: Location);
fn emit_jmp(&mut self, condition: Condition, label: Self::Label);
fn emit_set(&mut self, condition: Condition, dst: GPR);
@ -343,6 +344,17 @@ impl Emitter for Assembler {
)}
);
}
fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) {
    // Emits `lea dst, [base + disp]`: computes the effective address of a
    // memory operand into a register without touching memory. Only the
    // memory -> GPR forms are supported; any other operand combination is
    // a codegen bug upstream.
    match (sz, src, dst) {
        (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
            // 32-bit destination register (Rd); the base is always 64-bit (Rq).
            dynasm!(self ; lea Rd(dst as u8), [Rq(src as u8) + disp]);
        },
        (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
            dynasm!(self ; lea Rq(dst as u8), [Rq(src as u8) + disp]);
        },
        _ => unreachable!(),
    }
}
fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) {
    // Delegates to the shared integer-binop expansion macro; XOR has no
    // floating-point form, hence the `nofp` variant. Unsupported operand
    // combinations fall through to the unreachable!() arm.
    binop_all_nofp!(xor, self, sz, src, dst, {unreachable!()});
}

View File

@ -7,7 +7,8 @@ struct MachineStackOffset(usize);
pub struct Machine {
used_gprs: HashSet<GPR>,
used_xmms: HashSet<XMM>,
stack_offset: MachineStackOffset
stack_offset: MachineStackOffset,
save_area_offset: Option<MachineStackOffset>,
}
impl Machine {
@ -16,6 +17,7 @@ impl Machine {
used_gprs: HashSet::new(),
used_xmms: HashSet::new(),
stack_offset: MachineStackOffset(0),
save_area_offset: None,
}
}
@ -27,6 +29,10 @@ impl Machine {
self.used_xmms.iter().cloned().collect()
}
pub fn get_vmctx_reg() -> GPR {
GPR::R15
}
/// Picks an unused general purpose register for local/stack/argument use.
///
/// This method does not mark the register as used.
@ -321,4 +327,120 @@ impl Machine {
assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP));
}
}
pub fn init_locals<E: Emitter>(&mut self, a: &mut E, n: usize, n_params: usize) -> Vec<Location> {
    // Chooses home locations for all `n` wasm locals (the first `n_params`
    // of which are function parameters), emits the prologue code that
    // materializes them, and returns the chosen locations in local order.
    //
    // Must be paired with a later `finalize_locals` call, which relies on
    // the `save_area_offset` recorded here.

    // Maps the idx-th register-allocated local to its home: the first five
    // in scratch registers, the rest in RBP-relative stack slots.
    //
    // NOTE(review): R10/R11 are caller-saved under the System V AMD64 ABI,
    // so locals living there only survive an emitted `call` if the
    // call-emission path preserves them — confirm before relying on this.
    fn get_local_location(idx: usize) -> Location {
        match idx {
            0 => Location::GPR(GPR::R10),
            1 => Location::GPR(GPR::R11),
            2 => Location::GPR(GPR::R12),
            3 => Location::GPR(GPR::R13),
            4 => Location::GPR(GPR::R14),
            _ => Location::Memory(GPR::RBP, -(((idx - 4) * 8) as i32)),
        }
    }
    let mut locations: Vec<Location> = vec![];
    let mut allocated: usize = 0;
    // Determine locations for parameters. Register-passed parameters get
    // re-homed into local slots; stack-passed ones stay where the caller
    // put them. Param index is offset by 1 because slot 0 carries vmctx.
    for i in 0..n_params {
        let loc = Self::get_param_location(i + 1);
        locations.push(match loc {
            Location::GPR(_) => {
                let old_idx = allocated;
                allocated += 1;
                get_local_location(old_idx)
            },
            Location::Memory(_, _) => loc,
            _ => unreachable!(),
        });
    }
    // Determine locations for normal (non-parameter) locals.
    for _ in n_params..n {
        locations.push(get_local_location(allocated));
        allocated += 1;
    }
    // How many machine stack slots did all the locals use?
    let num_mem_slots = locations.iter().filter(|&&loc| {
        match loc {
            Location::Memory(_, _) => true,
            _ => false,
        }
    }).count();
    // Move RSP down to reserve space for machine stack slots.
    if num_mem_slots > 0 {
        a.emit_sub(Size::S64, Location::Imm32((num_mem_slots * 8) as u32), Location::GPR(GPR::RSP));
        self.stack_offset.0 += num_mem_slots * 8;
    }
    // Save the previous contents of every register we are about to claim
    // for locals; `finalize_locals` pops them back in reverse order.
    for loc in locations.iter() {
        if let Location::GPR(_) = *loc {
            a.emit_push(Size::S64, *loc);
            self.stack_offset.0 += 8;
        }
    }
    // Save R15 for vmctx use.
    a.emit_push(Size::S64, Location::GPR(GPR::R15));
    self.stack_offset.0 += 8;
    // Record where the register-save area starts, so the epilogue can
    // rewind RSP straight to it regardless of later stack activity.
    self.save_area_offset = Some(MachineStackOffset(self.stack_offset.0));
    // Load in-register parameters into their allocated locations. Register
    // parameters always precede stack parameters (see get_param_location),
    // so stop at the first memory-passed one.
    for i in 0..n_params {
        let loc = Self::get_param_location(i + 1);
        match loc {
            Location::GPR(_) => {
                a.emit_mov(Size::S64, loc, locations[i]);
            },
            _ => break
        }
    }
    // Load vmctx (always the first native argument) into its pinned register.
    a.emit_mov(Size::S64, Self::get_param_location(0), Location::GPR(GPR::R15));
    // Initialize all normal locals to zero, as wasm semantics require.
    for i in n_params..n {
        a.emit_mov(Size::S64, Location::Imm32(0), locations[i]);
    }
    locations
}
pub fn finalize_locals<E: Emitter>(&mut self, a: &mut E, locations: &[Location]) {
    // Emits the epilogue counterpart of `init_locals`: rewinds the stack
    // to the register-save area and restores everything saved there.
    //
    // `locations` must be the vector that `init_locals` returned, and
    // `init_locals` must have run first (it records `save_area_offset`);
    // calling out of order is a codegen bug and panics.

    // Unwind RSP directly to the save area. LEA computes RBP - offset
    // without touching memory or flags.
    let offset = self
        .save_area_offset
        .as_ref()
        .expect("finalize_locals called before init_locals")
        .0 as i32;
    a.emit_lea(Size::S64, Location::Memory(GPR::RBP, -offset), Location::GPR(GPR::RSP));
    // Restore R15, which was pinned to vmctx (pushed last, so popped first).
    a.emit_pop(Size::S64, Location::GPR(GPR::R15));
    // Restore saved registers in reverse push order.
    for loc in locations.iter().rev() {
        if let Location::GPR(_) = *loc {
            a.emit_pop(Size::S64, *loc);
        }
    }
}
pub fn get_param_location(
    idx: usize
) -> Location {
    // Returns where the idx-th native integer argument lives under the
    // System V AMD64 calling convention: the first six in registers, the
    // rest on the caller's stack above the saved RBP and return address
    // (hence the +16 base offset, stepping 8 bytes per argument).
    match idx {
        0 => Location::GPR(GPR::RDI),
        1 => Location::GPR(GPR::RSI),
        2 => Location::GPR(GPR::RDX),
        3 => Location::GPR(GPR::RCX),
        4 => Location::GPR(GPR::R8),
        5 => Location::GPR(GPR::R9),
        _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
    }
}
}