From d233e44b04da45dcc99980662db6d3776d9f4a10 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Thu, 7 Dec 2023 23:15:37 +0100 Subject: asm: move into sub module; remove encode_ri --- src/asm.rs | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 366 +------------------------------------------------------------ 2 files changed, 346 insertions(+), 364 deletions(-) create mode 100644 src/asm.rs (limited to 'src') diff --git a/src/asm.rs b/src/asm.rs new file mode 100644 index 0000000..8ee5fb1 --- /dev/null +++ b/src/asm.rs @@ -0,0 +1,344 @@ +//! The `x64` jit assembler. + +use crate::*; +use imm::Imm; +use reg::Reg; + +/// Encode the `REX` byte. +const fn rex(w: bool, r: u8, x: u8, b: u8) -> u8 { + let w = if w { 1 } else { 0 }; + let r = (r >> 3) & 1; + let x = (x >> 3) & 1; + let b = (b >> 3) & 1; + 0b0100_0000 | ((w & 1) << 3) | (r << 2) | (x << 1) | b +} + +/// Encode the `ModR/M` byte. +const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 { + ((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111) +} + +/// `x64` jit assembler. +pub struct Asm { + buf: Vec, +} + +impl Asm { + /// Create a new `x64` jit assembler. + pub fn new() -> Asm { + // Some random default capacity. + let buf = Vec::with_capacity(1024); + Asm { buf } + } + + /// Consume the assembler and get the emitted code. + pub fn into_code(self) -> Vec { + self.buf + } + + /// Emit a slice of bytes. + pub(crate) fn emit(&mut self, bytes: &[u8]) { + self.buf.extend_from_slice(bytes); + } + + /// Emit a slice of optional bytes. + fn emit_optional(&mut self, bytes: &[Option]) { + for byte in bytes.iter().filter_map(|&b| b) { + self.buf.push(byte); + } + } + + /// Emit a slice of bytes at `pos`. + /// + /// # Panics + /// + /// Panics if [pos..pos+len] indexes out of bound of the underlying code buffer. + fn emit_at(&mut self, pos: usize, bytes: &[u8]) { + if let Some(buf) = self.buf.get_mut(pos..pos + bytes.len()) { + buf.copy_from_slice(bytes); + } else { + unimplemented!(); + } + } + + /// Bind the [Label] to the current location. + pub fn bind(&mut self, label: &mut Label) { + // Bind the label to the current offset. + label.bind(self.buf.len()); + + // Resolve any pending relocations for the label. + self.resolve(label); + } + + /// If the [Label] is bound, patch any pending relocation. + fn resolve(&mut self, label: &mut Label) { + if let Some(loc) = label.location() { + // For now we only support disp32 as label location. + let loc = i32::try_from(loc).expect("Label location did not fit into i32."); + + // Resolve any pending relocations for the label. + for off in label.offsets_mut().drain() { + // Displacement is relative to the next instruction following the jump. + // We record the offset to patch at the first byte of the disp32 therefore we need + // to account for that in the disp computation. + let disp32 = loc - i32::try_from(off).expect("Label offset did not fit into i32") - 4 /* account for the disp32 */; + + // Patch the relocation with the disp32. + self.emit_at(off, &disp32.to_ne_bytes()); + } + } + } + + // -- Encode utilities. + + /// Encode an register-register instruction. + pub(crate) fn encode_rr(&mut self, opc: u8, op1: T, op2: T) + where + Self: EncodeRR, + { + // MR operand encoding. + // op1 -> modrm.rm + // op2 -> modrm.reg + let modrm = modrm( + 0b11, /* mod */ + op2.idx(), /* reg */ + op1.idx(), /* rm */ + ); + + let prefix = >::legacy_prefix(); + let rex = >::rex(op1, op2); + + self.emit_optional(&[prefix, rex]); + self.emit(&[opc, modrm]); + } + + /// Encode an offset-immediate instruction. + /// Register idx is encoded in the opcode. + pub(crate) fn encode_oi(&mut self, opc: u8, op1: T, op2: U) + where + Self: EncodeR, + { + let opc = opc + (op1.idx() & 0b111); + let prefix = >::legacy_prefix(); + let rex = >::rex(op1); + + self.emit_optional(&[prefix, rex]); + self.emit(&[opc]); + self.emit(op2.bytes()); + } + + /// Encode a register instruction. + pub(crate) fn encode_r(&mut self, opc: u8, opc_ext: u8, op1: T) + where + Self: EncodeR, + { + // M operand encoding. + // op1 -> modrm.rm + // opc extension -> modrm.reg + let modrm = modrm( + 0b11, /* mod */ + opc_ext, /* reg */ + op1.idx(), /* rm */ + ); + + let prefix = >::legacy_prefix(); + let rex = >::rex(op1); + + self.emit_optional(&[prefix, rex]); + self.emit(&[opc, modrm]); + } + + /// Encode a memory-immediate instruction. + pub(crate) fn encode_mi(&mut self, opc: u8, opc_ext: u8, op1: MemOp, op2: T) + where + Self: EncodeMI, + { + // MI operand encoding. + // op1 -> modrm.rm + // op2 -> imm + let mode = match op1 { + MemOp::Indirect(..) => { + assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); + 0b00 + } + MemOp::IndirectDisp(..) => { + assert!(!op1.base().need_sib()); + 0b10 + } + }; + + let modrm = modrm( + mode, /* mode */ + opc_ext, /* reg */ + op1.base().idx(), /* rm */ + ); + + let prefix = >::legacy_prefix(); + let rex = >::rex(&op1); + + self.emit_optional(&[prefix, rex]); + self.emit(&[opc, modrm]); + if let MemOp::IndirectDisp(_, disp) = op1 { + self.emit(&disp.to_ne_bytes()); + } + self.emit(op2.bytes()); + } + + /// Encode a memory-register instruction. + pub(crate) fn encode_mr(&mut self, opc: u8, op1: MemOp, op2: T) + where + Self: EncodeMR, + { + // MR operand encoding. + // op1 -> modrm.rm + // op2 -> modrm.reg + let mode = match op1 { + MemOp::Indirect(..) => { + assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); + 0b00 + } + MemOp::IndirectDisp(..) => { + assert!(!op1.base().need_sib()); + 0b10 + } + }; + + let modrm = modrm( + mode, /* mode */ + op2.idx(), /* reg */ + op1.base().idx(), /* rm */ + ); + let prefix = >::legacy_prefix(); + let rex = >::rex(&op1, op2); + + self.emit_optional(&[prefix, rex]); + self.emit(&[opc, modrm]); + if let MemOp::IndirectDisp(_, disp) = op1 { + self.emit(&disp.to_ne_bytes()); + } + } + + /// Encode a register-memory instruction. + pub(crate) fn encode_rm(&mut self, opc: u8, op1: T, op2: MemOp) + where + Self: EncodeMR, + { + // RM operand encoding. + // op1 -> modrm.reg + // op2 -> modrm.rm + self.encode_mr(opc, op2, op1); + } + + /// Encode a jump to label instruction. + pub(crate) fn encode_jmp_label(&mut self, opc: &[u8], op1: &mut Label) { + // Emit the opcode. + self.emit(opc); + + // Record relocation offset starting at the first byte of the disp32. + op1.record_offset(self.buf.len()); + + // Emit a zeroed disp32, which serves as placeholder for the relocation. + // We currently only support disp32 jump targets. + self.emit(&[0u8; 4]); + + // Resolve any pending relocations for the label. + self.resolve(op1); + } +} + +// -- Encoder helper. + +/// Encode helper for register-register instructions. +pub(crate) trait EncodeRR { + fn legacy_prefix() -> Option { + None + } + + fn rex(op1: T, op2: T) -> Option { + if op1.need_rex() || op2.need_rex() { + Some(rex(op1.rexw(), op2.idx(), 0, op1.idx())) + } else { + None + } + } +} + +impl EncodeRR for Asm {} +impl EncodeRR for Asm {} +impl EncodeRR for Asm { + fn legacy_prefix() -> Option { + Some(0x66) + } +} +impl EncodeRR for Asm {} + +/// Encode helper for register instructions. +pub(crate) trait EncodeR { + fn legacy_prefix() -> Option { + None + } + + fn rex(op1: T) -> Option { + if op1.need_rex() { + Some(rex(op1.rexw(), 0, 0, op1.idx())) + } else { + None + } + } +} + +impl EncodeR for Asm {} +impl EncodeR for Asm {} +impl EncodeR for Asm { + fn legacy_prefix() -> Option { + Some(0x66) + } +} +impl EncodeR for Asm {} + +/// Encode helper for memory-register instructions. +pub(crate) trait EncodeMR { + fn legacy_prefix() -> Option { + None + } + + fn rex(op1: &MemOp, op2: T) -> Option { + if op2.need_rex() || (op1.base().is_ext()) { + Some(rex(op2.rexw(), op2.idx(), 0, op1.base().idx())) + } else { + None + } + } +} + +impl EncodeMR for Asm {} +impl EncodeMR for Asm { + fn legacy_prefix() -> Option { + Some(0x66) + } +} +impl EncodeMR for Asm {} +impl EncodeMR for Asm {} + +/// Encode helper for memory-immediate instructions. +pub(crate) trait EncodeMI { + fn legacy_prefix() -> Option { + None + } + + fn rex(op1: &MemOp) -> Option { + if op1.base().is_ext() { + Some(rex(false, 0, 0, op1.base().idx())) + } else { + None + } + } +} + +impl EncodeMI for Asm {} +impl EncodeMI for Asm { + fn legacy_prefix() -> Option { + Some(0x66) + } +} +impl EncodeMI for Asm {} diff --git a/src/lib.rs b/src/lib.rs index 65d70b5..3b7b832 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,6 +73,7 @@ //! } //! ``` +mod asm; mod imm; mod label; mod reg; @@ -80,14 +81,12 @@ mod rt; pub mod insn; +pub use asm::Asm; pub use imm::{Imm16, Imm32, Imm64, Imm8}; pub use label::Label; pub use reg::{Reg16, Reg32, Reg64, Reg8}; pub use rt::Runtime; -use imm::Imm; -use reg::Reg; - /// Type representing a memory operand. pub enum MemOp { /// An indirect memory operand, eg `mov [rax], rcx`. @@ -106,364 +105,3 @@ impl MemOp { } } } - -/// Encode the `REX` byte. -const fn rex(w: bool, r: u8, x: u8, b: u8) -> u8 { - let w = if w { 1 } else { 0 }; - let r = (r >> 3) & 1; - let x = (x >> 3) & 1; - let b = (b >> 3) & 1; - 0b0100_0000 | ((w & 1) << 3) | (r << 2) | (x << 1) | b -} - -/// Encode the `ModR/M` byte. -const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 { - ((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111) -} - -/// `x64` jit assembler. -pub struct Asm { - buf: Vec, -} - -impl Asm { - /// Create a new `x64` jit assembler. - pub fn new() -> Asm { - // Some random default capacity. - let buf = Vec::with_capacity(1024); - Asm { buf } - } - - /// Consume the assembler and get the emitted code. - pub fn into_code(self) -> Vec { - self.buf - } - - /// Emit a slice of bytes. - fn emit(&mut self, bytes: &[u8]) { - self.buf.extend_from_slice(bytes); - } - - /// Emit a slice of optional bytes. - fn emit_optional(&mut self, bytes: &[Option]) { - for byte in bytes.iter().filter_map(|&b| b) { - self.buf.push(byte); - } - } - - /// Emit a slice of bytes at `pos`. - /// - /// # Panics - /// - /// Panics if [pos..pos+len] indexes out of bound of the underlying code buffer. - fn emit_at(&mut self, pos: usize, bytes: &[u8]) { - if let Some(buf) = self.buf.get_mut(pos..pos + bytes.len()) { - buf.copy_from_slice(bytes); - } else { - unimplemented!(); - } - } - - /// Bind the [Label] to the current location. - pub fn bind(&mut self, label: &mut Label) { - // Bind the label to the current offset. - label.bind(self.buf.len()); - - // Resolve any pending relocations for the label. - self.resolve(label); - } - - /// If the [Label] is bound, patch any pending relocation. - pub fn resolve(&mut self, label: &mut Label) { - if let Some(loc) = label.location() { - // For now we only support disp32 as label location. - let loc = i32::try_from(loc).expect("Label location did not fit into i32."); - - // Resolve any pending relocations for the label. - for off in label.offsets_mut().drain() { - // Displacement is relative to the next instruction following the jump. - // We record the offset to patch at the first byte of the disp32 therefore we need - // to account for that in the disp computation. - let disp32 = loc - i32::try_from(off).expect("Label offset did not fit into i32") - 4 /* account for the disp32 */; - - // Patch the relocation with the disp32. - self.emit_at(off, &disp32.to_ne_bytes()); - } - } - } - - // -- Encode utilities. - - /// Encode an register-register instruction. - fn encode_rr(&mut self, opc: u8, op1: T, op2: T) - where - Self: EncodeRR, - { - // MR operand encoding. - // op1 -> modrm.rm - // op2 -> modrm.reg - let modrm = modrm( - 0b11, /* mod */ - op2.idx(), /* reg */ - op1.idx(), /* rm */ - ); - - let prefix = >::legacy_prefix(); - let rex = >::rex(op1, op2); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc, modrm]); - } - - /// Encode an offset-immediate instruction. - /// Register idx is encoded in the opcode. - fn encode_oi(&mut self, opc: u8, op1: T, op2: U) - where - Self: EncodeR, - { - let opc = opc + (op1.idx() & 0b111); - let prefix = >::legacy_prefix(); - let rex = >::rex(op1); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc]); - self.emit(op2.bytes()); - } - - /// Encode a register-immediate instruction. - fn encode_ri(&mut self, opc: u8, opc_ext: u8, op1: T, op2: U) - where - Self: EncodeR, - { - // MI operand encoding. - // op1 -> modrm.rm - // opc extension -> modrm.reg - let modrm = modrm( - 0b11, /* mod */ - opc_ext, /* reg */ - op1.idx(), /* rm */ - ); - - let prefix = >::legacy_prefix(); - let rex = >::rex(op1); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc, modrm]); - self.emit(op2.bytes()); - } - - /// Encode a register instruction. - fn encode_r(&mut self, opc: u8, opc_ext: u8, op1: T) - where - Self: EncodeR, - { - // M operand encoding. - // op1 -> modrm.rm - // opc extension -> modrm.reg - let modrm = modrm( - 0b11, /* mod */ - opc_ext, /* reg */ - op1.idx(), /* rm */ - ); - - let prefix = >::legacy_prefix(); - let rex = >::rex(op1); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc, modrm]); - } - - /// Encode a memory-immediate instruction. - fn encode_mi(&mut self, opc: u8, opc_ext: u8, op1: MemOp, op2: T) - where - Self: EncodeMI, - { - // MI operand encoding. - // op1 -> modrm.rm - // op2 -> imm - let mode = match op1 { - MemOp::Indirect(..) => { - assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); - 0b00 - } - MemOp::IndirectDisp(..) => { - assert!(!op1.base().need_sib()); - 0b10 - } - }; - - let modrm = modrm( - mode, /* mode */ - opc_ext, /* reg */ - op1.base().idx(), /* rm */ - ); - - let prefix = >::legacy_prefix(); - let rex = >::rex(&op1); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc, modrm]); - if let MemOp::IndirectDisp(_, disp) = op1 { - self.emit(&disp.to_ne_bytes()); - } - self.emit(op2.bytes()); - } - - /// Encode a memory-register instruction. - fn encode_mr(&mut self, opc: u8, op1: MemOp, op2: T) - where - Self: EncodeMR, - { - // MR operand encoding. - // op1 -> modrm.rm - // op2 -> modrm.reg - let mode = match op1 { - MemOp::Indirect(..) => { - assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); - 0b00 - } - MemOp::IndirectDisp(..) => { - assert!(!op1.base().need_sib()); - 0b10 - } - }; - - let modrm = modrm( - mode, /* mode */ - op2.idx(), /* reg */ - op1.base().idx(), /* rm */ - ); - let prefix = >::legacy_prefix(); - let rex = >::rex(&op1, op2); - - self.emit_optional(&[prefix, rex]); - self.emit(&[opc, modrm]); - if let MemOp::IndirectDisp(_, disp) = op1 { - self.emit(&disp.to_ne_bytes()); - } - } - - /// Encode a register-memory instruction. - fn encode_rm(&mut self, opc: u8, op1: T, op2: MemOp) - where - Self: EncodeMR, - { - // RM operand encoding. - // op1 -> modrm.reg - // op2 -> modrm.rm - self.encode_mr(opc, op2, op1); - } - - /// Encode a jump to label instruction. - fn encode_jmp_label(&mut self, opc: &[u8], op1: &mut Label) { - // Emit the opcode. - self.emit(opc); - - // Record relocation offset starting at the first byte of the disp32. - op1.record_offset(self.buf.len()); - - // Emit a zeroed disp32, which serves as placeholder for the relocation. - // We currently only support disp32 jump targets. - self.emit(&[0u8; 4]); - - // Resolve any pending relocations for the label. - self.resolve(op1); - } -} - -// -- Encoder helper. - -/// Encode helper for register-register instructions. -trait EncodeRR { - fn legacy_prefix() -> Option { - None - } - - fn rex(op1: T, op2: T) -> Option { - if op1.need_rex() || op2.need_rex() { - Some(rex(op1.rexw(), op2.idx(), 0, op1.idx())) - } else { - None - } - } -} - -impl EncodeRR for Asm {} -impl EncodeRR for Asm {} -impl EncodeRR for Asm { - fn legacy_prefix() -> Option { - Some(0x66) - } -} -impl EncodeRR for Asm {} - -/// Encode helper for register instructions. -trait EncodeR { - fn legacy_prefix() -> Option { - None - } - - fn rex(op1: T) -> Option { - if op1.need_rex() { - Some(rex(op1.rexw(), 0, 0, op1.idx())) - } else { - None - } - } -} - -impl EncodeR for Asm {} -impl EncodeR for Asm {} -impl EncodeR for Asm { - fn legacy_prefix() -> Option { - Some(0x66) - } -} -impl EncodeR for Asm {} - -/// Encode helper for memory-register instructions. -trait EncodeMR { - fn legacy_prefix() -> Option { - None - } - - fn rex(op1: &MemOp, op2: T) -> Option { - if op2.need_rex() || (op1.base().is_ext()) { - Some(rex(op2.rexw(), op2.idx(), 0, op1.base().idx())) - } else { - None - } - } -} - -impl EncodeMR for Asm {} -impl EncodeMR for Asm { - fn legacy_prefix() -> Option { - Some(0x66) - } -} -impl EncodeMR for Asm {} -impl EncodeMR for Asm {} - -/// Encode helper for memory-immediate instructions. -trait EncodeMI { - fn legacy_prefix() -> Option { - None - } - - fn rex(op1: &MemOp) -> Option { - if op1.base().is_ext() { - Some(rex(false, 0, 0, op1.base().idx())) - } else { - None - } - } -} - -impl EncodeMI for Asm {} -impl EncodeMI for Asm { - fn legacy_prefix() -> Option { - Some(0x66) - } -} -impl EncodeMI for Asm {} -- cgit v1.2.3