// Commit:  c619b7aeb72cb18cc0f76a94e78cc5d9d7c9e89f
// Author:  Johannes Stoelp
// Date:    Sun, 26 Feb 2023 20:38:04 +0100
// Subject: base version capable to emit different mov insns
//
// Experimenting with type system to detect invalid operands during compile time.
//
// The commit creates three files (`src/insn.rs`, `src/lib.rs`, `src/reg.rs`). They are laid out
// below as a single unit: `insn` and `reg` are inline modules, everything else is the content of
// `src/lib.rs`.

// -- src/insn.rs

mod insn {
    /// The `mov` instruction, implemented once per supported `(op1, op2)` operand combination.
    ///
    /// Operand combinations without an implementation are rejected at compile time.
    pub trait Mov<T, U> {
        /// Emit a `mov` with destination `op1` and source `op2`.
        fn mov(&mut self, op1: T, op2: U);
    }
}

// -- src/reg.rs

mod reg {
    /// Trait to interact with register operands.
    pub(crate) trait Reg {
        /// Get the raw x64 register code.
        fn idx(&self) -> u8;

        /// Get the `REX.W` bit.
        fn rexw(&self) -> u8;

        /// Check if the register requires a `REX` byte.
        fn need_rex(&self) -> bool {
            self.idx() > 7 || self.rexw() > 0
        }

        /// Check if the register requires a `SIB` byte if used as addressing operand.
        ///
        /// See [64 bit
        /// addressing](https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing)
        /// for further details.
        fn need_sib(&self) -> bool {
            self.idx() == 4 || self.idx() == 12
        }

        /// Check if the register is interpreted as `PC` relative if used as addressing operand.
        ///
        /// See [64 bit
        /// addressing](https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing)
        /// for further details.
        fn is_pc_rel(&self) -> bool {
            self.idx() == 5 || self.idx() == 13
        }
    }

    /// Generate a register enum and, unless `ENUM_ONLY` is given, its [`Reg`] implementation.
    ///
    /// The enum discriminant (declaration order) is used as register code, `$rexw` is the value
    /// returned for the `REX.W` bit.
    macro_rules! impl_reg {
        (ENUM_ONLY, $name:ident, { $($reg:ident),+ $(,)? }) => {
            /// General purpose register operands.
            #[allow(non_camel_case_types)] // Variants are spelled like the register mnemonics.
            #[derive(Debug, Copy, Clone, PartialEq, Eq)]
            #[repr(u8)]
            pub enum $name {
                $( $reg, )+
            }

            #[cfg(test)]
            impl $name {
                /// Iterate over all registers of this enum in declaration order.
                fn iter() -> impl Iterator<Item = &'static $name> {
                    const ALL: &[$name] = &[$( $name::$reg, )+];
                    ALL.iter()
                }
            }
        };

        ($name:ident, $rexw:expr, { $($reg:ident),+ $(,)? }) => {
            impl_reg!(ENUM_ONLY, $name, { $( $reg, )+ });

            impl Reg for $name {
                /// Get the raw x64 register code.
                fn idx(&self) -> u8 {
                    *self as u8
                }

                /// Get the `REX.W` bit.
                fn rexw(&self) -> u8 {
                    $rexw
                }
            }
        };
    }

    impl_reg!(Reg64, 1, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 });
    impl_reg!(Reg32, 0, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d });
    impl_reg!(Reg16, 0, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w });
    impl_reg!(ENUM_ONLY,
        Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l,
                ah, ch, dh, bh });

    impl Reg8 {
        /// Check if the register is one of the legacy high-byte registers `{AH, CH, DH, BH}`.
        ///
        /// These share the register codes `[4:7]` with `{SPL, BPL, SIL, DIL}` and can only be
        /// addressed by an instruction that carries no `REX` prefix.
        pub(crate) fn is_high_byte(&self) -> bool {
            matches!(self, Reg8::ah | Reg8::ch | Reg8::dh | Reg8::bh)
        }
    }

    impl Reg for Reg8 {
        /// Get the raw x64 register code.
        fn idx(&self) -> u8 {
            // The high-byte registers are declared after `r15l`, hence their discriminant does
            // not match their register code.
            match self {
                Reg8::ah => 4,
                Reg8::ch => 5,
                Reg8::dh => 6,
                Reg8::bh => 7,
                _ => *self as u8,
            }
        }

        /// Get the `REX.W` bit.
        fn rexw(&self) -> u8 {
            0
        }

        /// Check if the register requires a `REX` byte.
        ///
        /// For 1 byte addressing, register indexes `[4:7]` require a `REX` prefix, or else they
        /// will be decoded as `{AH, CH, DH, BH}` accordingly.
        ///
        /// See [Registers](https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers) for
        /// further details or consult `Table 3-1. Register Codes` in the *Intel Software
        /// Developers Manual - Volume 2*.
        fn need_rex(&self) -> bool {
            self.idx() > 7 || matches!(self, Reg8::spl | Reg8::bpl | Reg8::sil | Reg8::dil)
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_reg8() {
            use super::Reg8::*;

            for r in Reg8::iter() {
                // Check register index.
                let idx = match r {
                    al => 0,
                    cl => 1,
                    dl => 2,
                    bl => 3,
                    spl => 4,
                    bpl => 5,
                    sil => 6,
                    dil => 7,
                    r8l => 8,
                    r9l => 9,
                    r10l => 10,
                    r11l => 11,
                    r12l => 12,
                    r13l => 13,
                    r14l => 14,
                    r15l => 15,
                    ah => 4,
                    ch => 5,
                    dh => 6,
                    bh => 7,
                };
                assert_eq!(r.idx(), idx);

                // Check REX.W bit.
                assert_eq!(r.rexw(), 0);

                // Check need REX byte.
                let rex = matches!(
                    r,
                    r8l | r9l | r10l | r11l | r12l | r13l | r14l | r15l | spl | bpl | sil | dil
                );
                assert_eq!(r.need_rex(), rex);

                // Check need SIB byte.
                assert_eq!(r.need_sib(), matches!(r, spl | r12l | ah));

                // Check if is PC relative addressing.
                assert_eq!(r.is_pc_rel(), matches!(r, bpl | r13l | ch));

                // Check high-byte register classification.
                assert_eq!(r.is_high_byte(), matches!(r, ah | ch | dh | bh));
            }
        }

        #[test]
        fn test_reg16() {
            use super::Reg16::*;

            for r in Reg16::iter() {
                // Check register index.
                let idx = match r {
                    ax => 0,
                    cx => 1,
                    dx => 2,
                    bx => 3,
                    sp => 4,
                    bp => 5,
                    si => 6,
                    di => 7,
                    r8w => 8,
                    r9w => 9,
                    r10w => 10,
                    r11w => 11,
                    r12w => 12,
                    r13w => 13,
                    r14w => 14,
                    r15w => 15,
                };
                assert_eq!(r.idx(), idx);

                // Check REX.W bit.
                assert_eq!(r.rexw(), 0);

                // Check need REX byte.
                let rex = matches!(r, r8w | r9w | r10w | r11w | r12w | r13w | r14w | r15w);
                assert_eq!(r.need_rex(), rex);

                // Check need SIB byte.
                assert_eq!(r.need_sib(), matches!(r, sp | r12w));

                // Check if is PC relative addressing.
                assert_eq!(r.is_pc_rel(), matches!(r, bp | r13w));
            }
        }

        #[test]
        fn test_reg32() {
            use super::Reg32::*;

            for r in Reg32::iter() {
                // Check register index.
                let idx = match r {
                    eax => 0,
                    ecx => 1,
                    edx => 2,
                    ebx => 3,
                    esp => 4,
                    ebp => 5,
                    esi => 6,
                    edi => 7,
                    r8d => 8,
                    r9d => 9,
                    r10d => 10,
                    r11d => 11,
                    r12d => 12,
                    r13d => 13,
                    r14d => 14,
                    r15d => 15,
                };
                assert_eq!(r.idx(), idx);

                // Check REX.W bit.
                assert_eq!(r.rexw(), 0);

                // Check need REX byte.
                let rex = matches!(r, r8d | r9d | r10d | r11d | r12d | r13d | r14d | r15d);
                assert_eq!(r.need_rex(), rex);

                // Check need SIB byte.
                assert_eq!(r.need_sib(), matches!(r, esp | r12d));

                // Check if is PC relative addressing.
                assert_eq!(r.is_pc_rel(), matches!(r, ebp | r13d));
            }
        }

        #[test]
        fn test_reg64() {
            use super::Reg64::*;

            for r in Reg64::iter() {
                // Check register index.
                let idx = match r {
                    rax => 0,
                    rcx => 1,
                    rdx => 2,
                    rbx => 3,
                    rsp => 4,
                    rbp => 5,
                    rsi => 6,
                    rdi => 7,
                    r8 => 8,
                    r9 => 9,
                    r10 => 10,
                    r11 => 11,
                    r12 => 12,
                    r13 => 13,
                    r14 => 14,
                    r15 => 15,
                };
                assert_eq!(r.idx(), idx);

                // Check REX.W bit.
                assert_eq!(r.rexw(), 1);

                // Check need REX byte.
                assert!(r.need_rex());

                // Check need SIB byte.
                assert_eq!(r.need_sib(), matches!(r, rsp | r12));

                // Check if is PC relative addressing.
                assert_eq!(r.is_pc_rel(), matches!(r, rbp | r13));
            }
        }
    }
}

// -- src/lib.rs

use reg::Reg;
pub use reg::{Reg16, Reg32, Reg64, Reg8};

use insn::Mov;

/// Memory operand, addressed through a 64 bit base register.
#[derive(Debug, Clone, Copy)]
pub enum MemOp {
    /// `[base]`
    Indirect(Reg64),
    /// `[base + disp32]`
    IndirectDisp(Reg64, i32),
}

impl MemOp {
    /// Get the base register of the memory operand.
    const fn base(&self) -> Reg64 {
        match self {
            MemOp::Indirect(base) | MemOp::IndirectDisp(base, ..) => *base,
        }
    }
}

/// Encode the `REX` byte.
///
/// `w` is the `REX.W` bit. `r`, `x` and `b` are full register codes of which bit 3 is extracted
/// into `REX.R`, `REX.X` and `REX.B` respectively.
const fn rex(w: u8, r: u8, x: u8, b: u8) -> u8 {
    let r = (r >> 3) & 1;
    let x = (x >> 3) & 1;
    let b = (b >> 3) & 1;
    0b0100_0000 | ((w & 1) << 3) | (r << 2) | (x << 1) | b
}

/// Encode the `ModR/M` byte.
///
/// Only the low 2 bits of `mod_` and the low 3 bits of `reg` / `rm` are used, so full register
/// codes can be passed.
const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 {
    ((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111)
}

/// x64 assembler, collecting the emitted machine code in an internal buffer.
pub struct Asm {
    buf: Vec<u8>,
}

impl Default for Asm {
    fn default() -> Self {
        Self::new()
    }
}

impl Asm {
    /// Create a new assembler with an empty code buffer.
    pub fn new() -> Asm {
        Asm {
            buf: Vec::with_capacity(1024),
        }
    }

    /// Consume the assembler and get the emitted machine code.
    pub fn into_code(self) -> Vec<u8> {
        self.buf
    }

    /// Append `bytes` to the code buffer.
    fn emit(&mut self, bytes: &[u8]) {
        self.buf.extend_from_slice(bytes);
    }

    /// Append every byte that is `Some` to the code buffer, skipping the `None` entries.
    fn emit_optional(&mut self, bytes: &[Option<u8>]) {
        self.buf.extend(bytes.iter().copied().flatten());
    }

    /// Overwrite already emitted code at offset `pos` with `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `pos..pos + bytes.len()` is not fully inside the already emitted code.
    #[allow(dead_code)] // No instruction in this file patches emitted code yet.
    fn emit_at(&mut self, pos: usize, bytes: &[u8]) {
        // `checked_add` so a huge `pos` is reported as out of range instead of overflowing.
        let dst = match pos.checked_add(bytes.len()) {
            Some(end) => self.buf.get_mut(pos..end),
            None => None,
        };

        match dst {
            Some(dst) => dst.copy_from_slice(bytes),
            None => panic!(
                "emit_at: {} byte(s) at offset {} are outside of the emitted code",
                bytes.len(),
                pos
            ),
        }
    }

    /// Emit a `mov` with destination `op1` and source `op2`.
    ///
    /// Only operand combinations with a [`Mov`] implementation for [`Asm`] compile.
    ///
    /// # Panics
    ///
    /// Panics if a memory operand uses a base register which requires a `SIB` byte, or (without
    /// displacement) one which is decoded as `PC` relative, and if an 8 bit high-byte register
    /// is combined with a register that requires a `REX` prefix.
    pub fn mov<T, U>(&mut self, op1: T, op2: U)
    where
        Self: Mov<T, U>,
    {
        <Self as Mov<T, U>>::mov(self, op1, op2);
    }

    /// Encode a register-register instruction with opcode `opc`.
    fn encode_rr<T: Reg>(&mut self, opc: u8, op1: T, op2: T)
    where
        Self: EncodeRR<T>,
    {
        // MR operand encoding.
        //      op1 -> modrm.rm
        //      op2 -> modrm.reg
        let modrm = modrm(
            0b11,      /* mod */
            op2.idx(), /* reg */
            op1.idx(), /* rm */
        );

        let prefix = <Self as EncodeRR<T>>::legacy_prefix();
        let rex = <Self as EncodeRR<T>>::rex(op1, op2);

        self.emit_optional(&[prefix, rex]);
        self.emit(&[opc, modrm]);
    }

    /// Encode a memory-register instruction with opcode `opc`.
    fn encode_mr<T: Reg>(&mut self, opc: u8, op1: MemOp, op2: T)
    where
        Self: EncodeMR<T>,
    {
        // MR operand encoding.
        //      op1 -> modrm.rm
        //      op2 -> modrm.reg
        let base = op1.base();
        let mode = match op1 {
            MemOp::Indirect(..) => {
                assert!(
                    !base.need_sib() && !base.is_pc_rel(),
                    "base register needs a SIB byte or is decoded as PC relative"
                );
                0b00
            }
            MemOp::IndirectDisp(..) => {
                assert!(!base.need_sib(), "base register needs a SIB byte");
                0b10
            }
        };

        let modrm = modrm(
            mode,       /* mod */
            op2.idx(),  /* reg */
            base.idx(), /* rm */
        );
        let prefix = <Self as EncodeMR<T>>::legacy_prefix();
        let rex = <Self as EncodeMR<T>>::rex(&op1, op2);

        self.emit_optional(&[prefix, rex]);
        self.emit(&[opc, modrm]);
        if let MemOp::IndirectDisp(_, disp) = op1 {
            // x64 displacements are little-endian, independent of the byte order of the host
            // running the assembler.
            self.emit(&disp.to_le_bytes());
        }
    }

    /// Encode a register-memory instruction with opcode `opc`.
    fn encode_rm<T: Reg>(&mut self, opc: u8, op1: T, op2: MemOp)
    where
        Self: EncodeMR<T>,
    {
        // RM operand encoding.
        //      op1 -> modrm.reg
        //      op2 -> modrm.rm
        self.encode_mr(opc, op2, op1);
    }
}

// -- Encoder helper.

/// Prefix selection for register-register encodings of operand type `T`.
trait EncodeRR<T: Reg> {
    /// Legacy prefix byte to emit in front of the instruction, if any.
    fn legacy_prefix() -> Option<u8> {
        None
    }

    /// `REX` byte for `op1` in `modrm.rm` and `op2` in `modrm.reg`, if one is required.
    fn rex(op1: T, op2: T) -> Option<u8> {
        if op1.need_rex() || op2.need_rex() {
            Some(rex(op1.rexw(), op2.idx(), 0, op1.idx()))
        } else {
            None
        }
    }
}

impl EncodeRR<Reg8> for Asm {
    fn rex(op1: Reg8, op2: Reg8) -> Option<u8> {
        if op1.need_rex() || op2.need_rex() {
            // With a `REX` prefix present, register codes `[4:7]` are decoded as
            // `{SPL, BPL, SIL, DIL}`, so a high-byte register would silently turn into a
            // different register.
            assert!(
                !op1.is_high_byte() && !op2.is_high_byte(),
                "ah/ch/dh/bh can not be encoded in an instruction requiring a REX prefix"
            );
            Some(rex(op1.rexw(), op2.idx(), 0, op1.idx()))
        } else {
            None
        }
    }
}
impl EncodeRR<Reg32> for Asm {}
impl EncodeRR<Reg16> for Asm {
    fn legacy_prefix() -> Option<u8> {
        // Operand-size override prefix.
        Some(0x66)
    }
}
impl EncodeRR<Reg64> for Asm {}

/// Prefix selection for memory-register encodings with register operand type `T`.
trait EncodeMR<T: Reg> {
    /// Legacy prefix byte to emit in front of the instruction, if any.
    fn legacy_prefix() -> Option<u8> {
        None
    }

    /// `REX` byte for `op1` in `modrm.rm` and `op2` in `modrm.reg`, if one is required.
    fn rex(op1: &MemOp, op2: T) -> Option<u8> {
        // The base register only contributes `REX.B`. Its `REX.W` bit is meaningless for
        // addressing, so only an extended base register (r8-r15) requires the prefix.
        if op1.base().idx() > 7 || op2.need_rex() {
            Some(rex(op2.rexw(), op2.idx(), 0, op1.base().idx()))
        } else {
            None
        }
    }
}

impl EncodeMR<Reg32> for Asm {}
impl EncodeMR<Reg64> for Asm {}

// -- Instruction implementations.

impl Mov<Reg64, Reg64> for Asm {
    fn mov(&mut self, op1: Reg64, op2: Reg64) {
        self.encode_rr(0x89, op1, op2);
    }
}

impl Mov<Reg32, Reg32> for Asm {
    fn mov(&mut self, op1: Reg32, op2: Reg32) {
        self.encode_rr(0x89, op1, op2);
    }
}

impl Mov<Reg16, Reg16> for Asm {
    fn mov(&mut self, op1: Reg16, op2: Reg16) {
        self.encode_rr(0x89, op1, op2);
    }
}

impl Mov<Reg8, Reg8> for Asm {
    fn mov(&mut self, op1: Reg8, op2: Reg8) {
        self.encode_rr(0x88, op1, op2);
    }
}

impl Mov<MemOp, Reg64> for Asm {
    fn mov(&mut self, op1: MemOp, op2: Reg64) {
        self.encode_mr(0x89, op1, op2);
    }
}

impl Mov<MemOp, Reg32> for Asm {
    fn mov(&mut self, op1: MemOp, op2: Reg32) {
        self.encode_mr(0x89, op1, op2);
    }
}

impl Mov<Reg64, MemOp> for Asm {
    fn mov(&mut self, op1: Reg64, op2: MemOp) {
        self.encode_rm(0x8b, op1, op2);
    }
}

impl Mov<Reg32, MemOp> for Asm {
    fn mov(&mut self, op1: Reg32, op2: MemOp) {
        self.encode_rm(0x8b, op1, op2);
    }
}