From 36345d8ab93d23d9f94372863e3747a07222b6ce Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Fri, 6 Dec 2024 22:17:01 +0100 Subject: asm: add inc, xor insn and initial support for memory base+idx --- src/asm.rs | 67 +++++++++++++++++++++++++++++++++++++++++---------------- src/insn.rs | 14 ++++++++++++ src/insn/add.rs | 12 +++++++++++ src/insn/inc.rs | 14 ++++++++++++ src/insn/xor.rs | 8 +++++++ src/lib.rs | 18 ++++++++++++++++ 6 files changed, 115 insertions(+), 18 deletions(-) create mode 100644 src/insn/inc.rs create mode 100644 src/insn/xor.rs diff --git a/src/asm.rs b/src/asm.rs index 010bb68..7deeb70 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -18,6 +18,11 @@ const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 { ((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111) } +/// Encode the `SIB` byte. +const fn sib(scale: u8, index: u8, base: u8) -> u8 { + ((scale & 0b11) << 6) | ((index & 0b111) << 3) | (base & 0b111) +} + /// `x64` jit assembler. pub struct Asm { buf: Vec, @@ -157,21 +162,29 @@ impl Asm { // MI operand encoding. // op1 -> modrm.rm // op2 -> imm - let mode = match op1 { + let (mode, rm) = match op1 { MemOp::Indirect(..) => { assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); - 0b00 + (0b00, op1.base().idx()) } MemOp::IndirectDisp(..) => { assert!(!op1.base().need_sib()); - 0b10 + (0b10, op1.base().idx()) + } + MemOp::IndirectBaseIndex(..) => { + assert!(!op1.base().is_pc_rel()); + // Using rsp as index register is interpreted as just base w/o offset. + // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2 + // Disallow this case, as guard for the user. + assert!(!matches!(op1.index(), Reg64::rsp)); + (0b00, 0b100) } }; let modrm = modrm( - mode, /* mode */ - opc_ext, /* reg */ - op1.base().idx(), /* rm */ + mode, /* mode */ + opc_ext, /* reg */ + rm, /* rm */ ); let prefix = >::legacy_prefix(); @@ -179,8 +192,10 @@ impl Asm { self.emit_optional(&[prefix, rex]); self.emit(&[opc, modrm]); - if let MemOp::IndirectDisp(_, disp) = op1 { - self.emit(&disp.to_ne_bytes()); + match op1 { + MemOp::Indirect(..) => {} + MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()), + MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]), } self.emit(op2.bytes()); } @@ -193,29 +208,40 @@ impl Asm { // MR operand encoding. // op1 -> modrm.rm // op2 -> modrm.reg - let mode = match op1 { + let (mode, rm) = match op1 { MemOp::Indirect(..) => { assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); - 0b00 + (0b00, op1.base().idx()) } MemOp::IndirectDisp(..) => { assert!(!op1.base().need_sib()); - 0b10 + (0b10, op1.base().idx()) + } + MemOp::IndirectBaseIndex(..) => { + assert!(!op1.base().is_pc_rel()); + // Using rsp as index register is interpreted as just base w/o offset. + // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2 + // Disallow this case, as guard for the user. + assert!(!matches!(op1.index(), Reg64::rsp)); + (0b00, 0b100) } }; let modrm = modrm( - mode, /* mode */ - op2.idx(), /* reg */ - op1.base().idx(), /* rm */ + mode, /* mode */ + op2.idx(), /* reg */ + rm, /* rm */ ); + let prefix = >::legacy_prefix(); let rex = >::rex(&op1, op2); self.emit_optional(&[prefix, rex]); self.emit(&[opc, modrm]); - if let MemOp::IndirectDisp(_, disp) = op1 { - self.emit(&disp.to_ne_bytes()); + match op1 { + MemOp::Indirect(..) => {} + MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()), + MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]), } } @@ -305,7 +331,12 @@ pub(crate) trait EncodeMR { fn rex(op1: &MemOp, op2: T) -> Option { if op2.need_rex() || (op1.base().is_ext()) { - Some(rex(op2.rexw(), op2.idx(), 0, op1.base().idx())) + Some(rex( + op2.rexw(), + op2.idx(), + op1.index().idx(), + op1.base().idx(), + )) } else { None } @@ -329,7 +360,7 @@ pub(crate) trait EncodeMI { fn rex(op1: &MemOp) -> Option { if op1.base().is_ext() { - Some(rex(false, 0, 0, op1.base().idx())) + Some(rex(false, 0, op1.index().idx(), op1.base().idx())) } else { None } diff --git a/src/insn.rs b/src/insn.rs index c02206c..2f80d3d 100644 --- a/src/insn.rs +++ b/src/insn.rs @@ -6,6 +6,7 @@ mod cmovnz; mod cmovz; mod cmp; mod dec; +mod inc; mod jmp; mod jnz; mod jz; @@ -15,6 +16,7 @@ mod pop; mod push; mod ret; mod test; +mod xor; /// Trait for [`add`](https://www.felixcloutier.com/x86/add) instruction kinds. pub trait Add { @@ -57,6 +59,12 @@ pub trait Dec { fn dec(&mut self, op1: T); } +/// Trait for [`inc`](https://www.felixcloutier.com/x86/inc) instruction kinds. +pub trait Inc { + /// Emit a increment instruction. + fn inc(&mut self, op1: T); +} + /// Trait for [`jmp`](https://www.felixcloutier.com/x86/jmp) instruction kinds. pub trait Jmp { /// Emit an unconditional jump instruction. @@ -101,3 +109,9 @@ pub trait Test { /// `SF`, `ZF`, and `PF` status flags, the result is discarded. fn test(&mut self, op1: T, op2: U); } + +/// Trait for [`xor`](https://www.felixcloutier.com/x86/xor) instruction kinds. +pub trait Xor { + /// Emit a xor instruction. + fn xor(&mut self, op1: T, op2: U); +} diff --git a/src/insn/add.rs b/src/insn/add.rs index b8fe96c..d5312be 100644 --- a/src/insn/add.rs +++ b/src/insn/add.rs @@ -13,6 +13,12 @@ impl Add for Asm { } } +impl Add for Asm { + fn add(&mut self, op1: MemOp, op2: Reg64) { + self.encode_mr(0x01, op1, op2); + } +} + impl Add for Asm { fn add(&mut self, op1: MemOp, op2: Reg16) { self.encode_mr(0x01, op1, op2); @@ -24,3 +30,9 @@ impl Add for Asm { self.encode_mi(0x81, 0, op1, op2); } } + +impl Add for Asm { + fn add(&mut self, op1: Reg64, op2: MemOp) { + self.encode_rm(0x03, op1, op2); + } +} diff --git a/src/insn/inc.rs b/src/insn/inc.rs new file mode 100644 index 0000000..ede780a --- /dev/null +++ b/src/insn/inc.rs @@ -0,0 +1,14 @@ +use super::Inc; +use crate::{Asm, Reg32, Reg64}; + +impl Inc for Asm { + fn inc(&mut self, op1: Reg64) { + self.encode_r(0xff, 0, op1); + } +} + +impl Inc for Asm { + fn inc(&mut self, op1: Reg32) { + self.encode_r(0xff, 0, op1); + } +} diff --git a/src/insn/xor.rs b/src/insn/xor.rs new file mode 100644 index 0000000..b1fdc48 --- /dev/null +++ b/src/insn/xor.rs @@ -0,0 +1,8 @@ +use super::Xor; +use crate::{Asm, Reg64}; + +impl Xor for Asm { + fn xor(&mut self, op1: Reg64, op2: Reg64) { + self.encode_rr(&[0x31], op1, op2); + } +} diff --git a/src/lib.rs b/src/lib.rs index 3b7b832..131440a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,6 +94,9 @@ pub enum MemOp { /// An indirect memory operand with additional displacement, eg `mov [rax + 0x10], rcx`. IndirectDisp(Reg64, i32), + + /// An indirect memory operand in the form base + index, eg `mov [rax + rcx], rdx`. + IndirectBaseIndex(Reg64, Reg64), } impl MemOp { @@ -102,6 +105,21 @@ impl MemOp { match self { MemOp::Indirect(base) => *base, MemOp::IndirectDisp(base, ..) => *base, + MemOp::IndirectBaseIndex(base, ..) => *base, + } + } + + /// Get the index register of the memory operand. + fn index(&self) -> Reg64 { + // Return zero index register for memory operands w/o index register. + let zero_index = Reg64::rax; + use reg::Reg; + assert_eq!(zero_index.idx(), 0); + + match self { + MemOp::Indirect(..) => zero_index, + MemOp::IndirectDisp(..) => zero_index, + MemOp::IndirectBaseIndex(.., index) => *index, } } } -- cgit v1.2.3