From 2699292790476eccd726fc5dae179b3a688a1468 Mon Sep 17 00:00:00 2001
From: Johannes Stoelp
Date: Fri, 6 Dec 2024 23:32:58 +0100
Subject: asm: add initial support for memory operand only instructions

* add dec, inc instructions with memory operand
---
 src/asm.rs      | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/insn/dec.rs | 26 ++++++++++++++++-
 src/insn/inc.rs | 26 ++++++++++++++++-
 src/lib.rs      | 39 ++++++++++++++++++++++++++
 4 files changed, 174 insertions(+), 4 deletions(-)

diff --git a/src/asm.rs b/src/asm.rs
index 7deeb70..711de8a 100644
--- a/src/asm.rs
+++ b/src/asm.rs
@@ -154,6 +154,52 @@ impl Asm {
         self.emit(&[opc, modrm]);
     }
 
+    /// Encode a memory operand instruction.
+    pub(crate) fn encode_m<T: MemOpSized>(&mut self, opc: u8, opc_ext: u8, op1: T)
+    where
+        Self: EncodeM<T>,
+    {
+        let op1 = op1.mem_op();
+
+        // M operand encoding.
+        //   op1 -> modrm.rm
+        let (mode, rm) = match op1 {
+            MemOp::Indirect(..) => {
+                assert!(!op1.base().need_sib() && !op1.base().is_pc_rel());
+                (0b00, op1.base().idx())
+            }
+            MemOp::IndirectDisp(..) => {
+                assert!(!op1.base().need_sib());
+                (0b10, op1.base().idx())
+            }
+            MemOp::IndirectBaseIndex(..) => {
+                assert!(!op1.base().is_pc_rel());
+                // Using rsp as index register is interpreted as just base w/o offset.
+                //   https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
+                // Disallow this case, as guard for the user.
+                assert!(!matches!(op1.index(), Reg64::rsp));
+                (0b00, 0b100)
+            }
+        };
+
+        let modrm = modrm(
+            mode,    /* mode */
+            opc_ext, /* reg */
+            rm,      /* rm */
+        );
+
+        let prefix = <Self as EncodeM<T>>::legacy_prefix();
+        let rex = <Self as EncodeM<T>>::rex(&op1);
+
+        self.emit_optional(&[prefix, rex]);
+        self.emit(&[opc, modrm]);
+        match op1 {
+            MemOp::Indirect(..) => {}
+            MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()),
+            MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]),
+        }
+    }
+
     /// Encode a memory-immediate instruction.
     pub(crate) fn encode_mi(&mut self, opc: u8, opc_ext: u8, op1: MemOp, op2: T)
     where
@@ -330,7 +376,7 @@ pub(crate) trait EncodeMR {
     }
 
     fn rex(op1: &MemOp, op2: T) -> Option<u8> {
-        if op2.need_rex() || (op1.base().is_ext()) {
+        if op2.need_rex() || op1.base().is_ext() || op1.index().is_ext() {
             Some(rex(
                 op2.rexw(),
                 op2.idx(),
@@ -359,7 +405,7 @@ pub(crate) trait EncodeMI {
     }
 
     fn rex(op1: &MemOp) -> Option<u8> {
-        if op1.base().is_ext() {
+        if op1.base().is_ext() || op1.index().is_ext() {
             Some(rex(false, 0, op1.index().idx(), op1.base().idx()))
         } else {
             None
@@ -374,3 +420,40 @@ impl EncodeMI for Asm {
     }
 }
 impl EncodeMI for Asm {}
+
+/// Encode helper for memory operand instructions.
+pub(crate) trait EncodeM<T: MemOpSized> {
+    fn legacy_prefix() -> Option<u8> {
+        None
+    }
+
+    fn rex(op1: &MemOp) -> Option<u8> {
+        if op1.base().is_ext() || op1.index().is_ext() || Self::is_64bit() {
+            Some(rex(
+                Self::is_64bit(),
+                0,
+                op1.index().idx(),
+                op1.base().idx(),
+            ))
+        } else {
+            None
+        }
+    }
+
+    fn is_64bit() -> bool {
+        false
+    }
+}
+
+impl EncodeM<MemOp8> for Asm {}
+impl EncodeM<MemOp16> for Asm {
+    fn legacy_prefix() -> Option<u8> {
+        Some(0x66)
+    }
+}
+impl EncodeM<MemOp32> for Asm {}
+impl EncodeM<MemOp64> for Asm {
+    fn is_64bit() -> bool {
+        true
+    }
+}
diff --git a/src/insn/dec.rs b/src/insn/dec.rs
index 1377d1c..66b83aa 100644
--- a/src/insn/dec.rs
+++ b/src/insn/dec.rs
@@ -1,5 +1,5 @@
 use super::Dec;
-use crate::{Asm, Reg32, Reg64};
+use crate::{Asm, MemOp16, MemOp32, MemOp64, MemOp8, Reg32, Reg64};
 
 impl Dec<Reg64> for Asm {
     fn dec(&mut self, op1: Reg64) {
@@ -12,3 +12,27 @@ impl Dec<Reg32> for Asm {
         self.encode_r(0xff, 1, op1);
     }
 }
+
+impl Dec<MemOp8> for Asm {
+    fn dec(&mut self, op1: MemOp8) {
+        self.encode_m(0xfe, 1, op1);
+    }
+}
+
+impl Dec<MemOp16> for Asm {
+    fn dec(&mut self, op1: MemOp16) {
+        self.encode_m(0xff, 1, op1);
+    }
+}
+
+impl Dec<MemOp32> for Asm {
+    fn dec(&mut self, op1: MemOp32) {
+        self.encode_m(0xff, 1, op1);
+    }
+}
+
+impl Dec<MemOp64> for Asm {
+    fn dec(&mut self, op1: MemOp64) {
+        self.encode_m(0xff, 1, op1);
+    }
+}
diff --git a/src/insn/inc.rs b/src/insn/inc.rs
index ede780a..1530d63 100644
--- a/src/insn/inc.rs
+++ b/src/insn/inc.rs
@@ -1,5 +1,5 @@
 use super::Inc;
-use crate::{Asm, Reg32, Reg64};
+use crate::{Asm, MemOp16, MemOp32, MemOp64, MemOp8, Reg32, Reg64};
 
 impl Inc<Reg64> for Asm {
     fn inc(&mut self, op1: Reg64) {
@@ -12,3 +12,27 @@ impl Inc<Reg32> for Asm {
         self.encode_r(0xff, 0, op1);
     }
 }
+
+impl Inc<MemOp8> for Asm {
+    fn inc(&mut self, op1: MemOp8) {
+        self.encode_m(0xfe, 0, op1);
+    }
+}
+
+impl Inc<MemOp16> for Asm {
+    fn inc(&mut self, op1: MemOp16) {
+        self.encode_m(0xff, 0, op1);
+    }
+}
+
+impl Inc<MemOp32> for Asm {
+    fn inc(&mut self, op1: MemOp32) {
+        self.encode_m(0xff, 0, op1);
+    }
+}
+
+impl Inc<MemOp64> for Asm {
+    fn inc(&mut self, op1: MemOp64) {
+        self.encode_m(0xff, 0, op1);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 131440a..6bbfcbf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -88,6 +88,7 @@ pub use reg::{Reg16, Reg32, Reg64, Reg8};
 pub use rt::Runtime;
 
 /// Type representing a memory operand.
+#[derive(Clone, Copy)]
 pub enum MemOp {
     /// An indirect memory operand, eg `mov [rax], rcx`.
     Indirect(Reg64),
@@ -123,3 +124,41 @@ impl MemOp {
         }
     }
 }
+
+/// Trait to give size hints for memory operands.
+trait MemOpSized {
+    fn mem_op(&self) -> MemOp;
+}
+
+macro_rules! impl_memop_sized {
+    ($(#[$doc:meta] $name:ident)+) => {
+        $(
+            #[$doc]
+            pub struct $name(MemOp);
+
+            impl $name {
+                /// Create a memory operand with a size hint from a raw memory operand.
+                pub fn from(op: MemOp) -> Self {
+                    Self(op)
+                }
+            }
+
+            impl MemOpSized for $name {
+                fn mem_op(&self) -> MemOp {
+                    self.0
+                }
+            }
+        )+
+    };
+}
+
+impl_memop_sized!(
+    /// A memory operand with a byte (8 bit) size hint.
+    MemOp8
+    /// A memory operand with a word (16 bit) size hint.
+    MemOp16
+    /// A memory operand with a dword (32 bit) size hint.
+    MemOp32
+    /// A memory operand with a qword (64 bit) size hint.
+    MemOp64
+);
-- 
cgit v1.2.3