From 758f014afb8ec5c20ef2fc862fc12e80f65d3d25 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Fri, 13 Dec 2024 01:13:20 +0100 Subject: mem: make all memory operands explicit in size * remove non size explicit MemOp * introduce private Mem trait * implement Mem8, Mem16, Mem32 and Mem64 operands * implement EncodeX helpers based on explicit memory operands * fixup instructions with explicit memory operands * fixup examples --- examples/bf.rs | 20 +++++--- examples/tiny_vm.rs | 6 +-- src/asm.rs | 139 +++++++++++++++++++++++++--------------------------- src/insn/add.rs | 52 +++++++++++++------- src/insn/cmp.rs | 10 ++-- src/insn/dec.rs | 18 +++---- src/insn/inc.rs | 18 +++---- src/insn/mov.rs | 38 +++++++------- src/insn/sub.rs | 8 +-- src/insn/test.rs | 6 +-- src/lib.rs | 78 +---------------------------- src/mem.rs | 116 +++++++++++++++++++++++++++++++++++++++++++ tests/mov.rs | 69 +++++++++++++------------- 13 files changed, 319 insertions(+), 259 deletions(-) create mode 100644 src/mem.rs diff --git a/examples/bf.rs b/examples/bf.rs index 5d0da0b..fb75c09 100644 --- a/examples/bf.rs +++ b/examples/bf.rs @@ -19,7 +19,7 @@ use std::io::Write; use juicebox_asm::insn::*; use juicebox_asm::Runtime; -use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8}; +use juicebox_asm::{Asm, Imm64, Imm8, Label, Mem8, Reg64, Reg8}; // -- BRAINFUCK INTERPRETER ---------------------------------------------------- @@ -204,12 +204,14 @@ fn run_jit(prog: &str) { // single add instruction during compile time. match vm.imem[pc..].iter().take_while(|&&i| i.eq(&'+')).count() { - 1 => asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))), + 1 => { + asm.inc(Mem8::indirect_base_index(dmem_base, dmem_idx)); + } cnt if cnt <= i8::MAX as usize => { // For add m64, imm8, the immediate is sign-extend and // hence treated as signed. 
asm.add( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Mem8::indirect_base_index(dmem_base, dmem_idx), Imm8::from(cnt as u8), ); @@ -225,12 +227,14 @@ fn run_jit(prog: &str) { // single sub instruction during compile time. match vm.imem[pc..].iter().take_while(|&&i| i.eq(&'-')).count() { - 1 => asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))), + 1 => { + asm.dec(Mem8::indirect_base_index(dmem_base, dmem_idx)); + } cnt if cnt <= i8::MAX as usize => { // For sub m64, imm8, the immediate is sign-extend and // hence treated as signed. asm.sub( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Mem8::indirect_base_index(dmem_base, dmem_idx), Imm8::from(cnt as u8), ); @@ -247,7 +251,7 @@ fn run_jit(prog: &str) { // then call into putchar. Since we stored all out vm state in // callee saved registers we don't need to save any registers // before the call. - asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx)); + asm.mov(Reg8::dil, Mem8::indirect_base_index(dmem_base, dmem_idx)); asm.mov(Reg64::rax, Imm64::from(putchar as usize)); asm.call(Reg64::rax); } @@ -263,7 +267,7 @@ fn run_jit(prog: &str) { // Goto label_pair.0 if data memory at active cell is 0. // if vm.dmem[vm.dptr] == 0 goto label_pair.0 asm.cmp( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Mem8::indirect_base_index(dmem_base, dmem_idx), Imm8::from(0u8), ); asm.jz(&mut label_pair.0); @@ -280,7 +284,7 @@ fn run_jit(prog: &str) { // Goto label_pair.1 if data memory at active cell is not 0. 
// if vm.dmem[vm.dptr] != 0 goto label_pair.1 asm.cmp( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Mem8::indirect_base_index(dmem_base, dmem_idx), Imm8::from(0u8), ); asm.jnz(&mut label_pair.1); diff --git a/examples/tiny_vm.rs b/examples/tiny_vm.rs index 1f4c653..7b40063 100644 --- a/examples/tiny_vm.rs +++ b/examples/tiny_vm.rs @@ -38,7 +38,7 @@ use juicebox_asm::insn::*; use juicebox_asm::Runtime; -use juicebox_asm::{Asm, Imm16, Imm64, MemOp, Reg16, Reg64}; +use juicebox_asm::{Asm, Imm16, Imm64, Mem16, Reg16, Reg64}; /// A guest physical address. pub struct PhysAddr(pub u16); @@ -285,11 +285,11 @@ impl TinyVm { // Generate memory operand into regs for guest register. let reg_op = |r: TinyReg| { - MemOp::IndirectDisp(Reg64::rdi, (r.idx() * 2).try_into().expect("only 3 regs")) + Mem16::indirect_disp(Reg64::rdi, (r.idx() * 2).try_into().expect("only 3 regs")) }; // Generate memory operand into dmem for guest phys address. - let mem_op = |paddr: u16| MemOp::IndirectDisp(Reg64::rsi, paddr.into()); + let mem_op = |paddr: u16| Mem16::indirect_disp(Reg64::rsi, paddr.into()); // Compute instructions in translated basic block. let bb_icnt = || -> u64 { (pc - self.pc).try_into().unwrap() }; diff --git a/src/asm.rs b/src/asm.rs index 711de8a..616ba87 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -2,6 +2,7 @@ use crate::*; use imm::Imm; +use mem::{AddrMode, Mem}; use reg::Reg; /// Encode the `REX` byte. @@ -155,24 +156,22 @@ impl Asm { } /// Encode a memory operand instruction. - pub(crate) fn encode_m(&mut self, opc: u8, opc_ext: u8, op1: T) + pub(crate) fn encode_m(&mut self, opc: u8, opc_ext: u8, op1: T) where Self: EncodeM, { - let op1 = op1.mem_op(); - // M operand encoding. // op1 -> modrm.rm - let (mode, rm) = match op1 { - MemOp::Indirect(..) => { + let (mode, rm) = match op1.mode() { + AddrMode::Indirect => { assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); (0b00, op1.base().idx()) } - MemOp::IndirectDisp(..) 
=> { + AddrMode::IndirectDisp => { assert!(!op1.base().need_sib()); (0b10, op1.base().idx()) } - MemOp::IndirectBaseIndex(..) => { + AddrMode::IndirectBaseIndex => { assert!(!op1.base().is_pc_rel()); // Using rsp as index register is interpreted as just base w/o offset. // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2 @@ -193,31 +192,33 @@ impl Asm { self.emit_optional(&[prefix, rex]); self.emit(&[opc, modrm]); - match op1 { - MemOp::Indirect(..) => {} - MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()), - MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]), + match op1.mode() { + AddrMode::Indirect => {} + AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()), + AddrMode::IndirectBaseIndex => { + self.emit(&[sib(0, op1.index().idx(), op1.base().idx())]) + } } } /// Encode a memory-immediate instruction. - pub(crate) fn encode_mi(&mut self, opc: u8, opc_ext: u8, op1: MemOp, op2: T) + pub(crate) fn encode_mi(&mut self, opc: u8, opc_ext: u8, op1: M, op2: T) where - Self: EncodeMI, + Self: EncodeMI, { // MI operand encoding. // op1 -> modrm.rm // op2 -> imm - let (mode, rm) = match op1 { - MemOp::Indirect(..) => { + let (mode, rm) = match op1.mode() { + AddrMode::Indirect => { assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); (0b00, op1.base().idx()) } - MemOp::IndirectDisp(..) => { + AddrMode::IndirectDisp => { assert!(!op1.base().need_sib()); (0b10, op1.base().idx()) } - MemOp::IndirectBaseIndex(..) => { + AddrMode::IndirectBaseIndex => { assert!(!op1.base().is_pc_rel()); // Using rsp as index register is interpreted as just base w/o offset. 
// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2 @@ -233,37 +234,39 @@ impl Asm { rm, /* rm */ ); - let prefix = >::legacy_prefix(); - let rex = >::rex(&op1); + let prefix = >::legacy_prefix(); + let rex = >::rex(&op1); self.emit_optional(&[prefix, rex]); self.emit(&[opc, modrm]); - match op1 { - MemOp::Indirect(..) => {} - MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()), - MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]), + match op1.mode() { + AddrMode::Indirect => {} + AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()), + AddrMode::IndirectBaseIndex => { + self.emit(&[sib(0, op1.index().idx(), op1.base().idx())]) + } } self.emit(op2.bytes()); } /// Encode a memory-register instruction. - pub(crate) fn encode_mr(&mut self, opc: u8, op1: MemOp, op2: T) + pub(crate) fn encode_mr(&mut self, opc: u8, op1: M, op2: T) where - Self: EncodeMR, + Self: EncodeMR, { // MR operand encoding. // op1 -> modrm.rm // op2 -> modrm.reg - let (mode, rm) = match op1 { - MemOp::Indirect(..) => { + let (mode, rm) = match op1.mode() { + AddrMode::Indirect => { assert!(!op1.base().need_sib() && !op1.base().is_pc_rel()); (0b00, op1.base().idx()) } - MemOp::IndirectDisp(..) => { + AddrMode::IndirectDisp => { assert!(!op1.base().need_sib()); (0b10, op1.base().idx()) } - MemOp::IndirectBaseIndex(..) => { + AddrMode::IndirectBaseIndex => { assert!(!op1.base().is_pc_rel()); // Using rsp as index register is interpreted as just base w/o offset. // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2 @@ -279,22 +282,24 @@ impl Asm { rm, /* rm */ ); - let prefix = >::legacy_prefix(); - let rex = >::rex(&op1, op2); + let prefix = >::legacy_prefix(); + let rex = >::rex(&op1, op2); self.emit_optional(&[prefix, rex]); self.emit(&[opc, modrm]); - match op1 { - MemOp::Indirect(..) 
=> {} - MemOp::IndirectDisp(_, disp) => self.emit(&disp.to_ne_bytes()), - MemOp::IndirectBaseIndex(base, index) => self.emit(&[sib(0, index.idx(), base.idx())]), + match op1.mode() { + AddrMode::Indirect => {} + AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()), + AddrMode::IndirectBaseIndex => { + self.emit(&[sib(0, op1.index().idx(), op1.base().idx())]) + } } } /// Encode a register-memory instruction. - pub(crate) fn encode_rm(&mut self, opc: u8, op1: T, op2: MemOp) + pub(crate) fn encode_rm(&mut self, opc: u8, op1: T, op2: M) where - Self: EncodeMR, + Self: EncodeMR, { // RM operand encoding. // op1 -> modrm.reg @@ -370,15 +375,15 @@ impl EncodeR for Asm { impl EncodeR for Asm {} /// Encode helper for memory-register instructions. -pub(crate) trait EncodeMR { +pub(crate) trait EncodeMR { fn legacy_prefix() -> Option { None } - fn rex(op1: &MemOp, op2: T) -> Option { - if op2.need_rex() || op1.base().is_ext() || op1.index().is_ext() { + fn rex(op1: &M, op2: T) -> Option { + if M::is_64() || op2.is_ext() || op1.base().is_ext() || op1.index().is_ext() { Some(rex( - op2.rexw(), + M::is_64(), op2.idx(), op1.index().idx(), op1.base().idx(), @@ -389,71 +394,59 @@ pub(crate) trait EncodeMR { } } -impl EncodeMR for Asm {} -impl EncodeMR for Asm { +impl EncodeMR for Asm {} +impl EncodeMR for Asm { fn legacy_prefix() -> Option { Some(0x66) } } -impl EncodeMR for Asm {} -impl EncodeMR for Asm {} +impl EncodeMR for Asm {} +impl EncodeMR for Asm {} /// Encode helper for memory-immediate instructions. 
-pub(crate) trait EncodeMI { +pub(crate) trait EncodeMI { fn legacy_prefix() -> Option { None } - fn rex(op1: &MemOp) -> Option { - if op1.base().is_ext() || op1.index().is_ext() { - Some(rex(false, 0, op1.index().idx(), op1.base().idx())) + fn rex(op1: &M) -> Option { + if M::is_64() || op1.base().is_ext() || op1.index().is_ext() { + Some(rex(M::is_64(), 0, op1.index().idx(), op1.base().idx())) } else { None } } } -impl EncodeMI for Asm {} -impl EncodeMI for Asm { +impl EncodeMI for Asm {} +impl EncodeMI for Asm { fn legacy_prefix() -> Option { Some(0x66) } } -impl EncodeMI for Asm {} +impl EncodeMI for Asm {} +impl EncodeMI for Asm {} /// Encode helper for memory operand instructions. -pub(crate) trait EncodeM { +pub(crate) trait EncodeM { fn legacy_prefix() -> Option { None } - fn rex(op1: &MemOp) -> Option { - if op1.base().is_ext() || op1.index().is_ext() || Self::is_64bit() { - Some(rex( - Self::is_64bit(), - 0, - op1.index().idx(), - op1.base().idx(), - )) + fn rex(op1: &M) -> Option { + if M::is_64() || op1.base().is_ext() || op1.index().is_ext() { + Some(rex(M::is_64(), 0, op1.index().idx(), op1.base().idx())) } else { None } } - - fn is_64bit() -> bool { - false - } } -impl EncodeM for Asm {} -impl EncodeM for Asm { +impl EncodeM for Asm {} +impl EncodeM for Asm { fn legacy_prefix() -> Option { Some(0x66) } } -impl EncodeM for Asm {} -impl EncodeM for Asm { - fn is_64bit() -> bool { - true - } -} +impl EncodeM for Asm {} +impl EncodeM for Asm {} diff --git a/src/insn/add.rs b/src/insn/add.rs index 1f5294e..9766b22 100644 --- a/src/insn/add.rs +++ b/src/insn/add.rs @@ -1,44 +1,62 @@ use super::Add; -use crate::{Asm, Imm16, Imm8, MemOp, Reg16, Reg32, Reg64}; +use crate::{Asm, Imm16, Imm8, Mem16, Mem32, Mem64, Mem8, Reg16, Reg32, Reg64}; -impl Add for Asm { - fn add(&mut self, op1: Reg64, op2: Reg64) { +impl Add for Asm { + fn add(&mut self, op1: Reg32, op2: Reg32) { self.encode_rr(&[0x01], op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: Reg32, 
op2: Reg32) { +impl Add for Asm { + fn add(&mut self, op1: Reg64, op2: Reg64) { self.encode_rr(&[0x01], op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: MemOp, op2: Reg64) { +impl Add for Asm { + fn add(&mut self, op1: Mem16, op2: Reg16) { self.encode_mr(0x01, op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: MemOp, op2: Reg16) { +impl Add for Asm { + fn add(&mut self, op1: Mem64, op2: Reg64) { self.encode_mr(0x01, op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: MemOp, op2: Imm8) { +impl Add for Asm { + fn add(&mut self, op1: Reg64, op2: Mem64) { + self.encode_rm(0x03, op1, op2); + } +} + +impl Add for Asm { + fn add(&mut self, op1: Mem8, op2: Imm8) { + self.encode_mi(0x80, 0, op1, op2); + } +} + +impl Add for Asm { + fn add(&mut self, op1: Mem16, op2: Imm8) { self.encode_mi(0x83, 0, op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: MemOp, op2: Imm16) { - self.encode_mi(0x81, 0, op1, op2); +impl Add for Asm { + fn add(&mut self, op1: Mem32, op2: Imm8) { + self.encode_mi(0x83, 0, op1, op2); } } -impl Add for Asm { - fn add(&mut self, op1: Reg64, op2: MemOp) { - self.encode_rm(0x03, op1, op2); +impl Add for Asm { + fn add(&mut self, op1: Mem64, op2: Imm8) { + self.encode_mi(0x83, 0, op1, op2); + } +} + +impl Add for Asm { + fn add(&mut self, op1: Mem16, op2: Imm16) { + self.encode_mi(0x81, 0, op1, op2); } } diff --git a/src/insn/cmp.rs b/src/insn/cmp.rs index 95c513d..2d6f48b 100644 --- a/src/insn/cmp.rs +++ b/src/insn/cmp.rs @@ -1,14 +1,14 @@ use super::Cmp; -use crate::{Asm, Imm16, Imm8, MemOp}; +use crate::{Asm, Imm16, Imm8, Mem16, Mem8}; -impl Cmp for Asm { - fn cmp(&mut self, op1: MemOp, op2: Imm8) { +impl Cmp for Asm { + fn cmp(&mut self, op1: Mem8, op2: Imm8) { self.encode_mi(0x80, 0x7, op1, op2); } } -impl Cmp for Asm { - fn cmp(&mut self, op1: MemOp, op2: Imm16) { +impl Cmp for Asm { + fn cmp(&mut self, op1: Mem16, op2: Imm16) { self.encode_mi(0x81, 0x7, op1, op2); } } diff --git a/src/insn/dec.rs b/src/insn/dec.rs 
index 66b83aa..545bc54 100644 --- a/src/insn/dec.rs +++ b/src/insn/dec.rs @@ -1,5 +1,5 @@ use super::Dec; -use crate::{Asm, MemOp16, MemOp32, MemOp64, MemOp8, Reg32, Reg64}; +use crate::{Asm, Mem16, Mem32, Mem64, Mem8, Reg32, Reg64}; impl Dec for Asm { fn dec(&mut self, op1: Reg64) { @@ -13,26 +13,26 @@ impl Dec for Asm { } } -impl Dec for Asm { - fn dec(&mut self, op1: MemOp8) { +impl Dec for Asm { + fn dec(&mut self, op1: Mem8) { self.encode_m(0xfe, 1, op1); } } -impl Dec for Asm { - fn dec(&mut self, op1: MemOp16) { +impl Dec for Asm { + fn dec(&mut self, op1: Mem16) { self.encode_m(0xff, 1, op1); } } -impl Dec for Asm { - fn dec(&mut self, op1: MemOp32) { +impl Dec for Asm { + fn dec(&mut self, op1: Mem32) { self.encode_m(0xff, 1, op1); } } -impl Dec for Asm { - fn dec(&mut self, op1: MemOp64) { +impl Dec for Asm { + fn dec(&mut self, op1: Mem64) { self.encode_m(0xff, 1, op1); } } diff --git a/src/insn/inc.rs b/src/insn/inc.rs index 1530d63..810fe3d 100644 --- a/src/insn/inc.rs +++ b/src/insn/inc.rs @@ -1,5 +1,5 @@ use super::Inc; -use crate::{Asm, MemOp16, MemOp32, MemOp64, MemOp8, Reg32, Reg64}; +use crate::{Asm, Mem16, Mem32, Mem64, Mem8, Reg32, Reg64}; impl Inc for Asm { fn inc(&mut self, op1: Reg64) { @@ -13,26 +13,26 @@ impl Inc for Asm { } } -impl Inc for Asm { - fn inc(&mut self, op1: MemOp8) { +impl Inc for Asm { + fn inc(&mut self, op1: Mem8) { self.encode_m(0xfe, 0, op1); } } -impl Inc for Asm { - fn inc(&mut self, op1: MemOp16) { +impl Inc for Asm { + fn inc(&mut self, op1: Mem16) { self.encode_m(0xff, 0, op1); } } -impl Inc for Asm { - fn inc(&mut self, op1: MemOp32) { +impl Inc for Asm { + fn inc(&mut self, op1: Mem32) { self.encode_m(0xff, 0, op1); } } -impl Inc for Asm { - fn inc(&mut self, op1: MemOp64) { +impl Inc for Asm { + fn inc(&mut self, op1: Mem64) { self.encode_m(0xff, 0, op1); } } diff --git a/src/insn/mov.rs b/src/insn/mov.rs index df45bd6..b9aef67 100644 --- a/src/insn/mov.rs +++ b/src/insn/mov.rs @@ -1,5 +1,5 @@ use super::Mov; 
-use crate::{Asm, Imm16, Imm32, Imm64, Imm8, MemOp, Reg16, Reg32, Reg64, Reg8}; +use crate::{Asm, Imm16, Imm32, Imm64, Imm8, Mem16, Mem32, Mem64, Mem8, Reg16, Reg32, Reg64, Reg8}; // -- MOV : reg reg @@ -29,52 +29,52 @@ impl Mov for Asm { // -- MOV : mem reg -impl Mov for Asm { - fn mov(&mut self, op1: MemOp, op2: Reg64) { +impl Mov for Asm { + fn mov(&mut self, op1: Mem64, op2: Reg64) { self.encode_mr(0x89, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: MemOp, op2: Reg32) { +impl Mov for Asm { + fn mov(&mut self, op1: Mem32, op2: Reg32) { self.encode_mr(0x89, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: MemOp, op2: Reg16) { +impl Mov for Asm { + fn mov(&mut self, op1: Mem16, op2: Reg16) { self.encode_mr(0x89, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: MemOp, op2: Reg8) { +impl Mov for Asm { + fn mov(&mut self, op1: Mem8, op2: Reg8) { self.encode_mr(0x88, op1, op2); } } // -- MOV : reg mem -impl Mov for Asm { - fn mov(&mut self, op1: Reg64, op2: MemOp) { +impl Mov for Asm { + fn mov(&mut self, op1: Reg64, op2: Mem64) { self.encode_rm(0x8b, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: Reg32, op2: MemOp) { +impl Mov for Asm { + fn mov(&mut self, op1: Reg32, op2: Mem32) { self.encode_rm(0x8b, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: Reg16, op2: MemOp) { +impl Mov for Asm { + fn mov(&mut self, op1: Reg16, op2: Mem16) { self.encode_rm(0x8b, op1, op2); } } -impl Mov for Asm { - fn mov(&mut self, op1: Reg8, op2: MemOp) { +impl Mov for Asm { + fn mov(&mut self, op1: Reg8, op2: Mem8) { self.encode_rm(0x8a, op1, op2); } } @@ -107,8 +107,8 @@ impl Mov for Asm { // -- MOV : mem imm -impl Mov for Asm { - fn mov(&mut self, op1: MemOp, op2: Imm16) { +impl Mov for Asm { + fn mov(&mut self, op1: Mem16, op2: Imm16) { self.encode_mi(0xc7, 0, op1, op2); } } diff --git a/src/insn/sub.rs b/src/insn/sub.rs index 814744c..d56daae 100644 --- a/src/insn/sub.rs +++ b/src/insn/sub.rs @@ -1,5 +1,5 @@ use super::Sub; 
-use crate::{Asm, Imm8, MemOp, Reg64}; +use crate::{Asm, Imm8, Mem8, Reg64}; impl Sub for Asm { fn sub(&mut self, op1: Reg64, op2: Reg64) { @@ -7,8 +7,8 @@ impl Sub for Asm { } } -impl Sub for Asm { - fn sub(&mut self, op1: MemOp, op2: Imm8) { - self.encode_mi(0x83, 5, op1, op2); +impl Sub for Asm { + fn sub(&mut self, op1: Mem8, op2: Imm8) { + self.encode_mi(0x80, 5, op1, op2); } } diff --git a/src/insn/test.rs b/src/insn/test.rs index 9bca200..2cf6d26 100644 --- a/src/insn/test.rs +++ b/src/insn/test.rs @@ -1,5 +1,5 @@ use super::Test; -use crate::{Asm, Imm16, MemOp, Reg32, Reg64}; +use crate::{Asm, Imm16, Mem16, Reg32, Reg64}; impl Test for Asm { fn test(&mut self, op1: Reg64, op2: Reg64) { @@ -13,8 +13,8 @@ impl Test for Asm { } } -impl Test for Asm { - fn test(&mut self, op1: MemOp, op2: Imm16) { +impl Test for Asm { + fn test(&mut self, op1: Mem16, op2: Imm16) { self.encode_mi(0xf7, 0, op1, op2); } } diff --git a/src/lib.rs b/src/lib.rs index 6bbfcbf..ef49859 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,7 @@ mod asm; mod imm; mod label; +mod mem; mod reg; mod rt; @@ -84,81 +85,6 @@ pub mod insn; pub use asm::Asm; pub use imm::{Imm16, Imm32, Imm64, Imm8}; pub use label::Label; +pub use mem::{Mem16, Mem32, Mem64, Mem8}; pub use reg::{Reg16, Reg32, Reg64, Reg8}; pub use rt::Runtime; - -/// Type representing a memory operand. -#[derive(Clone, Copy)] -pub enum MemOp { - /// An indirect memory operand, eg `mov [rax], rcx`. - Indirect(Reg64), - - /// An indirect memory operand with additional displacement, eg `mov [rax + 0x10], rcx`. - IndirectDisp(Reg64, i32), - - /// An indirect memory operand in the form base + index, eg `mov [rax + rcx], rdx`. - IndirectBaseIndex(Reg64, Reg64), -} - -impl MemOp { - /// Get the base address register of the memory operand. - const fn base(&self) -> Reg64 { - match self { - MemOp::Indirect(base) => *base, - MemOp::IndirectDisp(base, ..) => *base, - MemOp::IndirectBaseIndex(base, ..) 
=> *base, - } - } - - /// Get the index register of the memory operand. - fn index(&self) -> Reg64 { - // Return zero index register for memory operands w/o index register. - let zero_index = Reg64::rax; - use reg::Reg; - assert_eq!(zero_index.idx(), 0); - - match self { - MemOp::Indirect(..) => zero_index, - MemOp::IndirectDisp(..) => zero_index, - MemOp::IndirectBaseIndex(.., index) => *index, - } - } -} - -/// Trait to give size hints for memory operands. -trait MemOpSized { - fn mem_op(&self) -> MemOp; -} - -macro_rules! impl_memop_sized { - ($(#[$doc:meta] $name:ident)+) => { - $( - #[$doc] - pub struct $name(MemOp); - - impl $name { - /// Create a memory with size hint from a raw memory operand. - pub fn from(op: MemOp) -> Self { - Self(op) - } - } - - impl MemOpSized for $name { - fn mem_op(&self) -> MemOp { - self.0 - } - } - )+ - }; -} - -impl_memop_sized!( - /// A memory operand with a word (8 bit) size hint. - MemOp8 - /// A memory operand with a word (16 bit) size hint. - MemOp16 - /// A memory operand with a dword (32 bit) size hint. - MemOp32 - /// A memory operand with a qword (64 bit) size hint. - MemOp64 -); diff --git a/src/mem.rs b/src/mem.rs new file mode 100644 index 0000000..6a87eb8 --- /dev/null +++ b/src/mem.rs @@ -0,0 +1,116 @@ +//! Definition of different addressing modes and memory operands used as input +//! and output operands in various instructions. + +use crate::Reg64; + +#[derive(Clone, Copy)] +pub(crate) enum AddrMode { + /// An indirect memory operand, eg `mov [rax], rcx`. + Indirect, + /// An indirect memory operand with additional displacement, eg `mov [rax + 0x10], rcx`. + IndirectDisp, + /// An indirect memory operand in the form base + index, eg `mov [rax + rcx], rdx`. + IndirectBaseIndex, +} + +/// Trait to interact with memory operands. +pub(crate) trait Mem { + /// Get the addressing mode [`AddrMode`] of the memory operand. + fn mode(&self) -> AddrMode; + + /// Get the base address register of the memory operand. 
+ fn base(&self) -> Reg64; + + /// Get the index register of the memory operand. + fn index(&self) -> Reg64; + + /// Get the displacement of the memory operand. + fn disp(&self) -> i32; + + /// Check if memory operand is 64 bit. + fn is_64() -> bool; +} + +macro_rules! impl_mem { + ($(#[$doc:meta] $name:ident)+) => { + $( + #[$doc] + pub struct $name { + mode: AddrMode, + base: Reg64, + index: Reg64, + disp: i32, + } + + impl Mem for $name { + fn mode(&self) -> AddrMode { + self.mode + } + + fn base(&self) -> Reg64 { + self.base + } + + fn index(&self) -> Reg64 { + self.index + } + + fn disp(&self) -> i32 { + self.disp + } + + fn is_64() -> bool { + use std::any::TypeId; + TypeId::of::() == TypeId::of::() + } + } + + impl $name { + /// Create a memory operand with `indirect` addressing mode. + /// For example `mov [rax], rcx`. + pub fn indirect(base: Reg64) -> Self { + Self { + mode: AddrMode::Indirect, + base, + index: Reg64::rax, /* zero index */ + disp: 0, + } + } + + /// Create a memory operand with `indirect + displacement` + /// addressing mode. + /// For example `mov [rax + 0x10], rcx`. + pub fn indirect_disp(base: Reg64, disp: i32) -> Self { + Self { + mode: AddrMode::IndirectDisp, + base, + index: Reg64::rax, /* zero index */ + disp, + } + } + + /// Create a memory operand with `base + index` addressing mode. + /// For example `mov [rax + rcx], rdx`. + pub fn indirect_base_index(base: Reg64, index: Reg64) -> Self { + Self { + mode: AddrMode::IndirectBaseIndex, + base, + index, + disp: 0, + } + } + } + )+ + } +} + +impl_mem!( + /// A memory operand with `byte` size (8 bit). + Mem8 + /// A memory operand with `word` size (16 bit). + Mem16 + /// A memory operand with `dword` size (32 bit). + Mem32 + /// A memory operand with `qword` size (64 bit). 
+ Mem64 +); diff --git a/tests/mov.rs b/tests/mov.rs index 8cc0b19..3314be0 100644 --- a/tests/mov.rs +++ b/tests/mov.rs @@ -1,5 +1,8 @@ use juicebox_asm::insn::Mov; -use juicebox_asm::{Asm, Imm16, Imm32, Imm64, Imm8, MemOp, Reg16::*, Reg32::*, Reg64::*, Reg8::*}; +use juicebox_asm::{ + Asm, Imm16, Imm32, Imm64, Imm8, Mem16, Mem32, Mem64, Mem8, Reg16::*, Reg32::*, Reg64::*, + Reg8::*, +}; macro_rules! mov { ($op1:expr, $op2:expr) => {{ @@ -63,54 +66,54 @@ fn mov_ri() { #[test] fn mov_rm() { // 64bit. - assert_eq!(mov!(rcx, MemOp::Indirect(rdx)), [0x48, 0x8b, 0x0a]); - assert_eq!(mov!(r11, MemOp::Indirect(rsi)), [0x4c, 0x8b, 0x1e]); - assert_eq!(mov!(rdi, MemOp::Indirect(r14)), [0x49, 0x8b, 0x3e]); - assert_eq!(mov!(r15, MemOp::Indirect(r14)), [0x4d, 0x8b, 0x3e]); + assert_eq!(mov!(rcx, Mem64::indirect(rdx)), [0x48, 0x8b, 0x0a]); + assert_eq!(mov!(r11, Mem64::indirect(rsi)), [0x4c, 0x8b, 0x1e]); + assert_eq!(mov!(rdi, Mem64::indirect(r14)), [0x49, 0x8b, 0x3e]); + assert_eq!(mov!(r15, Mem64::indirect(r14)), [0x4d, 0x8b, 0x3e]); // 32bit. - assert_eq!(mov!(ecx, MemOp::Indirect(rdx)), [0x8b, 0x0a]); - assert_eq!(mov!(r11d, MemOp::Indirect(rsi)), [0x44, 0x8b, 0x1e]); - assert_eq!(mov!(edi, MemOp::Indirect(r14)), [0x41, 0x8b, 0x3e]); - assert_eq!(mov!(r15d, MemOp::Indirect(r14)), [0x45, 0x8b, 0x3e]); + assert_eq!(mov!(ecx, Mem32::indirect(rdx)), [0x8b, 0x0a]); + assert_eq!(mov!(r11d, Mem32::indirect(rsi)), [0x44, 0x8b, 0x1e]); + assert_eq!(mov!(edi, Mem32::indirect(r14)), [0x41, 0x8b, 0x3e]); + assert_eq!(mov!(r15d, Mem32::indirect(r14)), [0x45, 0x8b, 0x3e]); // 16bit. 
- assert_eq!(mov!(cx, MemOp::Indirect(rdx)), [0x66, 0x8b, 0x0a]); - assert_eq!(mov!(r11w, MemOp::Indirect(rsi)), [0x66, 0x44, 0x8b, 0x1e]); - assert_eq!(mov!(di, MemOp::Indirect(r14)), [0x66, 0x41, 0x8b, 0x3e]); - assert_eq!(mov!(r15w, MemOp::Indirect(r14)), [0x66, 0x45, 0x8b, 0x3e]); + assert_eq!(mov!(cx, Mem16::indirect(rdx)), [0x66, 0x8b, 0x0a]); + assert_eq!(mov!(r11w, Mem16::indirect(rsi)), [0x66, 0x44, 0x8b, 0x1e]); + assert_eq!(mov!(di, Mem16::indirect(r14)), [0x66, 0x41, 0x8b, 0x3e]); + assert_eq!(mov!(r15w, Mem16::indirect(r14)), [0x66, 0x45, 0x8b, 0x3e]); // 8bit. - assert_eq!(mov!(cl, MemOp::Indirect(rdx)), [0x8a, 0x0a]); - assert_eq!(mov!(r11l, MemOp::Indirect(rsi)), [0x44, 0x8a, 0x1e]); - assert_eq!(mov!(dil, MemOp::Indirect(r14)), [0x41, 0x8a, 0x3e]); - assert_eq!(mov!(r15l, MemOp::Indirect(r14)), [0x45, 0x8a, 0x3e]); + assert_eq!(mov!(cl, Mem8::indirect(rdx)), [0x8a, 0x0a]); + assert_eq!(mov!(r11l, Mem8::indirect(rsi)), [0x44, 0x8a, 0x1e]); + assert_eq!(mov!(dil, Mem8::indirect(r14)), [0x41, 0x8a, 0x3e]); + assert_eq!(mov!(r15l, Mem8::indirect(r14)), [0x45, 0x8a, 0x3e]); } #[rustfmt::skip] #[test] fn mov_mr() { // 64bit. - assert_eq!(mov!(MemOp::Indirect(rdx), rcx), [0x48, 0x89, 0x0a]); - assert_eq!(mov!(MemOp::Indirect(rsi), r11), [0x4c, 0x89, 0x1e]); - assert_eq!(mov!(MemOp::Indirect(r14), rdi), [0x49, 0x89, 0x3e]); - assert_eq!(mov!(MemOp::Indirect(r14), r15), [0x4d, 0x89, 0x3e]); + assert_eq!(mov!(Mem64::indirect(rdx), rcx), [0x48, 0x89, 0x0a]); + assert_eq!(mov!(Mem64::indirect(rsi), r11), [0x4c, 0x89, 0x1e]); + assert_eq!(mov!(Mem64::indirect(r14), rdi), [0x49, 0x89, 0x3e]); + assert_eq!(mov!(Mem64::indirect(r14), r15), [0x4d, 0x89, 0x3e]); // 32bit. 
- assert_eq!(mov!(MemOp::Indirect(rdx), ecx), [0x89, 0x0a]); - assert_eq!(mov!(MemOp::Indirect(rsi), r11d), [0x44, 0x89, 0x1e]); - assert_eq!(mov!(MemOp::Indirect(r14), edi), [0x41, 0x89, 0x3e]); - assert_eq!(mov!(MemOp::Indirect(r14), r15d), [0x45, 0x89, 0x3e]); + assert_eq!(mov!(Mem32::indirect(rdx), ecx), [0x89, 0x0a]); + assert_eq!(mov!(Mem32::indirect(rsi), r11d), [0x44, 0x89, 0x1e]); + assert_eq!(mov!(Mem32::indirect(r14), edi), [0x41, 0x89, 0x3e]); + assert_eq!(mov!(Mem32::indirect(r14), r15d), [0x45, 0x89, 0x3e]); // 16bit. - assert_eq!(mov!(MemOp::Indirect(rdx), cx), [0x66, 0x89, 0x0a]); - assert_eq!(mov!(MemOp::Indirect(rsi), r11w), [0x66, 0x44, 0x89, 0x1e]); - assert_eq!(mov!(MemOp::Indirect(r14), di), [0x66, 0x41, 0x89, 0x3e]); - assert_eq!(mov!(MemOp::Indirect(r14), r15w), [0x66, 0x45, 0x89, 0x3e]); + assert_eq!(mov!(Mem16::indirect(rdx), cx), [0x66, 0x89, 0x0a]); + assert_eq!(mov!(Mem16::indirect(rsi), r11w), [0x66, 0x44, 0x89, 0x1e]); + assert_eq!(mov!(Mem16::indirect(r14), di), [0x66, 0x41, 0x89, 0x3e]); + assert_eq!(mov!(Mem16::indirect(r14), r15w), [0x66, 0x45, 0x89, 0x3e]); // 8bit. - assert_eq!(mov!(MemOp::Indirect(rdx), cl), [0x88, 0x0a]); - assert_eq!(mov!(MemOp::Indirect(rsi), r11l), [0x44, 0x88, 0x1e]); - assert_eq!(mov!(MemOp::Indirect(r14), dil), [0x41, 0x88, 0x3e]); - assert_eq!(mov!(MemOp::Indirect(r14), r15l), [0x45, 0x88, 0x3e]); + assert_eq!(mov!(Mem8::indirect(rdx), cl), [0x88, 0x0a]); + assert_eq!(mov!(Mem8::indirect(rsi), r11l), [0x44, 0x88, 0x1e]); + assert_eq!(mov!(Mem8::indirect(r14), dil), [0x41, 0x88, 0x3e]); + assert_eq!(mov!(Mem8::indirect(r14), r15l), [0x45, 0x88, 0x3e]); } -- cgit v1.2.3