aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Johannes Stoelp <johannes.stoelp@gmail.com>, 2023-02-26 20:38:04 +0100
committer: Johannes Stoelp <johannes.stoelp@gmail.com>, 2023-02-26 20:38:04 +0100
commit: c619b7aeb72cb18cc0f76a94e78cc5d9d7c9e89f (patch)
tree: 986bd2bf12e0782a2bdb9466225df0dc9c4065ad /src
download: juicebox-asm-c619b7aeb72cb18cc0f76a94e78cc5d9d7c9e89f.tar.gz
juicebox-asm-c619b7aeb72cb18cc0f76a94e78cc5d9d7c9e89f.zip
Base version capable of emitting different mov instructions
Experimenting with the type system to detect invalid operands at compile time.
Diffstat (limited to 'src')
-rw-r--r--src/insn.rs3
-rw-r--r--src/lib.rs229
-rw-r--r--src/reg.rs318
3 files changed, 550 insertions, 0 deletions
diff --git a/src/insn.rs b/src/insn.rs
new file mode 100644
index 0000000..bb1a380
--- /dev/null
+++ b/src/insn.rs
@@ -0,0 +1,3 @@
+/// Interface for emitting a `mov` instruction with operands of type `T` and `U`.
+///
+/// One implementation exists per supported pairing of operand types, so a call with an
+/// unsupported pairing does not compile.
+pub trait Mov<T, U> {
+ /// Emit a `mov` instruction with `op1` as first and `op2` as second operand.
+ fn mov(&mut self, op1: T, op2: U);
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..7c24704
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,229 @@
+mod insn;
+mod reg;
+
+use reg::Reg;
+pub use reg::{Reg16, Reg32, Reg64, Reg8};
+
+use insn::Mov;
+
+/// Memory operand, addressed through a 64 bit base register.
+pub enum MemOp {
+ /// Access the memory the base register points to, without displacement.
+ Indirect(Reg64),
+ /// Access the memory at base register plus a signed 32 bit displacement.
+ IndirectDisp(Reg64, i32),
+}
+
+impl MemOp {
+    /// Get the base register of the memory operand.
+    const fn base(&self) -> Reg64 {
+        // Both addressing forms carry the base register as their first field.
+        match self {
+            MemOp::Indirect(reg) | MemOp::IndirectDisp(reg, ..) => *reg,
+        }
+    }
+}
+
+/// Encode the `REX` byte.
+///
+/// The result has the layout `0100WRXB`. `w` contributes its lowest bit as `REX.W`, while `r`,
+/// `x` and `b` are full register indices of which only bit 3 (the extension bit) is used.
+const fn rex(w: u8, r: u8, x: u8, b: u8) -> u8 {
+    let w_bit = (w & 1) << 3;
+    let r_bit = ((r >> 3) & 1) << 2;
+    let x_bit = ((x >> 3) & 1) << 1;
+    let b_bit = (b >> 3) & 1;
+
+    0x40 | w_bit | r_bit | x_bit | b_bit
+}
+
+/// Encode the `ModR/M` byte.
+///
+/// The result has the layout `mod[7:6] reg[5:3] rm[2:0]`. Every argument is truncated to the
+/// width of its field, so full 4 bit register indices can be passed in directly.
+const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 {
+    let mod_bits = (mod_ & 0b11) << 6;
+    let reg_bits = (reg & 0b111) << 3;
+    let rm_bits = rm & 0b111;
+
+    mod_bits | reg_bits | rm_bits
+}
+
+/// Assembler that appends encoded instructions to an internal byte buffer.
+pub struct Asm {
+ /// Bytes of the instructions emitted so far.
+ buf: Vec<u8>,
+}
+
+impl Asm {
+    /// Create an assembler with an empty code buffer.
+    ///
+    /// Capacity for 1024 bytes is reserved up front.
+    pub fn new() -> Asm {
+        let buf = Vec::with_capacity(1024);
+        Asm { buf }
+    }
+
+    /// Consume the assembler and return the emitted machine code.
+    pub fn into_code(self) -> Vec<u8> {
+        self.buf
+    }
+
+    /// Append `bytes` to the code buffer.
+    fn emit(&mut self, bytes: &[u8]) {
+        self.buf.extend_from_slice(bytes);
+    }
+
+    /// Append all bytes that are `Some` to the code buffer, in order; `None` entries are skipped.
+    fn emit_optional(&mut self, bytes: &[Option<u8>]) {
+        self.buf.extend(bytes.iter().flatten());
+    }
+
+    /// Overwrite already emitted code, starting at offset `pos`, with `bytes`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the range `pos..pos + bytes.len()` is not fully inside the code buffer.
+    fn emit_at(&mut self, pos: usize, bytes: &[u8]) {
+        let buf_len = self.buf.len();
+        // `checked_add` keeps a huge `pos` from wrapping around into a valid range.
+        let end = pos
+            .checked_add(bytes.len())
+            .filter(|&end| end <= buf_len);
+
+        match end {
+            Some(end) => self.buf[pos..end].copy_from_slice(bytes),
+            None => panic!(
+                "emit_at: {} bytes at offset {} do not fit into code buffer of {} bytes",
+                bytes.len(),
+                pos,
+                buf_len
+            ),
+        }
+    }
+
+    /// Emit a `mov` instruction with the operands `op1` and `op2`.
+    ///
+    /// Only operand type pairings for which `Mov<T, U>` is implemented on `Asm` are accepted.
+    pub fn mov<T, U>(&mut self, op1: T, op2: U)
+    where
+        Self: Mov<T, U>,
+    {
+        <Self as Mov<T, U>>::mov(self, op1, op2);
+    }
+
+    /// Encode an instruction with opcode `opc` and two register operands.
+    ///
+    /// Emits the optional legacy prefix and `REX` byte, followed by the opcode and a `ModR/M`
+    /// byte with `mod = 0b11`.
+    fn encode_rr<T: Reg>(&mut self, opc: u8, op1: T, op2: T)
+    where
+        Self: EncodeRR<T>,
+    {
+        // MR operand encoding.
+        //      op1 -> modrm.rm
+        //      op2 -> modrm.reg
+        let modrm = modrm(
+            0b11,      /* mod */
+            op2.idx(), /* reg */
+            op1.idx(), /* rm */
+        );
+
+        let prefix = <Self as EncodeRR<T>>::legacy_prefix();
+        let rex = <Self as EncodeRR<T>>::rex(op1, op2);
+
+        self.emit_optional(&[prefix, rex]);
+        self.emit(&[opc, modrm]);
+    }
+
+    /// Encode an instruction with opcode `opc`, a memory operand `op1` and a register operand
+    /// `op2`.
+    ///
+    /// Emits the optional legacy prefix and `REX` byte, the opcode, the `ModR/M` byte and, for
+    /// [`MemOp::IndirectDisp`], the 32 bit displacement in little-endian byte order.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the base register of `op1` requires a `SIB` byte, or if it would be interpreted
+    /// as `PC` relative when used without displacement.
+    fn encode_mr<T: Reg>(&mut self, opc: u8, op1: MemOp, op2: T)
+    where
+        Self: EncodeMR<T>,
+    {
+        // MR operand encoding.
+        //      op1 -> modrm.rm
+        //      op2 -> modrm.reg
+        let base = op1.base();
+        let mode = match op1 {
+            MemOp::Indirect(..) => {
+                assert!(!base.need_sib() && !base.is_pc_rel());
+                0b00
+            }
+            MemOp::IndirectDisp(..) => {
+                assert!(!base.need_sib());
+                0b10
+            }
+        };
+
+        let modrm = modrm(
+            mode,       /* mode */
+            op2.idx(),  /* reg */
+            base.idx(), /* rm */
+        );
+        let prefix = <Self as EncodeMR<T>>::legacy_prefix();
+        let rex = <Self as EncodeMR<T>>::rex(&op1, op2);
+
+        self.emit_optional(&[prefix, rex]);
+        self.emit(&[opc, modrm]);
+        if let MemOp::IndirectDisp(_, disp) = op1 {
+            // x86 immediates and displacements are little-endian regardless of the byte order
+            // of the machine running the assembler.
+            self.emit(&disp.to_le_bytes());
+        }
+    }
+
+    /// Encode an instruction with opcode `opc`, a register operand `op1` and a memory operand
+    /// `op2`.
+    fn encode_rm<T: Reg>(&mut self, opc: u8, op1: T, op2: MemOp)
+    where
+        Self: EncodeMR<T>,
+    {
+        // RM operand encoding.
+        //      op1 -> modrm.reg
+        //      op2 -> modrm.rm
+        self.encode_mr(opc, op2, op1);
+    }
+}
+
+impl Default for Asm {
+    /// Equivalent to [`Asm::new`].
+    fn default() -> Asm {
+        Asm::new()
+    }
+}
+
+// -- Encoder helper.
+
+/// Prefix encoding for instructions with two register operands of type `T`.
+trait EncodeRR<T: Reg> {
+    /// Legacy prefix byte to emit in front of the instruction, if any.
+    fn legacy_prefix() -> Option<u8> {
+        None
+    }
+
+    /// Compute the `REX` byte, or `None` if neither operand requires one.
+    ///
+    /// `op1` is the `ModR/M.rm` operand and `op2` the `ModR/M.reg` operand; `REX.W` is taken
+    /// from `op1`.
+    fn rex(op1: T, op2: T) -> Option<u8> {
+        let required = op1.need_rex() || op2.need_rex();
+        required.then(|| rex(op1.rexw(), op2.idx(), 0, op1.idx()))
+    }
+}
+
+impl EncodeRR<Reg8> for Asm {
+    /// Compute the `REX` byte for two 8 bit register operands.
+    ///
+    /// # Panics
+    ///
+    /// Panics if one operand is `ah`, `ch`, `dh` or `bh` while a `REX` byte is required. With a
+    /// `REX` prefix present the register codes `4..=7` select `spl`, `bpl`, `sil` and `dil`, so
+    /// such an operand pairing has no valid encoding.
+    fn rex(op1: Reg8, op2: Reg8) -> Option<u8> {
+        if !(op1.need_rex() || op2.need_rex()) {
+            return None;
+        }
+
+        let is_high_byte = |reg: Reg8| matches!(reg, Reg8::ah | Reg8::ch | Reg8::dh | Reg8::bh);
+        assert!(
+            !(is_high_byte(op1) || is_high_byte(op2)),
+            "ah/ch/dh/bh cannot be combined with an operand that requires a REX prefix"
+        );
+
+        Some(rex(op1.rexw(), op2.idx(), 0, op1.idx()))
+    }
+}
+impl EncodeRR<Reg32> for Asm {}
+impl EncodeRR<Reg16> for Asm {
+    /// 16 bit register operands are encoded with the `0x66` legacy prefix.
+    fn legacy_prefix() -> Option<u8> {
+        Some(0x66)
+    }
+}
+impl EncodeRR<Reg64> for Asm {}
+
+/// Prefix encoding for instructions with a memory operand and a register operand of type `T`.
+trait EncodeMR<T: Reg> {
+    /// Legacy prefix byte to emit in front of the instruction, if any.
+    fn legacy_prefix() -> Option<u8> {
+        None
+    }
+
+    /// Compute the `REX` byte, or `None` if the instruction can be encoded without one.
+    ///
+    /// `op1` supplies the base register (`ModR/M.rm`), `op2` is the `ModR/M.reg` operand and
+    /// also defines `REX.W`.
+    fn rex(op1: &MemOp, op2: T) -> Option<u8> {
+        let base = op1.base();
+
+        // The base register is always a `Reg64`, whose `need_rex` is unconditionally true
+        // because of its `REX.W` bit. The operand size comes from `op2` though, so the base
+        // register only forces a `REX` byte when its index needs the extension bit.
+        if base.idx() > 7 || op2.need_rex() {
+            Some(rex(op2.rexw(), op2.idx(), 0, base.idx()))
+        } else {
+            None
+        }
+    }
+}
+
+// Memory operand encodings exist for 32 and 64 bit register operands; both rely on the default
+// methods of `EncodeMR`.
+impl EncodeMR<Reg32> for Asm {}
+impl EncodeMR<Reg64> for Asm {}
+
+// -- Instruction implementations.
+
+// Register to register moves use opcode `0x89` (`0x88` for 8 bit registers); `op1` is encoded
+// in `ModR/M.rm` and `op2` in `ModR/M.reg`.
+
+// 64 bit register, 64 bit register.
+impl Mov<Reg64, Reg64> for Asm {
+ fn mov(&mut self, op1: Reg64, op2: Reg64) {
+ self.encode_rr(0x89, op1, op2);
+ }
+}
+
+// 32 bit register, 32 bit register.
+impl Mov<Reg32, Reg32> for Asm {
+ fn mov(&mut self, op1: Reg32, op2: Reg32) {
+ self.encode_rr(0x89, op1, op2);
+ }
+}
+
+// 16 bit register, 16 bit register; the `0x66` legacy prefix comes from `EncodeRR<Reg16>`.
+impl Mov<Reg16, Reg16> for Asm {
+ fn mov(&mut self, op1: Reg16, op2: Reg16) {
+ self.encode_rr(0x89, op1, op2);
+ }
+}
+
+// 8 bit register, 8 bit register; uses the separate opcode `0x88`.
+impl Mov<Reg8, Reg8> for Asm {
+ fn mov(&mut self, op1: Reg8, op2: Reg8) {
+ self.encode_rr(0x88, op1, op2);
+ }
+}
+
+// Memory operand, 64 bit register: opcode `0x89`, memory operand in `ModR/M.rm`.
+impl Mov<MemOp, Reg64> for Asm {
+ fn mov(&mut self, op1: MemOp, op2: Reg64) {
+ self.encode_mr(0x89, op1, op2);
+ }
+}
+
+// Memory operand, 32 bit register: opcode `0x89`, memory operand in `ModR/M.rm`.
+impl Mov<MemOp, Reg32> for Asm {
+ fn mov(&mut self, op1: MemOp, op2: Reg32) {
+ self.encode_mr(0x89, op1, op2);
+ }
+}
+
+// 64 bit register, memory operand: opcode `0x8b`, register in `ModR/M.reg` and memory operand
+// in `ModR/M.rm`.
+impl Mov<Reg64, MemOp> for Asm {
+ fn mov(&mut self, op1: Reg64, op2: MemOp) {
+ self.encode_rm(0x8b, op1, op2);
+ }
+}
+
+// 32 bit register, memory operand: opcode `0x8b`, register in `ModR/M.reg` and memory operand
+// in `ModR/M.rm`.
+impl Mov<Reg32, MemOp> for Asm {
+ fn mov(&mut self, op1: Reg32, op2: MemOp) {
+ self.encode_rm(0x8b, op1, op2);
+ }
+}
diff --git a/src/reg.rs b/src/reg.rs
new file mode 100644
index 0000000..bf30a40
--- /dev/null
+++ b/src/reg.rs
@@ -0,0 +1,318 @@
+/// Trait to interact with register operands.
+pub(crate) trait Reg {
+    /// Get the raw x64 register code.
+    fn idx(&self) -> u8;
+
+    /// Get the `REX.W` bit.
+    fn rexw(&self) -> u8;
+
+    /// Check if the register requires a `REX` byte.
+    ///
+    /// This is the case for register codes above 7 and for registers with the `REX.W` bit set.
+    fn need_rex(&self) -> bool {
+        let extended = self.idx() > 7;
+        extended || self.rexw() > 0
+    }
+
+    /// Check if the register requires a `SIB` byte if used as addressing operand.
+    ///
+    /// This is the case for the register codes 4 and 12.
+    ///
+    /// See [64 bit
+    /// addressing](https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing) for
+    /// further details.
+    fn need_sib(&self) -> bool {
+        matches!(self.idx(), 4 | 12)
+    }
+
+    /// Check if the register is interpreted as `PC` relative if used as addressing operand.
+    ///
+    /// This is the case for the register codes 5 and 13.
+    ///
+    /// See [64 bit
+    /// addressing](https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing) for
+    /// further details.
+    fn is_pc_rel(&self) -> bool {
+        matches!(self.idx(), 5 | 13)
+    }
+}
+
+// Generate a register enum from a list of register names and, in the second form, also its
+// `Reg` implementation.
+//
+// The enum is `#[repr(u8)]` without explicit discriminants, so the position of a name in the
+// list is the value `idx` returns for it in the generated `Reg` implementation.
+macro_rules! impl_reg {
+ // Form 1: only define the enum, plus a test-only iterator over all of its variants.
+ (ENUM_ONLY, $name:ident, { $($reg: ident),+ $(,)? }) => {
+ /// General purpose register operands.
+ #[allow(non_camel_case_types)]
+ #[derive(Copy, Clone)]
+ #[repr(u8)]
+ pub enum $name {
+ $( $reg, )+
+ }
+
+ #[cfg(test)]
+ impl $name {
+ // Iterate over every variant, in declaration order.
+ fn iter() -> impl Iterator<Item = &'static $name> {
+ use $name::*;
+ [$( $reg, )+].iter()
+ }
+ }
+ };
+
+ // Form 2: define the enum through form 1 and implement `Reg` for it, with `$rexw` as the
+ // value returned by `rexw`.
+ ($name:ident, $rexw: expr, { $($reg: ident),+ $(,)? }) => {
+ impl_reg!(ENUM_ONLY, $name, { $( $reg, )+ });
+
+ impl Reg for $name {
+ /// Get the raw x64 register code.
+ fn idx(&self) -> u8 {
+ // The enum discriminant is the register code.
+ *self as u8
+ }
+
+ /// Get the `REX.W` bit.
+ fn rexw(&self) -> u8 {
+ $rexw
+ }
+ }
+ }
+}
+
+// Register definitions. The order of the names matters: the position of a name in the list
+// becomes its enum discriminant, which `idx` reports as register code.
+impl_reg!(Reg64, 1, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 });
+impl_reg!(Reg32, 0, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d });
+impl_reg!(Reg16, 0, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w });
+// `Reg8` only gets the enum generated; its `Reg` implementation is written by hand because
+// `ah`, `ch`, `dh` and `bh` are listed last and their discriminants are not their register codes.
+impl_reg!(ENUM_ONLY,
+ Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l,
+ ah, ch, dh, bh });
+
+impl Reg for Reg8 {
+    /// Get the raw x64 register code.
+    ///
+    /// `ah`, `ch`, `dh` and `bh` map to the codes `4..=7`; every other register uses its enum
+    /// discriminant.
+    fn idx(&self) -> u8 {
+        use Reg8::{ah, bh, ch, dh};
+
+        match *self {
+            ah => 4,
+            ch => 5,
+            dh => 6,
+            bh => 7,
+            other => other as u8,
+        }
+    }
+
+    /// Get the `REX.W` bit.
+    fn rexw(&self) -> u8 {
+        0
+    }
+
+    /// Check if the register requires a `REX` byte.
+    ///
+    /// For 1 byte addressing, register indexes `[4:7]` require a `REX` prefix, or else they will
+    /// be decoded as `{AH, CH, DH, BH}` accordingly.
+    ///
+    /// See [Registers](https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers) for
+    /// further details or consult `Table 3-1. Register Codes` in the *Intel Software Developers
+    /// Manual - Volume 2*.
+    fn need_rex(&self) -> bool {
+        let low_byte_alias = matches!(self, Reg8::spl | Reg8::bpl | Reg8::sil | Reg8::dil);
+        low_byte_alias || self.idx() > 7
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Check every `Reg` property of the 8 bit registers.
+    #[test]
+    fn test_reg8() {
+        use Reg8::*;
+
+        for reg in Reg8::iter() {
+            // Register code; `ah`, `ch`, `dh`, `bh` share the codes 4..=7 with `spl`..`dil`.
+            let want_idx = match reg {
+                al => 0,
+                cl => 1,
+                dl => 2,
+                bl => 3,
+                spl | ah => 4,
+                bpl | ch => 5,
+                sil | dh => 6,
+                dil | bh => 7,
+                r8l => 8,
+                r9l => 9,
+                r10l => 10,
+                r11l => 11,
+                r12l => 12,
+                r13l => 13,
+                r14l => 14,
+                r15l => 15,
+            };
+            assert_eq!(reg.idx(), want_idx);
+
+            // REX.W bit.
+            assert_eq!(reg.rexw(), 0);
+
+            // REX byte: register codes above 7 plus `spl`, `bpl`, `sil`, `dil`.
+            let want_rex = matches!(
+                reg,
+                spl | bpl | sil | dil | r8l | r9l | r10l | r11l | r12l | r13l | r14l | r15l
+            );
+            assert_eq!(reg.need_rex(), want_rex);
+
+            // SIB byte.
+            assert_eq!(reg.need_sib(), matches!(reg, spl | r12l | ah));
+
+            // PC relative addressing.
+            assert_eq!(reg.is_pc_rel(), matches!(reg, bpl | r13l | ch));
+        }
+    }
+
+    /// Check every `Reg` property of the 16 bit registers.
+    #[test]
+    fn test_reg16() {
+        use Reg16::*;
+
+        for reg in Reg16::iter() {
+            // Register code.
+            let want_idx = match reg {
+                ax => 0,
+                cx => 1,
+                dx => 2,
+                bx => 3,
+                sp => 4,
+                bp => 5,
+                si => 6,
+                di => 7,
+                r8w => 8,
+                r9w => 9,
+                r10w => 10,
+                r11w => 11,
+                r12w => 12,
+                r13w => 13,
+                r14w => 14,
+                r15w => 15,
+            };
+            assert_eq!(reg.idx(), want_idx);
+
+            // REX.W bit.
+            assert_eq!(reg.rexw(), 0);
+
+            // REX byte: only the registers with a code above 7.
+            let want_rex = matches!(reg, r8w | r9w | r10w | r11w | r12w | r13w | r14w | r15w);
+            assert_eq!(reg.need_rex(), want_rex);
+
+            // SIB byte.
+            assert_eq!(reg.need_sib(), matches!(reg, sp | r12w));
+
+            // PC relative addressing.
+            assert_eq!(reg.is_pc_rel(), matches!(reg, bp | r13w));
+        }
+    }
+
+    /// Check every `Reg` property of the 32 bit registers.
+    #[test]
+    fn test_reg32() {
+        use Reg32::*;
+
+        for reg in Reg32::iter() {
+            // Register code.
+            let want_idx = match reg {
+                eax => 0,
+                ecx => 1,
+                edx => 2,
+                ebx => 3,
+                esp => 4,
+                ebp => 5,
+                esi => 6,
+                edi => 7,
+                r8d => 8,
+                r9d => 9,
+                r10d => 10,
+                r11d => 11,
+                r12d => 12,
+                r13d => 13,
+                r14d => 14,
+                r15d => 15,
+            };
+            assert_eq!(reg.idx(), want_idx);
+
+            // REX.W bit.
+            assert_eq!(reg.rexw(), 0);
+
+            // REX byte: only the registers with a code above 7.
+            let want_rex = matches!(reg, r8d | r9d | r10d | r11d | r12d | r13d | r14d | r15d);
+            assert_eq!(reg.need_rex(), want_rex);
+
+            // SIB byte.
+            assert_eq!(reg.need_sib(), matches!(reg, esp | r12d));
+
+            // PC relative addressing.
+            assert_eq!(reg.is_pc_rel(), matches!(reg, ebp | r13d));
+        }
+    }
+
+    /// Check every `Reg` property of the 64 bit registers.
+    #[test]
+    fn test_reg64() {
+        use Reg64::*;
+
+        for reg in Reg64::iter() {
+            // Register code.
+            let want_idx = match reg {
+                rax => 0,
+                rcx => 1,
+                rdx => 2,
+                rbx => 3,
+                rsp => 4,
+                rbp => 5,
+                rsi => 6,
+                rdi => 7,
+                r8 => 8,
+                r9 => 9,
+                r10 => 10,
+                r11 => 11,
+                r12 => 12,
+                r13 => 13,
+                r14 => 14,
+                r15 => 15,
+            };
+            assert_eq!(reg.idx(), want_idx);
+
+            // REX.W bit.
+            assert_eq!(reg.rexw(), 1);
+
+            // REX byte: always needed, as REX.W is set for every 64 bit register.
+            assert!(reg.need_rex());
+
+            // SIB byte.
+            assert_eq!(reg.need_sib(), matches!(reg, rsp | r12));
+
+            // PC relative addressing.
+            assert_eq!(reg.is_pc_rel(), matches!(reg, rbp | r13));
+        }
+    }
+}