From 90233c3cf84453424f1de6dd68f19255ece26f1d Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Sun, 5 Mar 2023 21:41:25 +0100 Subject: Updated doc comments --- src/imm.rs | 27 ++++++++++---- src/insn.rs | 2 ++ src/label.rs | 12 ++++++- src/lib.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- src/prelude.rs | 2 ++ src/reg.rs | 31 ++++++++++------ src/rt.rs | 2 ++ 7 files changed, 168 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/imm.rs b/src/imm.rs index bcf0d31..85b2cbc 100644 --- a/src/imm.rs +++ b/src/imm.rs @@ -1,3 +1,6 @@ +//! Definition of different immediate types which are used as input operands for various +//! instructions. + /// Trait to interact with immediate operands. pub(crate) trait Imm { /// Get immediate operand as slice of bytes. @@ -5,8 +8,8 @@ pub(crate) trait Imm { } macro_rules! impl_imm { - ($name:ident, $size:expr, from: $( $from:ty ),* $(,)?) => { - /// Immediate operand. + (#[$doc:meta] $name:ident, $size:expr, from: { $( $from:ty ),* $(,)? }) => { + #[$doc] pub struct $name([u8; $size]); impl Imm for $name { @@ -29,7 +32,19 @@ macro_rules! impl_imm { } } -impl_imm!(Imm8, 1, from: u8, i8); -impl_imm!(Imm16, 2, from: u16, i16, u8, i8); -impl_imm!(Imm32, 4, from: u32, i32, u16, i16, u8, i8); -impl_imm!(Imm64, 8, from: u64, i64, u32, i32, u16, i16, u8, i8); +impl_imm!( + /// Type representing an 8 bit immediate. + Imm8, 1, from: { u8, i8 } +); +impl_imm!( + /// Type representing a 16 bit immediate. + Imm16, 2, from: { u16, i16, u8, i8 } +); +impl_imm!( + /// Type representing a 32 bit immediate. + Imm32, 4, from: { u32, i32, u16, i16, u8, i8 } +); +impl_imm!( + /// Type representing a 64 bit immediate. + Imm64, 8, from: { u64, i64, u32, i32, u16, i16, u8, i8 } +); diff --git a/src/insn.rs b/src/insn.rs index f3ba18d..db62e6c 100644 --- a/src/insn.rs +++ b/src/insn.rs @@ -1,3 +1,5 @@ +//! Trait definitions of various instructions. 
+ mod add; mod dec; mod jmp; diff --git a/src/label.rs b/src/label.rs index b1f1133..a0bd864 100644 --- a/src/label.rs +++ b/src/label.rs @@ -1,3 +1,6 @@ +//! Definition of the label type which can be used as jump target and can be bound to a location in +//! the emitted code. + use std::collections::HashSet; /// A label which is used as target for jump instructions. @@ -35,7 +38,11 @@ impl Label { } } - /// Bind the label to the `location`. + /// Bind the label to the `location`, can only be bound once. + /// + /// # Panics + /// + /// Panics if the label is already bound. pub(crate) fn bind(&mut self, loc: usize) { // A label can only be bound once! assert!(!self.is_bound()); @@ -48,10 +55,13 @@ impl Label { self.offsets.insert(off); } + /// Get the location of the label if already bound, `None` else. pub(crate) fn location(&self) -> Option { self.location } + /// Get the offsets which refer to the label. These are used to patch the jump instructions to + /// the label location. pub(crate) fn offsets_mut(&mut self) -> &mut HashSet { &mut self.offsets } diff --git a/src/lib.rs b/src/lib.rs index 892c08f..de12c57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,22 +1,104 @@ +//! A simple `x64` jit assembler with a minimal runtime to execute emitted code for fun. +//! +//! The following is a Fibonacci example implementation. +//! ```rust +//! use juicebox_asm::prelude::*; +//! use juicebox_asm::Runtime; +//! +//! const fn fib_rs(n: u64) -> u64 { +//! match n { +//! 0 => 0, +//! 1 => 1, +//! _ => fib_rs(n - 2) + fib_rs(n - 1), +//! } +//! } +//! +//! fn main() { +//! let mut asm = Asm::new(); +//! +//! let mut lp = Label::new(); +//! let mut end = Label::new(); +//! +//! // Reference implementation: +//! // +//! // int fib(int n) { +//! // int tmp = 0; +//! // int prv = 1; +//! // int sum = 0; +//! // loop: +//! // if (n == 0) goto end; +//! // tmp = sum; +//! // sum += prv; +//! // prv = tmp; +//! // --n; +//! // goto loop; +//! // end: +//! // return sum; +//! 
// } +//! +//! // SystemV abi: +//! // rdi -> first argument +//! // rax -> return value +//! let n = Reg64::rdi; +//! let sum = Reg64::rax; +//! +//! let tmp = Reg64::rcx; +//! let prv = Reg64::rbx; +//! +//! asm.mov(tmp, Imm64::from(0)); +//! asm.mov(prv, Imm64::from(1)); +//! asm.mov(sum, Imm64::from(0)); +//! +//! asm.bind(&mut lp); +//! asm.test(n, n); +//! asm.jz(&mut end); +//! asm.mov(tmp, sum); +//! asm.add(sum, prv); +//! asm.mov(prv, tmp); +//! asm.dec(n); +//! asm.jmp(&mut lp); +//! asm.bind(&mut end); +//! asm.ret(); +//! +//! // Move code into executable page and get function pointer to it. +//! let rt = Runtime::new(&asm.into_code()); +//! let fib = unsafe { rt.as_fn:: u64>() }; +//! +//! for n in 0..15 { +//! let fib_jit = fib(n); +//! println!("fib({}) = {}", n, fib_jit); +//! assert_eq!(fib_jit, fib_rs(n)); +//! } +//! } +//! ``` + pub mod prelude; -pub mod rt; mod imm; mod insn; mod label; mod reg; +mod rt; + +pub use imm::{Imm16, Imm32, Imm64, Imm8}; +pub use label::Label; +pub use reg::{Reg16, Reg32, Reg64, Reg8}; +pub use rt::Runtime; use imm::Imm; -use label::Label; use reg::Reg; -use reg::{Reg16, Reg32, Reg64, Reg8}; +/// Type representing a memory operand. pub enum MemOp { + /// An indirect memory operand, eg `mov [rax], rcx`. Indirect(Reg64), + + /// An indirect memory operand with additional displacement, eg `mov [rax + 0x10], rcx`. IndirectDisp(Reg64, i32), } impl MemOp { + /// Get the base address register of the memory operand. const fn base(&self) -> Reg64 { match self { MemOp::Indirect(base) => *base, @@ -39,30 +121,41 @@ const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 { ((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111) } +/// `x64` jit assembler. pub struct Asm { buf: Vec, } impl Asm { + /// Create a new `x64` jit assembler. pub fn new() -> Asm { + // Some random default capacity. let buf = Vec::with_capacity(1024); Asm { buf } } + /// Consume the assembler and get the emitted code. 
pub fn into_code(self) -> Vec { self.buf } + /// Emit a slice of bytes. fn emit(&mut self, bytes: &[u8]) { self.buf.extend_from_slice(bytes); } + /// Emit a slice of optional bytes. fn emit_optional(&mut self, bytes: &[Option]) { for byte in bytes.iter().filter_map(|&b| b) { self.buf.push(byte); } } + /// Emit a slice of bytes at `pos`. + /// + /// # Panics + /// + /// Panics if [pos..pos+len] indexes out of bounds of the underlying code buffer. fn emit_at(&mut self, pos: usize, bytes: &[u8]) { if let Some(buf) = self.buf.get_mut(pos..pos + bytes.len()) { buf.copy_from_slice(bytes); @@ -83,6 +176,7 @@ impl Asm { /// If the [Label] is bound, patch any pending relocation. pub fn resolve(&mut self, label: &mut Label) { if let Some(loc) = label.location() { + // For now we only support disp32 as label location. let loc = i32::try_from(loc).expect("Label location did not fit into i32."); // Resolve any pending relocations for the label. @@ -100,6 +194,7 @@ impl Asm { // -- Encode utilities. + /// Encode a register-register instruction. fn encode_rr(&mut self, opc: u8, op1: T, op2: T) where Self: EncodeRR, @@ -120,6 +215,8 @@ impl Asm { self.emit(&[opc, modrm]); } + /// Encode an offset-immediate instruction. + /// Register idx is encoded in the opcode. fn encode_oi(&mut self, opc: u8, op1: T, op2: U) where Self: EncodeR, @@ -133,6 +230,7 @@ impl Asm { self.emit(op2.bytes()); } + /// Encode a register-immediate instruction. fn encode_ri(&mut self, opc: u8, opc_ext: u8, op1: T, op2: U) where Self: EncodeR, @@ -154,6 +252,7 @@ impl Asm { self.emit(op2.bytes()); } + /// Encode a register instruction. fn encode_r(&mut self, opc: u8, opc_ext: u8, op1: T) where Self: EncodeR, @@ -174,6 +273,7 @@ impl Asm { self.emit(&[opc, modrm]); } + /// Encode a memory-register instruction. fn encode_mr(&mut self, opc: u8, op1: MemOp, op2: T) where Self: EncodeMR, @@ -207,6 +307,7 @@ impl Asm { } } + /// Encode a register-memory instruction. 
fn encode_rm(&mut self, opc: u8, op1: T, op2: MemOp) where Self: EncodeMR, @@ -217,6 +318,7 @@ impl Asm { self.encode_mr(opc, op2, op1); } + /// Encode a jump to label instruction. fn encode_jmp_label(&mut self, opc: &[u8], op1: &mut Label) { // Emit the opcode. self.emit(opc); @@ -225,6 +327,7 @@ impl Asm { op1.record_offset(self.buf.len()); // Emit a zeroed disp32, which serves as placeholder for the relocation. + // We currently only support disp32 jump targets. self.emit(&[0u8; 4]); // Resolve any pending relocations for the label. @@ -234,6 +337,7 @@ impl Asm { // -- Encoder helper. +/// Encode helper for register-register instructions. trait EncodeRR { fn legacy_prefix() -> Option { None @@ -257,6 +361,7 @@ impl EncodeRR for Asm { } impl EncodeRR for Asm {} +/// Encode helper for register instructions. trait EncodeR { fn legacy_prefix() -> Option { None @@ -280,6 +385,7 @@ impl EncodeR for Asm { } impl EncodeR for Asm {} +/// Encode helper for memory-register instructions. trait EncodeMR { fn legacy_prefix() -> Option { None diff --git a/src/prelude.rs b/src/prelude.rs index e1334ea..0093240 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,3 +1,5 @@ +//! Crate prelude, which can be used to import the most important types at once. + pub use crate::Asm; pub use crate::MemOp; diff --git a/src/reg.rs b/src/reg.rs index b349878..2dc2281 100644 --- a/src/reg.rs +++ b/src/reg.rs @@ -1,3 +1,5 @@ +//! Definition of registers which are used as input operands for various instructions. + /// Trait to interact with register operands. pub(crate) trait Reg { /// Get the raw x64 register code. @@ -35,9 +37,9 @@ pub(crate) trait Reg { } } -macro_rules! impl_reg { - (ENUM_ONLY, $name:ident, { $($reg:ident),+ $(,)? }) => { - /// General purpose register operands. +macro_rules! enum_reg { + (#[$doc:meta] $name:ident, { $($reg:ident),+ $(,)? }) => { + #[$doc] #[allow(non_camel_case_types)] #[derive(Copy, Clone)] #[repr(u8)] @@ -53,9 +55,11 @@ macro_rules! 
impl_reg { } } }; +} - ($name:ident, $rexw:expr, { $($reg:ident),+ $(,)? }) => { - impl_reg!(ENUM_ONLY, $name, { $( $reg, )+ }); +macro_rules! impl_reg { + (#[$doc:meta] $name:ident, $rexw:expr, { $($reg:ident),+ $(,)? }) => { + enum_reg!(#[$doc] $name, { $( $reg, )+ }); impl Reg for $name { /// Get the raw x64 register code. @@ -71,11 +75,18 @@ macro_rules! impl_reg { } } -impl_reg!(Reg64, true, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 }); -impl_reg!(Reg32, false, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d }); -impl_reg!(Reg16, false, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w }); -impl_reg!(ENUM_ONLY, - Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l, +impl_reg!( + /// Definition of 64 bit registers. + Reg64, true, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 }); +impl_reg!( + /// Definition of 32 bit registers. + Reg32, false, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d }); +impl_reg!( + /// Definition of 16 bit registers. + Reg16, false, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w }); +enum_reg!( + /// Definition of 8 bit registers. + Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l, ah, ch, dh, bh }); impl Reg for Reg8 { diff --git a/src/rt.rs b/src/rt.rs index fc8c930..1e9289e 100644 --- a/src/rt.rs +++ b/src/rt.rs @@ -1,3 +1,5 @@ +//! A simple runtime which can be used to execute emitted instructions. + use core::ffi::c_void; use nix::sys::mman::{mmap, munmap, MapFlags, ProtFlags}; -- cgit v1.2.3