about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- README.md 2
-rw-r--r-- examples/fib.rs 2
-rw-r--r-- src/imm.rs 27
-rw-r--r-- src/insn.rs 2
-rw-r--r-- src/label.rs 12
-rw-r--r-- src/lib.rs 112
-rw-r--r-- src/prelude.rs 2
-rw-r--r-- src/reg.rs 31
-rw-r--r-- src/rt.rs 2
9 files changed, 170 insertions, 22 deletions
diff --git a/README.md b/README.md
index f5842c6..9727b14 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ An `x64` jit assembler for learning purpose with the following two main goals:
```rust
use juicebox_asm::prelude::{Reg32::*, *};
-use juicebox_asm::rt::Runtime;
+use juicebox_asm::Runtime;
fn main() {
let mut asm = Asm::new();
diff --git a/examples/fib.rs b/examples/fib.rs
index c4233da..7acbb50 100644
--- a/examples/fib.rs
+++ b/examples/fib.rs
@@ -1,5 +1,5 @@
use juicebox_asm::prelude::*;
-use juicebox_asm::rt::Runtime;
+use juicebox_asm::Runtime;
const fn fib_rs(n: u64) -> u64 {
match n {
diff --git a/src/imm.rs b/src/imm.rs
index bcf0d31..85b2cbc 100644
--- a/src/imm.rs
+++ b/src/imm.rs
@@ -1,3 +1,6 @@
+//! Definition of different immediate types which are used as input operands for various
+//! instructions.
+
/// Trait to interact with immediate operands.
pub(crate) trait Imm {
/// Get immediate operand as slice of bytes.
@@ -5,8 +8,8 @@ pub(crate) trait Imm {
}
macro_rules! impl_imm {
- ($name:ident, $size:expr, from: $( $from:ty ),* $(,)?) => {
- /// Immediate operand.
+ (#[$doc:meta] $name:ident, $size:expr, from: { $( $from:ty ),* $(,)? }) => {
+ #[$doc]
pub struct $name([u8; $size]);
impl Imm for $name {
@@ -29,7 +32,19 @@ macro_rules! impl_imm {
}
}
-impl_imm!(Imm8, 1, from: u8, i8);
-impl_imm!(Imm16, 2, from: u16, i16, u8, i8);
-impl_imm!(Imm32, 4, from: u32, i32, u16, i16, u8, i8);
-impl_imm!(Imm64, 8, from: u64, i64, u32, i32, u16, i16, u8, i8);
+impl_imm!(
+ /// Type representing an 8 bit immediate.
+ Imm8, 1, from: { u8, i8 }
+);
+impl_imm!(
+ /// Type representing a 16 bit immediate.
+ Imm16, 2, from: { u16, i16, u8, i8 }
+);
+impl_imm!(
+ /// Type representing a 32 bit immediate.
+ Imm32, 4, from: { u32, i32, u16, i16, u8, i8 }
+);
+impl_imm!(
+ /// Type representing a 64 bit immediate.
+ Imm64, 8, from: { u64, i64, u32, i32, u16, i16, u8, i8 }
+);
diff --git a/src/insn.rs b/src/insn.rs
index f3ba18d..db62e6c 100644
--- a/src/insn.rs
+++ b/src/insn.rs
@@ -1,3 +1,5 @@
+//! Trait definitions of various instructions.
+
mod add;
mod dec;
mod jmp;
diff --git a/src/label.rs b/src/label.rs
index b1f1133..a0bd864 100644
--- a/src/label.rs
+++ b/src/label.rs
@@ -1,3 +1,6 @@
+//! Definition of the label type which can be used as jump target and can be bound to a location in
+//! the emitted code.
+
use std::collections::HashSet;
/// A label which is used as target for jump instructions.
@@ -35,7 +38,11 @@ impl Label {
}
}
- /// Bind the label to the `location`.
+ /// Bind the label to the `location`, can only be bound once.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the label is already bound.
pub(crate) fn bind(&mut self, loc: usize) {
// A label can only be bound once!
assert!(!self.is_bound());
@@ -48,10 +55,13 @@ impl Label {
self.offsets.insert(off);
}
+ /// Get the location of the label if already bound, `None` otherwise.
pub(crate) fn location(&self) -> Option<usize> {
self.location
}
+ /// Get the offsets which refer to the label. These are used to patch the jump instructions to
+ /// the label location.
pub(crate) fn offsets_mut(&mut self) -> &mut HashSet<usize> {
&mut self.offsets
}
diff --git a/src/lib.rs b/src/lib.rs
index 892c08f..de12c57 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,22 +1,104 @@
+//! A simple `x64` jit assembler with a minimal runtime to execute emitted code for fun.
+//!
+//! The following is an example implementation which computes Fibonacci numbers.
+//! ```rust
+//! use juicebox_asm::prelude::*;
+//! use juicebox_asm::Runtime;
+//!
+//! const fn fib_rs(n: u64) -> u64 {
+//! match n {
+//! 0 => 0,
+//! 1 => 1,
+//! _ => fib_rs(n - 2) + fib_rs(n - 1),
+//! }
+//! }
+//!
+//! fn main() {
+//! let mut asm = Asm::new();
+//!
+//! let mut lp = Label::new();
+//! let mut end = Label::new();
+//!
+//! // Reference implementation:
+//! //
+//! // int fib(int n) {
+//! // int tmp = 0;
+//! // int prv = 1;
+//! // int sum = 0;
+//! // loop:
+//! // if (n == 0) goto end;
+//! // tmp = sum;
+//! // sum += prv;
+//! // prv = tmp;
+//! // --n;
+//! // goto loop;
+//! // end:
+//! // return sum;
+//! // }
+//!
+//! // SystemV abi:
+//! // rdi -> first argument
+//! // rax -> return value
+//! let n = Reg64::rdi;
+//! let sum = Reg64::rax;
+//!
+//! let tmp = Reg64::rcx;
+//! let prv = Reg64::rbx;
+//!
+//! asm.mov(tmp, Imm64::from(0));
+//! asm.mov(prv, Imm64::from(1));
+//! asm.mov(sum, Imm64::from(0));
+//!
+//! asm.bind(&mut lp);
+//! asm.test(n, n);
+//! asm.jz(&mut end);
+//! asm.mov(tmp, sum);
+//! asm.add(sum, prv);
+//! asm.mov(prv, tmp);
+//! asm.dec(n);
+//! asm.jmp(&mut lp);
+//! asm.bind(&mut end);
+//! asm.ret();
+//!
+//! // Move code into executable page and get function pointer to it.
+//! let rt = Runtime::new(&asm.into_code());
+//! let fib = unsafe { rt.as_fn::<extern "C" fn(u64) -> u64>() };
+//!
+//! for n in 0..15 {
+//! let fib_jit = fib(n);
+//! println!("fib({}) = {}", n, fib_jit);
+//! assert_eq!(fib_jit, fib_rs(n));
+//! }
+//! }
+//! ```
+
pub mod prelude;
-pub mod rt;
mod imm;
mod insn;
mod label;
mod reg;
+mod rt;
+
+pub use imm::{Imm16, Imm32, Imm64, Imm8};
+pub use label::Label;
+pub use reg::{Reg16, Reg32, Reg64, Reg8};
+pub use rt::Runtime;
use imm::Imm;
-use label::Label;
use reg::Reg;
-use reg::{Reg16, Reg32, Reg64, Reg8};
+/// Type representing a memory operand.
pub enum MemOp {
+ /// An indirect memory operand, e.g. `mov [rax], rcx`.
Indirect(Reg64),
+
+ /// An indirect memory operand with additional displacement, e.g. `mov [rax + 0x10], rcx`.
IndirectDisp(Reg64, i32),
}
impl MemOp {
+ /// Get the base address register of the memory operand.
const fn base(&self) -> Reg64 {
match self {
MemOp::Indirect(base) => *base,
@@ -39,30 +121,41 @@ const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 {
((mod_ & 0b11) << 6) | ((reg & 0b111) << 3) | (rm & 0b111)
}
+/// `x64` jit assembler.
pub struct Asm {
buf: Vec<u8>,
}
impl Asm {
+ /// Create a new `x64` jit assembler.
pub fn new() -> Asm {
+ // Some random default capacity.
let buf = Vec::with_capacity(1024);
Asm { buf }
}
+ /// Consume the assembler and get the emitted code.
pub fn into_code(self) -> Vec<u8> {
self.buf
}
+ /// Emit a slice of bytes.
fn emit(&mut self, bytes: &[u8]) {
self.buf.extend_from_slice(bytes);
}
+ /// Emit a slice of optional bytes.
fn emit_optional(&mut self, bytes: &[Option<u8>]) {
for byte in bytes.iter().filter_map(|&b| b) {
self.buf.push(byte);
}
}
+ /// Emit a slice of bytes at `pos`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `pos..pos + bytes.len()` is out of bounds of the underlying code buffer.
fn emit_at(&mut self, pos: usize, bytes: &[u8]) {
if let Some(buf) = self.buf.get_mut(pos..pos + bytes.len()) {
buf.copy_from_slice(bytes);
@@ -83,6 +176,7 @@ impl Asm {
/// If the [Label] is bound, patch any pending relocation.
pub fn resolve(&mut self, label: &mut Label) {
if let Some(loc) = label.location() {
+ // For now we only support disp32 as label location.
let loc = i32::try_from(loc).expect("Label location did not fit into i32.");
// Resolve any pending relocations for the label.
@@ -100,6 +194,7 @@ impl Asm {
// -- Encode utilities.
+ /// Encode a register-register instruction.
fn encode_rr<T: Reg>(&mut self, opc: u8, op1: T, op2: T)
where
Self: EncodeRR<T>,
@@ -120,6 +215,8 @@ impl Asm {
self.emit(&[opc, modrm]);
}
+ /// Encode an offset-immediate instruction.
+ /// Register idx is encoded in the opcode.
fn encode_oi<T: Reg, U: Imm>(&mut self, opc: u8, op1: T, op2: U)
where
Self: EncodeR<T>,
@@ -133,6 +230,7 @@ impl Asm {
self.emit(op2.bytes());
}
+ /// Encode a register-immediate instruction.
fn encode_ri<T: Reg, U: Imm>(&mut self, opc: u8, opc_ext: u8, op1: T, op2: U)
where
Self: EncodeR<T>,
@@ -154,6 +252,7 @@ impl Asm {
self.emit(op2.bytes());
}
+ /// Encode a register instruction.
fn encode_r<T: Reg>(&mut self, opc: u8, opc_ext: u8, op1: T)
where
Self: EncodeR<T>,
@@ -174,6 +273,7 @@ impl Asm {
self.emit(&[opc, modrm]);
}
+ /// Encode a memory-register instruction.
fn encode_mr<T: Reg>(&mut self, opc: u8, op1: MemOp, op2: T)
where
Self: EncodeMR<T>,
@@ -207,6 +307,7 @@ impl Asm {
}
}
+ /// Encode a register-memory instruction.
fn encode_rm<T: Reg>(&mut self, opc: u8, op1: T, op2: MemOp)
where
Self: EncodeMR<T>,
@@ -217,6 +318,7 @@ impl Asm {
self.encode_mr(opc, op2, op1);
}
+ /// Encode a jump to label instruction.
fn encode_jmp_label(&mut self, opc: &[u8], op1: &mut Label) {
// Emit the opcode.
self.emit(opc);
@@ -225,6 +327,7 @@ impl Asm {
op1.record_offset(self.buf.len());
// Emit a zeroed disp32, which serves as placeholder for the relocation.
+ // We currently only support disp32 jump targets.
self.emit(&[0u8; 4]);
// Resolve any pending relocations for the label.
@@ -234,6 +337,7 @@ impl Asm {
// -- Encoder helper.
+/// Encode helper for register-register instructions.
trait EncodeRR<T: Reg> {
fn legacy_prefix() -> Option<u8> {
None
@@ -257,6 +361,7 @@ impl EncodeRR<Reg16> for Asm {
}
impl EncodeRR<Reg64> for Asm {}
+/// Encode helper for register instructions.
trait EncodeR<T: Reg> {
fn legacy_prefix() -> Option<u8> {
None
@@ -280,6 +385,7 @@ impl EncodeR<Reg16> for Asm {
}
impl EncodeR<Reg64> for Asm {}
+/// Encode helper for memory-register instructions.
trait EncodeMR<T: Reg> {
fn legacy_prefix() -> Option<u8> {
None
diff --git a/src/prelude.rs b/src/prelude.rs
index e1334ea..0093240 100644
--- a/src/prelude.rs
+++ b/src/prelude.rs
@@ -1,3 +1,5 @@
+//! Crate prelude, which can be used to import the most important types at once.
+
pub use crate::Asm;
pub use crate::MemOp;
diff --git a/src/reg.rs b/src/reg.rs
index b349878..2dc2281 100644
--- a/src/reg.rs
+++ b/src/reg.rs
@@ -1,3 +1,5 @@
+//! Definition of registers which are used as input operands for various instructions.
+
/// Trait to interact with register operands.
pub(crate) trait Reg {
/// Get the raw x64 register code.
@@ -35,9 +37,9 @@ pub(crate) trait Reg {
}
}
-macro_rules! impl_reg {
- (ENUM_ONLY, $name:ident, { $($reg:ident),+ $(,)? }) => {
- /// General purpose register operands.
+macro_rules! enum_reg {
+ (#[$doc:meta] $name:ident, { $($reg:ident),+ $(,)? }) => {
+ #[$doc]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
#[repr(u8)]
@@ -53,9 +55,11 @@ macro_rules! impl_reg {
}
}
};
+}
- ($name:ident, $rexw:expr, { $($reg:ident),+ $(,)? }) => {
- impl_reg!(ENUM_ONLY, $name, { $( $reg, )+ });
+macro_rules! impl_reg {
+ (#[$doc:meta] $name:ident, $rexw:expr, { $($reg:ident),+ $(,)? }) => {
+ enum_reg!(#[$doc] $name, { $( $reg, )+ });
impl Reg for $name {
/// Get the raw x64 register code.
@@ -71,11 +75,18 @@ macro_rules! impl_reg {
}
}
-impl_reg!(Reg64, true, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 });
-impl_reg!(Reg32, false, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d });
-impl_reg!(Reg16, false, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w });
-impl_reg!(ENUM_ONLY,
- Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l,
+impl_reg!(
+ /// Definition of 64 bit registers.
+ Reg64, true, { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15 });
+impl_reg!(
+ /// Definition of 32 bit registers.
+ Reg32, false, { eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d });
+impl_reg!(
+ /// Definition of 16 bit registers.
+ Reg16, false, { ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w });
+enum_reg!(
+ /// Definition of 8 bit registers.
+ Reg8, { al, cl, dl, bl, spl, bpl, sil, dil, r8l, r9l, r10l, r11l, r12l, r13l, r14l, r15l,
ah, ch, dh, bh });
impl Reg for Reg8 {
diff --git a/src/rt.rs b/src/rt.rs
index fc8c930..1e9289e 100644
--- a/src/rt.rs
+++ b/src/rt.rs
@@ -1,3 +1,5 @@
+//! A simple runtime which can be used to execute emitted instructions.
+
use core::ffi::c_void;
use nix::sys::mman::{mmap, munmap, MapFlags, ProtFlags};