aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJohannes Stoelp <johannes.stoelp@gmail.com>2024-12-07 01:59:18 +0100
committerJohannes Stoelp <johannes.stoelp@gmail.com>2024-12-07 01:59:18 +0100
commit9c3c3fd923d894d2351eb22129ea693eb98fa8ff (patch)
tree2bd2064f2421a5989fc13c8c8e0ea060c8555a5e
parent2699292790476eccd726fc5dae179b3a688a1468 (diff)
downloadjuicebox-asm-9c3c3fd923d894d2351eb22129ea693eb98fa8ff.tar.gz
juicebox-asm-9c3c3fd923d894d2351eb22129ea693eb98fa8ff.zip
bf: add jit compiler and interpreter
-rw-r--r--README.md3
-rw-r--r--examples/bf_vm.rs292
-rw-r--r--src/insn/cmp.rs8
3 files changed, 302 insertions, 1 deletions
diff --git a/README.md b/README.md
index e4d9ba3..29178ee 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,9 @@ The [`examples/`](examples/) folder provides additional examples:
a simple *jit compiler* which has a *jit cache* and translates each *basic
block* on first execution when running a VM guest image. For reference an
interepter is also implemented.
+- [`bf_vm.rs`](examples/bf_vm.rs) implements a
+ [brainfuck][https://en.wikipedia.org/wiki/Brainfuck] jit compiler
+ and interpreter.
## git hook for local development
diff --git a/examples/bf_vm.rs b/examples/bf_vm.rs
new file mode 100644
index 0000000..9045f24
--- /dev/null
+++ b/examples/bf_vm.rs
@@ -0,0 +1,292 @@
+//! Brainfuck VM.
+//!
+//! This example implements a simple
+//! [brainfuck][https://en.wikipedia.org/wiki/Brainfuck] interpreter
+//! [`BrainfuckInterp`] and a jit compiler [`BrainfuckJit`].
+//!
+//! Brainfuck is an esoteric programming languge existing of 8 commands.
+//! - `>` increment data pointer.
+//! - `<` decrement data pointer.
+//! - `+` increment data at current data pointer.
+//! - `-` decrement data at current data pointer.
+//! - `.` output data at current data pointer.
+//! - `,` read input and store at current data pointer.
+//! - `[` jump behind matching ']' if data at data pointer is zero.
+//! - `]` jump behind matching '[' if data at data pointer is non-zero.
+
+use std::collections::HashMap;
+use std::io::Write;
+
+use juicebox_asm::insn::*;
+use juicebox_asm::Runtime;
+use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8};
+
+struct BrainfuckInterp {
+ pc: usize,
+ imem: Vec<char>,
+ dptr: usize,
+ dmem: [u8; 256],
+ branches: HashMap<usize, usize>,
+}
+
+impl BrainfuckInterp {
+ fn new(prog: &str) -> Result<Self, String> {
+ // Do a first pass over the bf program to filter whitespace and detect invalid tokens.
+ // Additionally validate all conditional branches, and compute their branch target.
+ let (imem, branches) = {
+ // Instruction memory holding the final bf program.
+ let mut imem = Vec::new();
+ // Helper to track index of open brackets.
+ let mut lhs_brackets = Vec::new();
+ // Mapping from branch instruction to branch target.
+ let mut branches = HashMap::new();
+
+ for (idx, token) in prog.chars().filter(|c| !c.is_whitespace()).enumerate() {
+ match token {
+ '<' | '>' | '+' | '-' | '.' | ',' => { /* ignore valid bf tokens */ }
+ '[' => lhs_brackets.push(idx),
+ ']' => {
+ if let Some(lhs) = lhs_brackets.pop() {
+ branches.insert(lhs, idx);
+ branches.insert(idx, lhs);
+ } else {
+ return Err(format!("encountered un-balanced brackets, found ']' at index {idx} without matching '['"));
+ }
+ }
+ _ => return Err(format!("invalid bf token '{token}'")),
+ }
+ imem.push(token)
+ }
+
+ if !lhs_brackets.is_empty() {
+ return Err(String::from(
+ "encountered un-balanced brackets, left-over '[' after parsing bf program",
+ ));
+ }
+
+ (imem, branches)
+ };
+
+ Ok(BrainfuckInterp {
+ pc: 0,
+ imem,
+ dptr: 0,
+ dmem: [0; 256],
+ branches,
+ })
+ }
+}
+
+fn run_interp(prog: &str) {
+ let mut vm = BrainfuckInterp::new(prog).unwrap();
+
+ loop {
+ let insn = match vm.imem.get(vm.pc) {
+ Some(insn) => insn,
+ None => break, // End of bf program.
+ };
+
+ let putchar = |val: u8| {
+ std::io::stdout()
+ .write(&[val])
+ .expect("Failed to write to stdout!");
+ };
+
+ match insn {
+ '>' => {
+ vm.dptr += 1;
+ assert!(vm.dptr < vm.dmem.len());
+ }
+ '<' => {
+ assert!(vm.dptr > 0);
+ vm.dptr -= 1;
+ }
+ '+' => {
+ vm.dmem[vm.dptr] += 1;
+ }
+ '-' => {
+ vm.dmem[vm.dptr] -= 1;
+ }
+ '.' => {
+ putchar(vm.dmem[vm.dptr]);
+ }
+ ',' => {
+ unimplemented!("getchar");
+ }
+ '[' => {
+ if vm.dmem[vm.dptr] == 0 {
+ vm.pc = *vm.branches.get(&vm.pc).unwrap();
+ }
+ }
+ ']' => {
+ if vm.dmem[vm.dptr] != 0 {
+ vm.pc = *vm.branches.get(&vm.pc).unwrap();
+ }
+ }
+ _ => unreachable!(),
+ }
+
+ vm.pc += 1;
+ }
+}
+
+// -- BRAINFUCK JIT --------------------------------------------------------------
+
+#[cfg(not(any(target_arch = "x86_64", target_os = "linux")))]
+compile_error!("Only supported on x86_64 with SystemV abi");
+
+struct BrainfuckJit {
+ imem: Vec<char>,
+ dmem: [u8; 256],
+}
+
+impl BrainfuckJit {
+ fn new(prog: &str) -> Result<Self, String> {
+ // Do a first pass over the bf program to filter whitespace and detect invalid tokens.
+ let imem = prog
+ .chars()
+ .filter(|c| !c.is_whitespace())
+ .map(|c| match c {
+ '<' | '>' | '+' | '-' | '.' | ',' | '[' | ']' => Ok(c),
+ _ => Err(format!("invalid bf token '{c}'")),
+ })
+ .collect::<Result<Vec<char>, String>>()?;
+
+ Ok(BrainfuckJit {
+ imem,
+ dmem: [0; 256],
+ })
+ }
+}
+
+extern "C" fn putchar(c: u8) {
+ std::io::stdout()
+ .write(&[c])
+ .expect("Failed to write to stdout!");
+}
+
+fn run_jit(prog: &str) {
+ let mut vm = BrainfuckJit::new(prog).unwrap();
+
+ // Use callee saved registers to hold vm state, such that we don't
+ // need to save any state before calling out to putchar.
+ let dmem_base = Reg64::rbx;
+ let dmem_idx = Reg64::r12;
+
+ let mut asm = Asm::new();
+ // Move data memory pointer (argument on jit entry) into correct
+ // register.
+ asm.mov(dmem_base, Reg64::rdi);
+ // Clear data memory index.
+ asm.xor(dmem_idx, dmem_idx);
+
+ // A stack of label pairs, used to link up forward and backward
+ // jumps for a given '[]' pair.
+ let mut label_stack = Vec::new();
+
+ // Generate code for each instruction in the bf program.
+ for insn in vm.imem {
+ match insn {
+ '>' => {
+ // TODO: generate runtime bounds check.
+ asm.inc(dmem_idx);
+ }
+ '<' => {
+ // TODO: generate runtime bounds check.
+ asm.dec(dmem_idx);
+ }
+ '+' => {
+ asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx)));
+ }
+ '-' => {
+ asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx)));
+ }
+ '.' => {
+ // Load data memory from active cell into di register,
+ // which is the first argument register according to
+ // the SystemV abi, then call into putchar. Since we
+ // stored all out vm state in callee saved registers
+ // we don't need to save any registers before the
+ // call.
+ asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx));
+ asm.mov(Reg64::rax, Imm64::from(putchar as usize));
+ asm.call(Reg64::rax);
+ }
+ ',' => {
+ unimplemented!("getchar");
+ }
+ '[' => {
+ // Create new label pair.
+ label_stack.push((Label::new(), Label::new()));
+ // UNWRAP: We just pushed a new entry on the stack.
+ let label_pair = label_stack.last_mut().unwrap();
+
+ // Goto label_pair.0 if data memory at active cell is 0.
+ // if vm.dmem[vm.dptr] == 0 goto label_pair.0
+ asm.cmp(
+ MemOp::IndirectBaseIndex(dmem_base, dmem_idx),
+ Imm8::from(0u8),
+ );
+ asm.jz(&mut label_pair.0);
+
+ // Bind label_pair.1 after the jump instruction, which
+ // will be the branch target for the matching ']'.
+ asm.bind(&mut label_pair.1);
+ }
+ ']' => {
+ let mut label_pair = label_stack
+ .pop()
+ .expect("encountered un-balanced brackets, found ']' without matching '['");
+
+ // Goto label_pair.1 if data memory at active cell is
+ // not 0.
+ // if vm.dmem[vm.dptr] != 0 goto label_pair.1
+ asm.cmp(
+ MemOp::IndirectBaseIndex(dmem_base, dmem_idx),
+ Imm8::from(0u8),
+ );
+ asm.jnz(&mut label_pair.1);
+
+ // Bind label_pair.0 after the jump instruction, which
+ // is the branch target for the matching '['.
+ asm.bind(&mut label_pair.0);
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ // Return from bf program.
+ asm.ret();
+
+ if !label_stack.is_empty() {
+ panic!("encountered un-balanced brackets, left-over '[' after jitting bf program")
+ }
+
+ // Execute jitted bf program.
+ let mut rt = Runtime::new();
+ let bf_entry = unsafe { rt.add_code::<extern "C" fn(*mut u8)>(asm.into_code()) };
+ bf_entry(&mut vm.dmem as *mut u8);
+}
+
+fn main() {
+ // https://en.wikipedia.org/wiki/Brainfuck#Adding_two_values
+ //let inp = "++>+++++ [<+>-] ++++++++[<++++++>-]<.";
+ //println!("add-print-7 (wikipedia.org) - interp");
+ //run_interp(inp);
+ //println!("add-print-7 (wikipedia.org) - jit");
+ //run_jit(inp);
+
+ // https://en.wikipedia.org/wiki/Brainfuck#Hello_World!
+ let inp = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.";
+ println!("hello-world (wikipedia.org) - interp");
+ run_interp(inp);
+ println!("hello-world (wikipedia.org) - jit");
+ run_jit(inp);
+
+ // https://programmingwiki.de/Brainfuck
+ let inp = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++++>-] <.>+++++++++++[<++++++++>-]<-.--------.+++.------.--------.[-]>++++++++[<++++>- ]<+.[-]++++++++++.";
+ println!("hello-world (programmingwiki.de) - interp");
+ run_interp(inp);
+ println!("hello-world (programmingwiki.de) - jit");
+ run_jit(inp);
+}
diff --git a/src/insn/cmp.rs b/src/insn/cmp.rs
index 835202c..95c513d 100644
--- a/src/insn/cmp.rs
+++ b/src/insn/cmp.rs
@@ -1,5 +1,11 @@
use super::Cmp;
-use crate::{Asm, Imm16, MemOp};
+use crate::{Asm, Imm16, Imm8, MemOp};
+
+impl Cmp<MemOp, Imm8> for Asm {
+ fn cmp(&mut self, op1: MemOp, op2: Imm8) {
+ self.encode_mi(0x80, 0x7, op1, op2);
+ }
+}
impl Cmp<MemOp, Imm16> for Asm {
fn cmp(&mut self, op1: MemOp, op2: Imm16) {