From 9c3c3fd923d894d2351eb22129ea693eb98fa8ff Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Sat, 7 Dec 2024 01:59:18 +0100 Subject: bf: add jit compiler and interpreter --- README.md | 3 + examples/bf_vm.rs | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/insn/cmp.rs | 8 +- 3 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 examples/bf_vm.rs diff --git a/README.md b/README.md index e4d9ba3..29178ee 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ The [`examples/`](examples/) folder provides additional examples: a simple *jit compiler* which has a *jit cache* and translates each *basic block* on first execution when running a VM guest image. For reference an interepter is also implemented. +- [`bf_vm.rs`](examples/bf_vm.rs) implements a + [brainfuck][https://en.wikipedia.org/wiki/Brainfuck] jit compiler + and interpreter. ## git hook for local development diff --git a/examples/bf_vm.rs b/examples/bf_vm.rs new file mode 100644 index 0000000..9045f24 --- /dev/null +++ b/examples/bf_vm.rs @@ -0,0 +1,292 @@ +//! Brainfuck VM. +//! +//! This example implements a simple +//! [brainfuck][https://en.wikipedia.org/wiki/Brainfuck] interpreter +//! [`BrainfuckInterp`] and a jit compiler [`BrainfuckJit`]. +//! +//! Brainfuck is an esoteric programming languge existing of 8 commands. +//! - `>` increment data pointer. +//! - `<` decrement data pointer. +//! - `+` increment data at current data pointer. +//! - `-` decrement data at current data pointer. +//! - `.` output data at current data pointer. +//! - `,` read input and store at current data pointer. +//! - `[` jump behind matching ']' if data at data pointer is zero. +//! - `]` jump behind matching '[' if data at data pointer is non-zero. + +use std::collections::HashMap; +use std::io::Write; + +use juicebox_asm::insn::*; +use juicebox_asm::Runtime; +use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8}; + +struct BrainfuckInterp { + pc: usize, + imem: Vec, + dptr: usize, + dmem: [u8; 256], + branches: HashMap, +} + +impl BrainfuckInterp { + fn new(prog: &str) -> Result { + // Do a first pass over the bf program to filter whitespace and detect invalid tokens. + // Additionally validate all conditional branches, and compute their branch target. + let (imem, branches) = { + // Instruction memory holding the final bf program. + let mut imem = Vec::new(); + // Helper to track index of open brackets. + let mut lhs_brackets = Vec::new(); + // Mapping from branch instruction to branch target. + let mut branches = HashMap::new(); + + for (idx, token) in prog.chars().filter(|c| !c.is_whitespace()).enumerate() { + match token { + '<' | '>' | '+' | '-' | '.' | ',' => { /* ignore valid bf tokens */ } + '[' => lhs_brackets.push(idx), + ']' => { + if let Some(lhs) = lhs_brackets.pop() { + branches.insert(lhs, idx); + branches.insert(idx, lhs); + } else { + return Err(format!("encountered un-balanced brackets, found ']' at index {idx} without matching '['")); + } + } + _ => return Err(format!("invalid bf token '{token}'")), + } + imem.push(token) + } + + if !lhs_brackets.is_empty() { + return Err(String::from( + "encountered un-balanced brackets, left-over '[' after parsing bf program", + )); + } + + (imem, branches) + }; + + Ok(BrainfuckInterp { + pc: 0, + imem, + dptr: 0, + dmem: [0; 256], + branches, + }) + } +} + +fn run_interp(prog: &str) { + let mut vm = BrainfuckInterp::new(prog).unwrap(); + + loop { + let insn = match vm.imem.get(vm.pc) { + Some(insn) => insn, + None => break, // End of bf program. + }; + + let putchar = |val: u8| { + std::io::stdout() + .write(&[val]) + .expect("Failed to write to stdout!"); + }; + + match insn { + '>' => { + vm.dptr += 1; + assert!(vm.dptr < vm.dmem.len()); + } + '<' => { + assert!(vm.dptr > 0); + vm.dptr -= 1; + } + '+' => { + vm.dmem[vm.dptr] += 1; + } + '-' => { + vm.dmem[vm.dptr] -= 1; + } + '.' => { + putchar(vm.dmem[vm.dptr]); + } + ',' => { + unimplemented!("getchar"); + } + '[' => { + if vm.dmem[vm.dptr] == 0 { + vm.pc = *vm.branches.get(&vm.pc).unwrap(); + } + } + ']' => { + if vm.dmem[vm.dptr] != 0 { + vm.pc = *vm.branches.get(&vm.pc).unwrap(); + } + } + _ => unreachable!(), + } + + vm.pc += 1; + } +} + +// -- BRAINFUCK JIT -------------------------------------------------------------- + +#[cfg(not(any(target_arch = "x86_64", target_os = "linux")))] +compile_error!("Only supported on x86_64 with SystemV abi"); + +struct BrainfuckJit { + imem: Vec, + dmem: [u8; 256], +} + +impl BrainfuckJit { + fn new(prog: &str) -> Result { + // Do a first pass over the bf program to filter whitespace and detect invalid tokens. + let imem = prog + .chars() + .filter(|c| !c.is_whitespace()) + .map(|c| match c { + '<' | '>' | '+' | '-' | '.' | ',' | '[' | ']' => Ok(c), + _ => Err(format!("invalid bf token '{c}'")), + }) + .collect::, String>>()?; + + Ok(BrainfuckJit { + imem, + dmem: [0; 256], + }) + } +} + +extern "C" fn putchar(c: u8) { + std::io::stdout() + .write(&[c]) + .expect("Failed to write to stdout!"); +} + +fn run_jit(prog: &str) { + let mut vm = BrainfuckJit::new(prog).unwrap(); + + // Use callee saved registers to hold vm state, such that we don't + // need to save any state before calling out to putchar. + let dmem_base = Reg64::rbx; + let dmem_idx = Reg64::r12; + + let mut asm = Asm::new(); + // Move data memory pointer (argument on jit entry) into correct + // register. + asm.mov(dmem_base, Reg64::rdi); + // Clear data memory index. + asm.xor(dmem_idx, dmem_idx); + + // A stack of label pairs, used to link up forward and backward + // jumps for a given '[]' pair. + let mut label_stack = Vec::new(); + + // Generate code for each instruction in the bf program. + for insn in vm.imem { + match insn { + '>' => { + // TODO: generate runtime bounds check. + asm.inc(dmem_idx); + } + '<' => { + // TODO: generate runtime bounds check. + asm.dec(dmem_idx); + } + '+' => { + asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); + } + '-' => { + asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); + } + '.' => { + // Load data memory from active cell into di register, + // which is the first argument register according to + // the SystemV abi, then call into putchar. Since we + // stored all out vm state in callee saved registers + // we don't need to save any registers before the + // call. + asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx)); + asm.mov(Reg64::rax, Imm64::from(putchar as usize)); + asm.call(Reg64::rax); + } + ',' => { + unimplemented!("getchar"); + } + '[' => { + // Create new label pair. + label_stack.push((Label::new(), Label::new())); + // UNWRAP: We just pushed a new entry on the stack. + let label_pair = label_stack.last_mut().unwrap(); + + // Goto label_pair.0 if data memory at active cell is 0. + // if vm.dmem[vm.dptr] == 0 goto label_pair.0 + asm.cmp( + MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Imm8::from(0u8), + ); + asm.jz(&mut label_pair.0); + + // Bind label_pair.1 after the jump instruction, which + // will be the branch target for the matching ']'. + asm.bind(&mut label_pair.1); + } + ']' => { + let mut label_pair = label_stack + .pop() + .expect("encountered un-balanced brackets, found ']' without matching '['"); + + // Goto label_pair.1 if data memory at active cell is + // not 0. + // if vm.dmem[vm.dptr] != 0 goto label_pair.1 + asm.cmp( + MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Imm8::from(0u8), + ); + asm.jnz(&mut label_pair.1); + + // Bind label_pair.0 after the jump instruction, which + // is the branch target for the matching '['. + asm.bind(&mut label_pair.0); + } + _ => unreachable!(), + } + } + + // Return from bf program. + asm.ret(); + + if !label_stack.is_empty() { + panic!("encountered un-balanced brackets, left-over '[' after jitting bf program") + } + + // Execute jitted bf program. + let mut rt = Runtime::new(); + let bf_entry = unsafe { rt.add_code::(asm.into_code()) }; + bf_entry(&mut vm.dmem as *mut u8); +} + +fn main() { + // https://en.wikipedia.org/wiki/Brainfuck#Adding_two_values + //let inp = "++>+++++ [<+>-] ++++++++[<++++++>-]<."; + //println!("add-print-7 (wikipedia.org) - interp"); + //run_interp(inp); + //println!("add-print-7 (wikipedia.org) - jit"); + //run_jit(inp); + + // https://en.wikipedia.org/wiki/Brainfuck#Hello_World! + let inp = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."; + println!("hello-world (wikipedia.org) - interp"); + run_interp(inp); + println!("hello-world (wikipedia.org) - jit"); + run_jit(inp); + + // https://programmingwiki.de/Brainfuck + let inp = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++++>-] <.>+++++++++++[<++++++++>-]<-.--------.+++.------.--------.[-]>++++++++[<++++>- ]<+.[-]++++++++++."; + println!("hello-world (programmingwiki.de) - interp"); + run_interp(inp); + println!("hello-world (programmingwiki.de) - jit"); + run_jit(inp); +} diff --git a/src/insn/cmp.rs b/src/insn/cmp.rs index 835202c..95c513d 100644 --- a/src/insn/cmp.rs +++ b/src/insn/cmp.rs @@ -1,5 +1,11 @@ use super::Cmp; -use crate::{Asm, Imm16, MemOp}; +use crate::{Asm, Imm16, Imm8, MemOp}; + +impl Cmp for Asm { + fn cmp(&mut self, op1: MemOp, op2: Imm8) { + self.encode_mi(0x80, 0x7, op1, op2); + } +} impl Cmp for Asm { fn cmp(&mut self, op1: MemOp, op2: Imm16) { -- cgit v1.2.3