From 5f90bf71ab2d347fbe2d0dc89b71ab5d6e923b46 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Sun, 8 Dec 2024 23:48:15 +0100 Subject: bf: rename file and add to ci --- README.md | 2 +- ci/Makefile | 1 + examples/bf.rs | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ examples/bf_vm.rs | 292 ------------------------------------------------------ 4 files changed, 294 insertions(+), 293 deletions(-) create mode 100644 examples/bf.rs delete mode 100644 examples/bf_vm.rs diff --git a/README.md b/README.md index 6ca332c..abdc70f 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ The [`examples/`](examples/) folder provides additional examples: a simple *jit compiler* which has a *jit cache* and translates each *basic block* on first execution when running a VM guest image. For reference an interepter is also implemented. -- [`bf_vm.rs`](examples/bf_vm.rs) implements a +- [`bfrs`](examples/bf.rs) implements a [brainfuck](https://en.wikipedia.org/wiki/Brainfuck) jit compiler and interpreter. diff --git a/ci/Makefile b/ci/Makefile index 9a76fdc..b433ed2 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -28,3 +28,4 @@ run-examples: cargo run --example add cargo run --example tiny_vm cargo run --example tiny_vm jit + cargo run --example bf diff --git a/examples/bf.rs b/examples/bf.rs new file mode 100644 index 0000000..ce2a682 --- /dev/null +++ b/examples/bf.rs @@ -0,0 +1,292 @@ +//! Brainfuck VM. +//! +//! This example implements a simple +//! [brainfuck](https://en.wikipedia.org/wiki/Brainfuck) interpreter +//! [`BrainfuckInterp`] and a jit compiler [`BrainfuckJit`]. +//! +//! Brainfuck is an esoteric programming languge existing of 8 commands. +//! - `>` increment data pointer. +//! - `<` decrement data pointer. +//! - `+` increment data at current data pointer. +//! - `-` decrement data at current data pointer. +//! - `.` output data at current data pointer. +//! - `,` read input and store at current data pointer. +//! - `[` jump behind matching ']' if data at data pointer is zero. +//! - `]` jump behind matching '[' if data at data pointer is non-zero. + +use std::collections::HashMap; +use std::io::Write; + +use juicebox_asm::insn::*; +use juicebox_asm::Runtime; +use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8}; + +struct BrainfuckInterp { + pc: usize, + imem: Vec, + dptr: usize, + dmem: [u8; 256], + branches: HashMap, +} + +impl BrainfuckInterp { + fn new(prog: &str) -> Result { + // Do a first pass over the bf program to filter whitespace and detect invalid tokens. + // Additionally validate all conditional branches, and compute their branch target. + let (imem, branches) = { + // Instruction memory holding the final bf program. + let mut imem = Vec::new(); + // Helper to track index of open brackets. + let mut lhs_brackets = Vec::new(); + // Mapping from branch instruction to branch target. + let mut branches = HashMap::new(); + + for (idx, token) in prog.chars().filter(|c| !c.is_whitespace()).enumerate() { + match token { + '<' | '>' | '+' | '-' | '.' | ',' => { /* ignore valid bf tokens */ } + '[' => lhs_brackets.push(idx), + ']' => { + if let Some(lhs) = lhs_brackets.pop() { + branches.insert(lhs, idx); + branches.insert(idx, lhs); + } else { + return Err(format!("encountered un-balanced brackets, found ']' at index {idx} without matching '['")); + } + } + _ => return Err(format!("invalid bf token '{token}'")), + } + imem.push(token) + } + + if !lhs_brackets.is_empty() { + return Err(String::from( + "encountered un-balanced brackets, left-over '[' after parsing bf program", + )); + } + + (imem, branches) + }; + + Ok(BrainfuckInterp { + pc: 0, + imem, + dptr: 0, + dmem: [0; 256], + branches, + }) + } +} + +fn run_interp(prog: &str) { + let mut vm = BrainfuckInterp::new(prog).unwrap(); + + loop { + let insn = match vm.imem.get(vm.pc) { + Some(insn) => insn, + None => break, // End of bf program. + }; + + let putchar = |val: u8| { + std::io::stdout() + .write(&[val]) + .expect("Failed to write to stdout!"); + }; + + match insn { + '>' => { + vm.dptr += 1; + assert!(vm.dptr < vm.dmem.len()); + } + '<' => { + assert!(vm.dptr > 0); + vm.dptr -= 1; + } + '+' => { + vm.dmem[vm.dptr] += 1; + } + '-' => { + vm.dmem[vm.dptr] -= 1; + } + '.' => { + putchar(vm.dmem[vm.dptr]); + } + ',' => { + unimplemented!("getchar"); + } + '[' => { + if vm.dmem[vm.dptr] == 0 { + vm.pc = *vm.branches.get(&vm.pc).unwrap(); + } + } + ']' => { + if vm.dmem[vm.dptr] != 0 { + vm.pc = *vm.branches.get(&vm.pc).unwrap(); + } + } + _ => unreachable!(), + } + + vm.pc += 1; + } +} + +// -- BRAINFUCK JIT -------------------------------------------------------------- + +#[cfg(not(any(target_arch = "x86_64", target_os = "linux")))] +compile_error!("Only supported on x86_64 with SystemV abi"); + +struct BrainfuckJit { + imem: Vec, + dmem: [u8; 256], +} + +impl BrainfuckJit { + fn new(prog: &str) -> Result { + // Do a first pass over the bf program to filter whitespace and detect invalid tokens. + let imem = prog + .chars() + .filter(|c| !c.is_whitespace()) + .map(|c| match c { + '<' | '>' | '+' | '-' | '.' | ',' | '[' | ']' => Ok(c), + _ => Err(format!("invalid bf token '{c}'")), + }) + .collect::, String>>()?; + + Ok(BrainfuckJit { + imem, + dmem: [0; 256], + }) + } +} + +extern "C" fn putchar(c: u8) { + std::io::stdout() + .write(&[c]) + .expect("Failed to write to stdout!"); +} + +fn run_jit(prog: &str) { + let mut vm = BrainfuckJit::new(prog).unwrap(); + + // Use callee saved registers to hold vm state, such that we don't + // need to save any state before calling out to putchar. + let dmem_base = Reg64::rbx; + let dmem_idx = Reg64::r12; + + let mut asm = Asm::new(); + // Move data memory pointer (argument on jit entry) into correct + // register. + asm.mov(dmem_base, Reg64::rdi); + // Clear data memory index. + asm.xor(dmem_idx, dmem_idx); + + // A stack of label pairs, used to link up forward and backward + // jumps for a given '[]' pair. + let mut label_stack = Vec::new(); + + // Generate code for each instruction in the bf program. + for insn in vm.imem { + match insn { + '>' => { + // TODO: generate runtime bounds check. + asm.inc(dmem_idx); + } + '<' => { + // TODO: generate runtime bounds check. + asm.dec(dmem_idx); + } + '+' => { + asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); + } + '-' => { + asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); + } + '.' => { + // Load data memory from active cell into di register, + // which is the first argument register according to + // the SystemV abi, then call into putchar. Since we + // stored all out vm state in callee saved registers + // we don't need to save any registers before the + // call. + asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx)); + asm.mov(Reg64::rax, Imm64::from(putchar as usize)); + asm.call(Reg64::rax); + } + ',' => { + unimplemented!("getchar"); + } + '[' => { + // Create new label pair. + label_stack.push((Label::new(), Label::new())); + // UNWRAP: We just pushed a new entry on the stack. + let label_pair = label_stack.last_mut().unwrap(); + + // Goto label_pair.0 if data memory at active cell is 0. + // if vm.dmem[vm.dptr] == 0 goto label_pair.0 + asm.cmp( + MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Imm8::from(0u8), + ); + asm.jz(&mut label_pair.0); + + // Bind label_pair.1 after the jump instruction, which + // will be the branch target for the matching ']'. + asm.bind(&mut label_pair.1); + } + ']' => { + let mut label_pair = label_stack + .pop() + .expect("encountered un-balanced brackets, found ']' without matching '['"); + + // Goto label_pair.1 if data memory at active cell is + // not 0. + // if vm.dmem[vm.dptr] != 0 goto label_pair.1 + asm.cmp( + MemOp::IndirectBaseIndex(dmem_base, dmem_idx), + Imm8::from(0u8), + ); + asm.jnz(&mut label_pair.1); + + // Bind label_pair.0 after the jump instruction, which + // is the branch target for the matching '['. + asm.bind(&mut label_pair.0); + } + _ => unreachable!(), + } + } + + // Return from bf program. + asm.ret(); + + if !label_stack.is_empty() { + panic!("encountered un-balanced brackets, left-over '[' after jitting bf program") + } + + // Execute jitted bf program. + let mut rt = Runtime::new(); + let bf_entry = unsafe { rt.add_code::(asm.into_code()) }; + bf_entry(&mut vm.dmem as *mut u8); +} + +fn main() { + // https://en.wikipedia.org/wiki/Brainfuck#Adding_two_values + //let inp = "++>+++++ [<+>-] ++++++++[<++++++>-]<."; + //println!("add-print-7 (wikipedia.org) - interp"); + //run_interp(inp); + //println!("add-print-7 (wikipedia.org) - jit"); + //run_jit(inp); + + // https://en.wikipedia.org/wiki/Brainfuck#Hello_World! + let inp = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."; + println!("hello-world (wikipedia.org) - interp"); + run_interp(inp); + println!("hello-world (wikipedia.org) - jit"); + run_jit(inp); + + // https://programmingwiki.de/Brainfuck + let inp = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++++>-] <.>+++++++++++[<++++++++>-]<-.--------.+++.------.--------.[-]>++++++++[<++++>- ]<+.[-]++++++++++."; + println!("hello-world (programmingwiki.de) - interp"); + run_interp(inp); + println!("hello-world (programmingwiki.de) - jit"); + run_jit(inp); +} diff --git a/examples/bf_vm.rs b/examples/bf_vm.rs deleted file mode 100644 index ce2a682..0000000 --- a/examples/bf_vm.rs +++ /dev/null @@ -1,292 +0,0 @@ -//! Brainfuck VM. -//! -//! This example implements a simple -//! [brainfuck](https://en.wikipedia.org/wiki/Brainfuck) interpreter -//! [`BrainfuckInterp`] and a jit compiler [`BrainfuckJit`]. -//! -//! Brainfuck is an esoteric programming languge existing of 8 commands. -//! - `>` increment data pointer. -//! - `<` decrement data pointer. -//! - `+` increment data at current data pointer. -//! - `-` decrement data at current data pointer. -//! - `.` output data at current data pointer. -//! - `,` read input and store at current data pointer. -//! - `[` jump behind matching ']' if data at data pointer is zero. -//! - `]` jump behind matching '[' if data at data pointer is non-zero. - -use std::collections::HashMap; -use std::io::Write; - -use juicebox_asm::insn::*; -use juicebox_asm::Runtime; -use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8}; - -struct BrainfuckInterp { - pc: usize, - imem: Vec, - dptr: usize, - dmem: [u8; 256], - branches: HashMap, -} - -impl BrainfuckInterp { - fn new(prog: &str) -> Result { - // Do a first pass over the bf program to filter whitespace and detect invalid tokens. - // Additionally validate all conditional branches, and compute their branch target. - let (imem, branches) = { - // Instruction memory holding the final bf program. - let mut imem = Vec::new(); - // Helper to track index of open brackets. - let mut lhs_brackets = Vec::new(); - // Mapping from branch instruction to branch target. - let mut branches = HashMap::new(); - - for (idx, token) in prog.chars().filter(|c| !c.is_whitespace()).enumerate() { - match token { - '<' | '>' | '+' | '-' | '.' | ',' => { /* ignore valid bf tokens */ } - '[' => lhs_brackets.push(idx), - ']' => { - if let Some(lhs) = lhs_brackets.pop() { - branches.insert(lhs, idx); - branches.insert(idx, lhs); - } else { - return Err(format!("encountered un-balanced brackets, found ']' at index {idx} without matching '['")); - } - } - _ => return Err(format!("invalid bf token '{token}'")), - } - imem.push(token) - } - - if !lhs_brackets.is_empty() { - return Err(String::from( - "encountered un-balanced brackets, left-over '[' after parsing bf program", - )); - } - - (imem, branches) - }; - - Ok(BrainfuckInterp { - pc: 0, - imem, - dptr: 0, - dmem: [0; 256], - branches, - }) - } -} - -fn run_interp(prog: &str) { - let mut vm = BrainfuckInterp::new(prog).unwrap(); - - loop { - let insn = match vm.imem.get(vm.pc) { - Some(insn) => insn, - None => break, // End of bf program. - }; - - let putchar = |val: u8| { - std::io::stdout() - .write(&[val]) - .expect("Failed to write to stdout!"); - }; - - match insn { - '>' => { - vm.dptr += 1; - assert!(vm.dptr < vm.dmem.len()); - } - '<' => { - assert!(vm.dptr > 0); - vm.dptr -= 1; - } - '+' => { - vm.dmem[vm.dptr] += 1; - } - '-' => { - vm.dmem[vm.dptr] -= 1; - } - '.' => { - putchar(vm.dmem[vm.dptr]); - } - ',' => { - unimplemented!("getchar"); - } - '[' => { - if vm.dmem[vm.dptr] == 0 { - vm.pc = *vm.branches.get(&vm.pc).unwrap(); - } - } - ']' => { - if vm.dmem[vm.dptr] != 0 { - vm.pc = *vm.branches.get(&vm.pc).unwrap(); - } - } - _ => unreachable!(), - } - - vm.pc += 1; - } -} - -// -- BRAINFUCK JIT -------------------------------------------------------------- - -#[cfg(not(any(target_arch = "x86_64", target_os = "linux")))] -compile_error!("Only supported on x86_64 with SystemV abi"); - -struct BrainfuckJit { - imem: Vec, - dmem: [u8; 256], -} - -impl BrainfuckJit { - fn new(prog: &str) -> Result { - // Do a first pass over the bf program to filter whitespace and detect invalid tokens. - let imem = prog - .chars() - .filter(|c| !c.is_whitespace()) - .map(|c| match c { - '<' | '>' | '+' | '-' | '.' | ',' | '[' | ']' => Ok(c), - _ => Err(format!("invalid bf token '{c}'")), - }) - .collect::, String>>()?; - - Ok(BrainfuckJit { - imem, - dmem: [0; 256], - }) - } -} - -extern "C" fn putchar(c: u8) { - std::io::stdout() - .write(&[c]) - .expect("Failed to write to stdout!"); -} - -fn run_jit(prog: &str) { - let mut vm = BrainfuckJit::new(prog).unwrap(); - - // Use callee saved registers to hold vm state, such that we don't - // need to save any state before calling out to putchar. - let dmem_base = Reg64::rbx; - let dmem_idx = Reg64::r12; - - let mut asm = Asm::new(); - // Move data memory pointer (argument on jit entry) into correct - // register. - asm.mov(dmem_base, Reg64::rdi); - // Clear data memory index. - asm.xor(dmem_idx, dmem_idx); - - // A stack of label pairs, used to link up forward and backward - // jumps for a given '[]' pair. - let mut label_stack = Vec::new(); - - // Generate code for each instruction in the bf program. - for insn in vm.imem { - match insn { - '>' => { - // TODO: generate runtime bounds check. - asm.inc(dmem_idx); - } - '<' => { - // TODO: generate runtime bounds check. - asm.dec(dmem_idx); - } - '+' => { - asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); - } - '-' => { - asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx))); - } - '.' => { - // Load data memory from active cell into di register, - // which is the first argument register according to - // the SystemV abi, then call into putchar. Since we - // stored all out vm state in callee saved registers - // we don't need to save any registers before the - // call. - asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx)); - asm.mov(Reg64::rax, Imm64::from(putchar as usize)); - asm.call(Reg64::rax); - } - ',' => { - unimplemented!("getchar"); - } - '[' => { - // Create new label pair. - label_stack.push((Label::new(), Label::new())); - // UNWRAP: We just pushed a new entry on the stack. - let label_pair = label_stack.last_mut().unwrap(); - - // Goto label_pair.0 if data memory at active cell is 0. - // if vm.dmem[vm.dptr] == 0 goto label_pair.0 - asm.cmp( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), - Imm8::from(0u8), - ); - asm.jz(&mut label_pair.0); - - // Bind label_pair.1 after the jump instruction, which - // will be the branch target for the matching ']'. - asm.bind(&mut label_pair.1); - } - ']' => { - let mut label_pair = label_stack - .pop() - .expect("encountered un-balanced brackets, found ']' without matching '['"); - - // Goto label_pair.1 if data memory at active cell is - // not 0. - // if vm.dmem[vm.dptr] != 0 goto label_pair.1 - asm.cmp( - MemOp::IndirectBaseIndex(dmem_base, dmem_idx), - Imm8::from(0u8), - ); - asm.jnz(&mut label_pair.1); - - // Bind label_pair.0 after the jump instruction, which - // is the branch target for the matching '['. - asm.bind(&mut label_pair.0); - } - _ => unreachable!(), - } - } - - // Return from bf program. - asm.ret(); - - if !label_stack.is_empty() { - panic!("encountered un-balanced brackets, left-over '[' after jitting bf program") - } - - // Execute jitted bf program. - let mut rt = Runtime::new(); - let bf_entry = unsafe { rt.add_code::(asm.into_code()) }; - bf_entry(&mut vm.dmem as *mut u8); -} - -fn main() { - // https://en.wikipedia.org/wiki/Brainfuck#Adding_two_values - //let inp = "++>+++++ [<+>-] ++++++++[<++++++>-]<."; - //println!("add-print-7 (wikipedia.org) - interp"); - //run_interp(inp); - //println!("add-print-7 (wikipedia.org) - jit"); - //run_jit(inp); - - // https://en.wikipedia.org/wiki/Brainfuck#Hello_World! - let inp = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."; - println!("hello-world (wikipedia.org) - interp"); - run_interp(inp); - println!("hello-world (wikipedia.org) - jit"); - run_jit(inp); - - // https://programmingwiki.de/Brainfuck - let inp = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++++>-] <.>+++++++++++[<++++++++>-]<-.--------.+++.------.--------.[-]>++++++++[<++++>- ]<+.[-]++++++++++."; - println!("hello-world (programmingwiki.de) - interp"); - run_interp(inp); - println!("hello-world (programmingwiki.de) - jit"); - run_jit(inp); -} -- cgit v1.2.3