aboutsummaryrefslogtreecommitdiffhomepage
path: root/examples/bf_vm.rs
diff options
context:
space:
mode:
Diffstat (limited to 'examples/bf_vm.rs')
-rw-r--r--examples/bf_vm.rs292
1 files changed, 0 insertions, 292 deletions
diff --git a/examples/bf_vm.rs b/examples/bf_vm.rs
deleted file mode 100644
index ce2a682..0000000
--- a/examples/bf_vm.rs
+++ /dev/null
@@ -1,292 +0,0 @@
-//! Brainfuck VM.
-//!
-//! This example implements a simple
-//! [brainfuck](https://en.wikipedia.org/wiki/Brainfuck) interpreter
-//! [`BrainfuckInterp`] and a jit compiler [`BrainfuckJit`].
-//!
-//! Brainfuck is an esoteric programming languge existing of 8 commands.
-//! - `>` increment data pointer.
-//! - `<` decrement data pointer.
-//! - `+` increment data at current data pointer.
-//! - `-` decrement data at current data pointer.
-//! - `.` output data at current data pointer.
-//! - `,` read input and store at current data pointer.
-//! - `[` jump behind matching ']' if data at data pointer is zero.
-//! - `]` jump behind matching '[' if data at data pointer is non-zero.
-
-use std::collections::HashMap;
-use std::io::Write;
-
-use juicebox_asm::insn::*;
-use juicebox_asm::Runtime;
-use juicebox_asm::{Asm, Imm64, Imm8, Label, MemOp, MemOp8, Reg64, Reg8};
-
-struct BrainfuckInterp {
- pc: usize,
- imem: Vec<char>,
- dptr: usize,
- dmem: [u8; 256],
- branches: HashMap<usize, usize>,
-}
-
-impl BrainfuckInterp {
- fn new(prog: &str) -> Result<Self, String> {
- // Do a first pass over the bf program to filter whitespace and detect invalid tokens.
- // Additionally validate all conditional branches, and compute their branch target.
- let (imem, branches) = {
- // Instruction memory holding the final bf program.
- let mut imem = Vec::new();
- // Helper to track index of open brackets.
- let mut lhs_brackets = Vec::new();
- // Mapping from branch instruction to branch target.
- let mut branches = HashMap::new();
-
- for (idx, token) in prog.chars().filter(|c| !c.is_whitespace()).enumerate() {
- match token {
- '<' | '>' | '+' | '-' | '.' | ',' => { /* ignore valid bf tokens */ }
- '[' => lhs_brackets.push(idx),
- ']' => {
- if let Some(lhs) = lhs_brackets.pop() {
- branches.insert(lhs, idx);
- branches.insert(idx, lhs);
- } else {
- return Err(format!("encountered un-balanced brackets, found ']' at index {idx} without matching '['"));
- }
- }
- _ => return Err(format!("invalid bf token '{token}'")),
- }
- imem.push(token)
- }
-
- if !lhs_brackets.is_empty() {
- return Err(String::from(
- "encountered un-balanced brackets, left-over '[' after parsing bf program",
- ));
- }
-
- (imem, branches)
- };
-
- Ok(BrainfuckInterp {
- pc: 0,
- imem,
- dptr: 0,
- dmem: [0; 256],
- branches,
- })
- }
-}
-
-fn run_interp(prog: &str) {
- let mut vm = BrainfuckInterp::new(prog).unwrap();
-
- loop {
- let insn = match vm.imem.get(vm.pc) {
- Some(insn) => insn,
- None => break, // End of bf program.
- };
-
- let putchar = |val: u8| {
- std::io::stdout()
- .write(&[val])
- .expect("Failed to write to stdout!");
- };
-
- match insn {
- '>' => {
- vm.dptr += 1;
- assert!(vm.dptr < vm.dmem.len());
- }
- '<' => {
- assert!(vm.dptr > 0);
- vm.dptr -= 1;
- }
- '+' => {
- vm.dmem[vm.dptr] += 1;
- }
- '-' => {
- vm.dmem[vm.dptr] -= 1;
- }
- '.' => {
- putchar(vm.dmem[vm.dptr]);
- }
- ',' => {
- unimplemented!("getchar");
- }
- '[' => {
- if vm.dmem[vm.dptr] == 0 {
- vm.pc = *vm.branches.get(&vm.pc).unwrap();
- }
- }
- ']' => {
- if vm.dmem[vm.dptr] != 0 {
- vm.pc = *vm.branches.get(&vm.pc).unwrap();
- }
- }
- _ => unreachable!(),
- }
-
- vm.pc += 1;
- }
-}
-
-// -- BRAINFUCK JIT --------------------------------------------------------------
-
-#[cfg(not(any(target_arch = "x86_64", target_os = "linux")))]
-compile_error!("Only supported on x86_64 with SystemV abi");
-
-struct BrainfuckJit {
- imem: Vec<char>,
- dmem: [u8; 256],
-}
-
-impl BrainfuckJit {
- fn new(prog: &str) -> Result<Self, String> {
- // Do a first pass over the bf program to filter whitespace and detect invalid tokens.
- let imem = prog
- .chars()
- .filter(|c| !c.is_whitespace())
- .map(|c| match c {
- '<' | '>' | '+' | '-' | '.' | ',' | '[' | ']' => Ok(c),
- _ => Err(format!("invalid bf token '{c}'")),
- })
- .collect::<Result<Vec<char>, String>>()?;
-
- Ok(BrainfuckJit {
- imem,
- dmem: [0; 256],
- })
- }
-}
-
-extern "C" fn putchar(c: u8) {
- std::io::stdout()
- .write(&[c])
- .expect("Failed to write to stdout!");
-}
-
-fn run_jit(prog: &str) {
- let mut vm = BrainfuckJit::new(prog).unwrap();
-
- // Use callee saved registers to hold vm state, such that we don't
- // need to save any state before calling out to putchar.
- let dmem_base = Reg64::rbx;
- let dmem_idx = Reg64::r12;
-
- let mut asm = Asm::new();
- // Move data memory pointer (argument on jit entry) into correct
- // register.
- asm.mov(dmem_base, Reg64::rdi);
- // Clear data memory index.
- asm.xor(dmem_idx, dmem_idx);
-
- // A stack of label pairs, used to link up forward and backward
- // jumps for a given '[]' pair.
- let mut label_stack = Vec::new();
-
- // Generate code for each instruction in the bf program.
- for insn in vm.imem {
- match insn {
- '>' => {
- // TODO: generate runtime bounds check.
- asm.inc(dmem_idx);
- }
- '<' => {
- // TODO: generate runtime bounds check.
- asm.dec(dmem_idx);
- }
- '+' => {
- asm.inc(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx)));
- }
- '-' => {
- asm.dec(MemOp8::from(MemOp::IndirectBaseIndex(dmem_base, dmem_idx)));
- }
- '.' => {
- // Load data memory from active cell into di register,
- // which is the first argument register according to
- // the SystemV abi, then call into putchar. Since we
- // stored all out vm state in callee saved registers
- // we don't need to save any registers before the
- // call.
- asm.mov(Reg8::dil, MemOp::IndirectBaseIndex(dmem_base, dmem_idx));
- asm.mov(Reg64::rax, Imm64::from(putchar as usize));
- asm.call(Reg64::rax);
- }
- ',' => {
- unimplemented!("getchar");
- }
- '[' => {
- // Create new label pair.
- label_stack.push((Label::new(), Label::new()));
- // UNWRAP: We just pushed a new entry on the stack.
- let label_pair = label_stack.last_mut().unwrap();
-
- // Goto label_pair.0 if data memory at active cell is 0.
- // if vm.dmem[vm.dptr] == 0 goto label_pair.0
- asm.cmp(
- MemOp::IndirectBaseIndex(dmem_base, dmem_idx),
- Imm8::from(0u8),
- );
- asm.jz(&mut label_pair.0);
-
- // Bind label_pair.1 after the jump instruction, which
- // will be the branch target for the matching ']'.
- asm.bind(&mut label_pair.1);
- }
- ']' => {
- let mut label_pair = label_stack
- .pop()
- .expect("encountered un-balanced brackets, found ']' without matching '['");
-
- // Goto label_pair.1 if data memory at active cell is
- // not 0.
- // if vm.dmem[vm.dptr] != 0 goto label_pair.1
- asm.cmp(
- MemOp::IndirectBaseIndex(dmem_base, dmem_idx),
- Imm8::from(0u8),
- );
- asm.jnz(&mut label_pair.1);
-
- // Bind label_pair.0 after the jump instruction, which
- // is the branch target for the matching '['.
- asm.bind(&mut label_pair.0);
- }
- _ => unreachable!(),
- }
- }
-
- // Return from bf program.
- asm.ret();
-
- if !label_stack.is_empty() {
- panic!("encountered un-balanced brackets, left-over '[' after jitting bf program")
- }
-
- // Execute jitted bf program.
- let mut rt = Runtime::new();
- let bf_entry = unsafe { rt.add_code::<extern "C" fn(*mut u8)>(asm.into_code()) };
- bf_entry(&mut vm.dmem as *mut u8);
-}
-
-fn main() {
- // https://en.wikipedia.org/wiki/Brainfuck#Adding_two_values
- //let inp = "++>+++++ [<+>-] ++++++++[<++++++>-]<.";
- //println!("add-print-7 (wikipedia.org) - interp");
- //run_interp(inp);
- //println!("add-print-7 (wikipedia.org) - jit");
- //run_jit(inp);
-
- // https://en.wikipedia.org/wiki/Brainfuck#Hello_World!
- let inp = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.";
- println!("hello-world (wikipedia.org) - interp");
- run_interp(inp);
- println!("hello-world (wikipedia.org) - jit");
- run_jit(inp);
-
- // https://programmingwiki.de/Brainfuck
- let inp = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++++>-] <.>+++++++++++[<++++++++>-]<-.--------.+++.------.--------.[-]>++++++++[<++++>- ]<+.[-]++++++++++.";
- println!("hello-world (programmingwiki.de) - interp");
- run_interp(inp);
- println!("hello-world (programmingwiki.de) - jit");
- run_jit(inp);
-}