From 0efb15a9eb706896cdabb9ca5d2b0c295c2dffcf Mon Sep 17 00:00:00 2001 From: Brian Cully Date: Sun, 24 Aug 2025 14:22:48 -0400 Subject: rename parser → compiler, interp → vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reflects reality better --- src/forth/vm.rs | 532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 532 insertions(+) create mode 100644 src/forth/vm.rs (limited to 'src/forth/vm.rs') diff --git a/src/forth/vm.rs b/src/forth/vm.rs new file mode 100644 index 0000000..b4ce674 --- /dev/null +++ b/src/forth/vm.rs @@ -0,0 +1,532 @@ +use log::debug; + +use std::ops::Index; + +#[derive(Clone, Debug, PartialEq)] +pub enum OpCode { + Num(i32), + Str(usize, usize), + Call(usize), + TCall(usize), // tail call, really just ‘jmp’, but named to indicate desired usage. + If(usize, Option), // true word index, optional false word index. + TIf(usize, Option), // tail version + Add, + Sub, + Mul, + Div, + Dup, + Drop, + EQ, + GT, + GTE, + LT, + LTE, + // todo: maybe get rid of ret? it's only used in word definitions, + // which are bounded, so running of the end can be treated as an + // implicit ‘ret’. + Ret, +} + +#[derive(Clone, Debug)] +pub struct ByteCode(pub Vec); +impl std::ops::Deref for ByteCode { + type Target = Vec; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl ByteCode { + pub fn len(&self) -> usize { + self.0.len() + } +} + +impl Index for ByteCode { + type Output = OpCode; + + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct InstructionPointer { + pub word: usize, + pub offset: usize, +} + +impl InstructionPointer { + pub fn new() -> Self { + Self { + word: 0, + offset: 0, + } + } +} + +#[derive(Debug)] +pub struct DataStack(pub Vec); + +#[derive(Debug)] +pub struct CallStack(pub Vec); + +#[derive(Clone, Debug)] +pub struct WordList(pub Vec); +impl std::ops::Deref for WordList { + type Target = Vec; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug)] +pub struct VM { + // todo: don't be pub, probably + pub stack: DataStack, + pub callstack: CallStack, + pub wordlist: WordList, + pub ip: InstructionPointer, +} + +#[derive(Debug)] +pub enum RuntimeError { + StackUnderflow, +} + +impl VM { + pub fn new(wordlist: WordList) -> Self { + Self { + stack: DataStack(Vec::new()), + callstack: CallStack(Vec::new()), + ip: InstructionPointer::new(), + wordlist, + } + } + + pub fn jmp(&mut self, word: usize) { + self.ip.word = word; + self.ip.offset = 0; + } + + pub fn call(&mut self, word: usize) { + self.callstack.0.push(self.ip); + self.jmp(word); + } + + // bool is still_running + pub fn tick(&mut self) -> Result { + let bc = &self.wordlist.0[self.ip.word]; + match bc[self.ip.offset] { + OpCode::Num(n) => self.stack.0.push(n), + OpCode::Str(start, end) => debug!("got str: {} to {}", start, end), + OpCode::Add => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 + n1); + }, + OpCode::Sub => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 - n1); + }, + OpCode::Mul => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 * n1); + }, + OpCode::Div => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 / n1); + }, + OpCode::Dup => { + let n = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n); + self.stack.0.push(n); + }, + OpCode::Drop => { + self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + }, + OpCode::EQ => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let v = if n2 == n1 { -1 } else { 0 }; + self.stack.0.push(v); + }, + OpCode::GT => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let v = if n2 > n1 { -1 } else { 0 }; + self.stack.0.push(v); + }, + OpCode::GTE => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let v = if n2 >= n1 { -1 } else { 0 }; + self.stack.0.push(v); + }, + OpCode::LT => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let v = if n2 < n1 { -1 } else { 0 }; + self.stack.0.push(v); + }, + OpCode::LTE => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let v = if n2 <= n1 { -1 } else { 0 }; + self.stack.0.push(v); + }, + OpCode::Ret => { + self.ip = self.callstack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + }, + OpCode::Call(i) => { + self.call(i); + // skip the offset increment + return Ok(true) + }, + OpCode::TCall(i) => { + self.jmp(i); + // skip the offset increment + return Ok(true) + }, + OpCode::If(true_clause, false_clause) => { + let flag = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + if flag != 0 { + self.call(true_clause); + return Ok(true) + } else if let Some(false_clause) = false_clause { + self.call(false_clause); + return Ok(true) + } + }, + OpCode::TIf(true_clause, false_clause) => { + let flag = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + if flag != 0 { + self.jmp(true_clause); + return Ok(true) + } else if let Some(false_clause) = false_clause { + self.jmp(false_clause); + return Ok(true) + } + }, + } + self.ip.offset += 1; + Ok(self.ip.offset < self.wordlist.0[self.ip.word].len()) + } + + pub fn run(&mut self) -> Result { + let mut count = 0; + while self.tick()? { count += 1 } + Ok(count) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::OpCode; + + #[test] + fn simple_ticks() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add])); + let mut vm = VM::new(wordlist); + assert_eq!(vm.tick()?, true, "first arg running"); + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 1); + assert_eq!(vm.stack.0.len(), 1); + assert_eq!(vm.stack.0[0], 2, "first argument"); + assert_eq!(vm.tick()?, true, "second arg running"); + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 2); + assert_eq!(vm.stack.0.len(), 2); + assert_eq!(vm.stack.0[1], 3, "second argument"); + assert_eq!(vm.tick()?, false, "stopped after addition"); + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 3); + assert_eq!(vm.stack.0.len(), 1); + assert_eq!(vm.stack.0[0], 5, "result of addition"); + + Ok(()) + } + + #[test] + fn custom_word() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![ + OpCode::Num(2), OpCode::Num(3), OpCode::Call(1), OpCode::Num(-2), OpCode::Add, + OpCode::Sub, OpCode::Ret, + ])); + let mut vm = VM::new(wordlist); + // "sub" definition + vm.wordlist.0.push(ByteCode(vec![ + OpCode::Sub, OpCode::Ret, + ])); + + vm.tick()?; + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 1); + assert_eq!(vm.stack.0.len(), 1); + assert_eq!(vm.stack.0[0], 2, "first argument"); + vm.tick()?; + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 2); + assert_eq!(vm.stack.0.len(), 2); + assert_eq!(vm.stack.0[1], 3, "second argument"); + + vm.tick()?; // call sub + assert_eq!(vm.ip.word, 1); + assert_eq!(vm.ip.offset, 0); + assert_eq!(vm.stack.0.len(), 2); + + vm.tick()?; // - + assert_eq!(vm.ip.word, 1); + assert_eq!(vm.ip.offset, 1); + assert_eq!(vm.stack.0.len(), 1); + + vm.tick()?; // ret + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 3); + assert_eq!(vm.stack.0.len(), 1); + assert_eq!(vm.stack.0[0], -1, "result of sub word"); + + vm.tick()?; // 2 + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 4); + assert_eq!(vm.stack.0.len(), 2); + assert_eq!(vm.stack.0[1], -2, "post sub arg"); + + vm.tick()?; // - + assert_eq!(vm.ip.word, 0); + assert_eq!(vm.ip.offset, 5); + assert_eq!(vm.stack.0.len(), 1); + assert_eq!(vm.stack.0[0], -3, "add opcode result"); + + Ok(()) + } + + #[test] + fn tail_call() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Call(1)])); + wordlist.0.push(ByteCode(vec![OpCode::TCall(2), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; // call + assert_eq!(vm.callstack.0.len(), 1, "callstack after call"); + vm.tick()?; // tcall + assert_eq!(vm.callstack.0.len(), 1, "callstack after tcall"); + vm.tick()?; // ret + assert_eq!(vm.callstack.0.len(), 0, "callstack after ret"); + Ok(()) + } + + #[test] + fn if_then_true() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::If(1, None), OpCode::Num(0)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push -1 stack len"); + assert_eq!(vm.stack.0[0], -1, "push -1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "if stack "); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 1 stack len"); + assert_eq!(vm.stack.0[0], 1, "push 1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret stack len"); + assert_eq!(vm.stack.0[0], 1, "ret stack val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 2, "push 0 stack len"); + assert_eq!(vm.stack.0[1], 0, "push 0 stack val"); + + Ok(()) + } + + #[test] + fn if_then_false() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::If(1, None), OpCode::Num(-1)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 0 stack len"); + assert_eq!(vm.stack.0[0], 0, "push 0 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "if stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push -1 stack len"); + assert_eq!(vm.stack.0[0], -1, "push -1 val"); + + Ok(()) + } + + #[test] + fn if_else_then_true() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::If(1, Some(2)), OpCode::Num(-2)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push -1 stack len"); + assert_eq!(vm.stack.0[0], -1, "push -1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "if stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 1 stack len"); + assert_eq!(vm.stack.0[0], 1, "push 1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret stack len"); + assert_eq!(vm.stack.0[0], 1, "ret val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 2, "push -2 stack len"); + assert_eq!(vm.stack.0[1], -2, "push -2 val"); + + Ok(()) + } + + #[test] + fn if_else_then_false() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::If(1, Some(2)), OpCode::Num(-2)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 0 stack len"); + assert_eq!(vm.stack.0[0], 0, "push 0 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "if"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 2 stack len"); + assert_eq!(vm.stack.0[0], 2, "push 2 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret"); + assert_eq!(vm.stack.0[0], 2, "ret"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 2, "push -2 stack len"); + assert_eq!(vm.stack.0[1], -2, "push -2 val"); + + Ok(()) + } + + #[test] + fn tail_if_then_true() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(0)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::TIf(2, None), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "call(1) stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push -1 stack len"); + assert_eq!(vm.stack.0[0], -1, "push -1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "tif stack "); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 1 stack len"); + assert_eq!(vm.stack.0[0], 1, "push 1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret stack len"); + assert_eq!(vm.stack.0[0], 1, "ret stack val"); + vm.tick()?; // no ret on wordlist[1] since it's ‘tif’ + assert_eq!(vm.stack.0.len(), 2, "push 0 stack len"); + assert_eq!(vm.stack.0[1], 0, "push 0 stack val"); + + Ok(()) + } + + #[test] + fn tail_if_then_false() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(0)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::TIf(2, None), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "call(1) stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 0 stack len"); + assert_eq!(vm.stack.0[0], 0, "push 0 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "tif stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "ret stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 0 stack len"); + assert_eq!(vm.stack.0[0], 0, "push 0 val"); + + Ok(()) + } + + #[test] + fn tail_if_else_then_true() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(-2)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::TIf(2, Some(3)), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "call(1) stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push -1 stack len"); + assert_eq!(vm.stack.0[0], -1, "push -1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "tif stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 1 stack len"); + assert_eq!(vm.stack.0[0], 1, "push 1 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret stack len"); + assert_eq!(vm.stack.0[0], 1, "ret val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 2, "push -2 stack len"); + assert_eq!(vm.stack.0[1], -2, "push -2 val"); + + Ok(()) + } + + #[test] + fn tail_if_else_then_false() -> Result<(), RuntimeError> { + let mut wordlist = WordList(vec![]); + wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(-2)])); + wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::TIf(2, Some(3)), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret])); + wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret])); + let mut vm = VM::new(wordlist); + + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "call(1) stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 0 stack len"); + assert_eq!(vm.stack.0[0], 0, "push 0 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 0, "tif stack len"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "push 2 stack len"); + assert_eq!(vm.stack.0[0], 2, "push 2 val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 1, "ret stack len"); + assert_eq!(vm.stack.0[0], 2, "ret val"); + vm.tick()?; + assert_eq!(vm.stack.0.len(), 2, "push -2 stack len"); + assert_eq!(vm.stack.0[1], -2, "push -2 val"); + + Ok(()) + } +} -- cgit v1.3