summaryrefslogtreecommitdiffstats
path: root/src/forth/vm.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/forth/vm.rs')
-rw-r--r--src/forth/vm.rs532
1 files changed, 532 insertions, 0 deletions
diff --git a/src/forth/vm.rs b/src/forth/vm.rs
new file mode 100644
index 0000000..b4ce674
--- /dev/null
+++ b/src/forth/vm.rs
@@ -0,0 +1,532 @@
+use log::debug;
+
+use std::ops::Index;
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum OpCode {
+ Num(i32),
+ Str(usize, usize),
+ Call(usize),
+ TCall(usize), // tail call, really just ‘jmp’, but named to indicate desired usage.
+ If(usize, Option<usize>), // true word index, optional false word index.
+ TIf(usize, Option<usize>), // tail version
+ Add,
+ Sub,
+ Mul,
+ Div,
+ Dup,
+ Drop,
+ EQ,
+ GT,
+ GTE,
+ LT,
+ LTE,
+ // todo: maybe get rid of ret? it's only used in word definitions,
+ // which are bounded, so running of the end can be treated as an
+ // implicit ‘ret’.
+ Ret,
+}
+
+#[derive(Clone, Debug)]
+pub struct ByteCode(pub Vec<OpCode>);
+impl std::ops::Deref for ByteCode {
+ type Target = Vec<OpCode>;
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl ByteCode {
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+}
+
+impl Index<usize> for ByteCode {
+ type Output = OpCode;
+
+ fn index(&self, index: usize) -> &Self::Output {
+ &self.0[index]
+ }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct InstructionPointer {
+ pub word: usize,
+ pub offset: usize,
+}
+
+impl InstructionPointer {
+ pub fn new() -> Self {
+ Self {
+ word: 0,
+ offset: 0,
+ }
+ }
+}
+
+#[derive(Debug)]
+pub struct DataStack(pub Vec<i32>);
+
+#[derive(Debug)]
+pub struct CallStack(pub Vec<InstructionPointer>);
+
+#[derive(Clone, Debug)]
+pub struct WordList(pub Vec<ByteCode>);
+impl std::ops::Deref for WordList {
+ type Target = Vec<ByteCode>;
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+#[derive(Debug)]
+pub struct VM {
+ // todo: don't be pub, probably
+ pub stack: DataStack,
+ pub callstack: CallStack,
+ pub wordlist: WordList,
+ pub ip: InstructionPointer,
+}
+
+#[derive(Debug)]
+pub enum RuntimeError {
+ StackUnderflow,
+}
+
+impl VM {
+ pub fn new(wordlist: WordList) -> Self {
+ Self {
+ stack: DataStack(Vec::new()),
+ callstack: CallStack(Vec::new()),
+ ip: InstructionPointer::new(),
+ wordlist,
+ }
+ }
+
+ pub fn jmp(&mut self, word: usize) {
+ self.ip.word = word;
+ self.ip.offset = 0;
+ }
+
+ pub fn call(&mut self, word: usize) {
+ self.callstack.0.push(self.ip);
+ self.jmp(word);
+ }
+
+ // bool is still_running
+ pub fn tick(&mut self) -> Result<bool, RuntimeError> {
+ let bc = &self.wordlist.0[self.ip.word];
+ match bc[self.ip.offset] {
+ OpCode::Num(n) => self.stack.0.push(n),
+ OpCode::Str(start, end) => debug!("got str: {} to {}", start, end),
+ OpCode::Add => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ self.stack.0.push(n2 + n1);
+ },
+ OpCode::Sub => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ self.stack.0.push(n2 - n1);
+ },
+ OpCode::Mul => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ self.stack.0.push(n2 * n1);
+ },
+ OpCode::Div => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ self.stack.0.push(n2 / n1);
+ },
+ OpCode::Dup => {
+ let n = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ self.stack.0.push(n);
+ self.stack.0.push(n);
+ },
+ OpCode::Drop => {
+ self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ },
+ OpCode::EQ => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let v = if n2 == n1 { -1 } else { 0 };
+ self.stack.0.push(v);
+ },
+ OpCode::GT => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let v = if n2 > n1 { -1 } else { 0 };
+ self.stack.0.push(v);
+ },
+ OpCode::GTE => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let v = if n2 >= n1 { -1 } else { 0 };
+ self.stack.0.push(v);
+ },
+ OpCode::LT => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let v = if n2 < n1 { -1 } else { 0 };
+ self.stack.0.push(v);
+ },
+ OpCode::LTE => {
+ let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ let v = if n2 <= n1 { -1 } else { 0 };
+ self.stack.0.push(v);
+ },
+ OpCode::Ret => {
+ self.ip = self.callstack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ },
+ OpCode::Call(i) => {
+ self.call(i);
+ // skip the offset increment
+ return Ok(true)
+ },
+ OpCode::TCall(i) => {
+ self.jmp(i);
+ // skip the offset increment
+ return Ok(true)
+ },
+ OpCode::If(true_clause, false_clause) => {
+ let flag = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ if flag != 0 {
+ self.call(true_clause);
+ return Ok(true)
+ } else if let Some(false_clause) = false_clause {
+ self.call(false_clause);
+ return Ok(true)
+ }
+ },
+ OpCode::TIf(true_clause, false_clause) => {
+ let flag = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?;
+ if flag != 0 {
+ self.jmp(true_clause);
+ return Ok(true)
+ } else if let Some(false_clause) = false_clause {
+ self.jmp(false_clause);
+ return Ok(true)
+ }
+ },
+ }
+ self.ip.offset += 1;
+ Ok(self.ip.offset < self.wordlist.0[self.ip.word].len())
+ }
+
+ pub fn run(&mut self) -> Result<usize, RuntimeError> {
+ let mut count = 0;
+ while self.tick()? { count += 1 }
+ Ok(count)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use super::OpCode;
+
+ #[test]
+ fn simple_ticks() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add]));
+ let mut vm = VM::new(wordlist);
+ assert_eq!(vm.tick()?, true, "first arg running");
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 1);
+ assert_eq!(vm.stack.0.len(), 1);
+ assert_eq!(vm.stack.0[0], 2, "first argument");
+ assert_eq!(vm.tick()?, true, "second arg running");
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 2);
+ assert_eq!(vm.stack.0.len(), 2);
+ assert_eq!(vm.stack.0[1], 3, "second argument");
+ assert_eq!(vm.tick()?, false, "stopped after addition");
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 3);
+ assert_eq!(vm.stack.0.len(), 1);
+ assert_eq!(vm.stack.0[0], 5, "result of addition");
+
+ Ok(())
+ }
+
+ #[test]
+ fn custom_word() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![
+ OpCode::Num(2), OpCode::Num(3), OpCode::Call(1), OpCode::Num(-2), OpCode::Add,
+ OpCode::Sub, OpCode::Ret,
+ ]));
+ let mut vm = VM::new(wordlist);
+ // "sub" definition
+ vm.wordlist.0.push(ByteCode(vec![
+ OpCode::Sub, OpCode::Ret,
+ ]));
+
+ vm.tick()?;
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 1);
+ assert_eq!(vm.stack.0.len(), 1);
+ assert_eq!(vm.stack.0[0], 2, "first argument");
+ vm.tick()?;
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 2);
+ assert_eq!(vm.stack.0.len(), 2);
+ assert_eq!(vm.stack.0[1], 3, "second argument");
+
+ vm.tick()?; // call sub
+ assert_eq!(vm.ip.word, 1);
+ assert_eq!(vm.ip.offset, 0);
+ assert_eq!(vm.stack.0.len(), 2);
+
+ vm.tick()?; // -
+ assert_eq!(vm.ip.word, 1);
+ assert_eq!(vm.ip.offset, 1);
+ assert_eq!(vm.stack.0.len(), 1);
+
+ vm.tick()?; // ret
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 3);
+ assert_eq!(vm.stack.0.len(), 1);
+ assert_eq!(vm.stack.0[0], -1, "result of sub word");
+
+ vm.tick()?; // 2
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 4);
+ assert_eq!(vm.stack.0.len(), 2);
+ assert_eq!(vm.stack.0[1], -2, "post sub arg");
+
+ vm.tick()?; // -
+ assert_eq!(vm.ip.word, 0);
+ assert_eq!(vm.ip.offset, 5);
+ assert_eq!(vm.stack.0.len(), 1);
+ assert_eq!(vm.stack.0[0], -3, "add opcode result");
+
+ Ok(())
+ }
+
+ #[test]
+ fn tail_call() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Call(1)]));
+ wordlist.0.push(ByteCode(vec![OpCode::TCall(2), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?; // call
+ assert_eq!(vm.callstack.0.len(), 1, "callstack after call");
+ vm.tick()?; // tcall
+ assert_eq!(vm.callstack.0.len(), 1, "callstack after tcall");
+ vm.tick()?; // ret
+ assert_eq!(vm.callstack.0.len(), 0, "callstack after ret");
+ Ok(())
+ }
+
+ #[test]
+ fn if_then_true() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::If(1, None), OpCode::Num(0)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push -1 stack len");
+ assert_eq!(vm.stack.0[0], -1, "push -1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "if stack ");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 1 stack len");
+ assert_eq!(vm.stack.0[0], 1, "push 1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret stack len");
+ assert_eq!(vm.stack.0[0], 1, "ret stack val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 2, "push 0 stack len");
+ assert_eq!(vm.stack.0[1], 0, "push 0 stack val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn if_then_false() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::If(1, None), OpCode::Num(-1)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 0 stack len");
+ assert_eq!(vm.stack.0[0], 0, "push 0 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "if stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push -1 stack len");
+ assert_eq!(vm.stack.0[0], -1, "push -1 val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn if_else_then_true() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::If(1, Some(2)), OpCode::Num(-2)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push -1 stack len");
+ assert_eq!(vm.stack.0[0], -1, "push -1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "if stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 1 stack len");
+ assert_eq!(vm.stack.0[0], 1, "push 1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret stack len");
+ assert_eq!(vm.stack.0[0], 1, "ret val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 2, "push -2 stack len");
+ assert_eq!(vm.stack.0[1], -2, "push -2 val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn if_else_then_false() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::If(1, Some(2)), OpCode::Num(-2)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 0 stack len");
+ assert_eq!(vm.stack.0[0], 0, "push 0 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "if");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 2 stack len");
+ assert_eq!(vm.stack.0[0], 2, "push 2 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret");
+ assert_eq!(vm.stack.0[0], 2, "ret");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 2, "push -2 stack len");
+ assert_eq!(vm.stack.0[1], -2, "push -2 val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn tail_if_then_true() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(0)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::TIf(2, None), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "call(1) stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push -1 stack len");
+ assert_eq!(vm.stack.0[0], -1, "push -1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "tif stack ");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 1 stack len");
+ assert_eq!(vm.stack.0[0], 1, "push 1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret stack len");
+ assert_eq!(vm.stack.0[0], 1, "ret stack val");
+ vm.tick()?; // no ret on wordlist[1] since it's ‘tif’
+ assert_eq!(vm.stack.0.len(), 2, "push 0 stack len");
+ assert_eq!(vm.stack.0[1], 0, "push 0 stack val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn tail_if_then_false() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(0)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::TIf(2, None), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "call(1) stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 0 stack len");
+ assert_eq!(vm.stack.0[0], 0, "push 0 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "tif stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "ret stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 0 stack len");
+ assert_eq!(vm.stack.0[0], 0, "push 0 val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn tail_if_else_then_true() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(-2)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(-1), OpCode::TIf(2, Some(3)), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "call(1) stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push -1 stack len");
+ assert_eq!(vm.stack.0[0], -1, "push -1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "tif stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 1 stack len");
+ assert_eq!(vm.stack.0[0], 1, "push 1 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret stack len");
+ assert_eq!(vm.stack.0[0], 1, "ret val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 2, "push -2 stack len");
+ assert_eq!(vm.stack.0[1], -2, "push -2 val");
+
+ Ok(())
+ }
+
+ #[test]
+ fn tail_if_else_then_false() -> Result<(), RuntimeError> {
+ let mut wordlist = WordList(vec![]);
+ wordlist.0.push(ByteCode(vec![OpCode::Call(1), OpCode::Num(-2)]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(0), OpCode::TIf(2, Some(3)), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(1), OpCode::Ret]));
+ wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Ret]));
+ let mut vm = VM::new(wordlist);
+
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "call(1) stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 0 stack len");
+ assert_eq!(vm.stack.0[0], 0, "push 0 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 0, "tif stack len");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "push 2 stack len");
+ assert_eq!(vm.stack.0[0], 2, "push 2 val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 1, "ret stack len");
+ assert_eq!(vm.stack.0[0], 2, "ret val");
+ vm.tick()?;
+ assert_eq!(vm.stack.0.len(), 2, "push -2 stack len");
+ assert_eq!(vm.stack.0[1], -2, "push -2 val");
+
+ Ok(())
+ }
+}