From bd33129b72372512a4b8a570c101da01e8877fff Mon Sep 17 00:00:00 2001 From: Brian Cully Date: Tue, 19 Aug 2025 13:07:41 -0400 Subject: interpreter now uses same structure as parser --- src/forth/interp.rs | 93 ++++++++++++++++++++++++++++++++--------------------- src/forth/parser.rs | 35 +++++++++----------- 2 files changed, 73 insertions(+), 55 deletions(-) (limited to 'src') diff --git a/src/forth/interp.rs b/src/forth/interp.rs index 2ade22d..406c424 100644 --- a/src/forth/interp.rs +++ b/src/forth/interp.rs @@ -5,8 +5,7 @@ use std::ops::Index; pub enum OpCode { Num(i32), Str(usize, usize), - WordI(usize), - Word(&'static str), + Call(usize), Add, Sub, Ret, @@ -29,8 +28,21 @@ impl Index for ByteCode { } } +// .0 is wordlist entry's bytecode, .1 is index into that bytecode #[derive(Debug, Copy, Clone, PartialEq)] -pub(super) struct InstructionPointer(pub(super) usize); +pub(super) struct InstructionPointer { + pub(super) word: usize, + pub(super) offset: usize, +} + +impl InstructionPointer { + pub fn new() -> Self { + Self { + word: 0, + offset: 0, + } + } +} #[derive(Debug)] pub(super) struct DataStack(pub(super) Vec); @@ -39,7 +51,7 @@ pub(super) struct DataStack(pub(super) Vec); pub(super) struct CallStack(pub(super) Vec); #[derive(Debug)] -pub(super) struct WordList(pub(super) HashMap<&'static str, InstructionPointer>); +pub(super) struct WordList(pub(super) Vec); #[derive(Debug)] pub(super) struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); @@ -49,14 +61,12 @@ pub struct Interp { stack: DataStack, callstack: CallStack, wordlist: WordList, - bytecode: ByteCode, ip: InstructionPointer, } #[derive(Debug)] pub enum RuntimeError { StackUnderflow, - UndefinedWord, } impl Interp { @@ -64,14 +74,14 @@ impl Interp { Self { stack: DataStack(Vec::new()), callstack: CallStack(Vec::new()), - wordlist: WordList(HashMap::new()), - bytecode: ByteCode(Vec::new()), - ip: InstructionPointer(0), + wordlist: WordList(vec![]), + ip: InstructionPointer::new(), } } pub fn tick(&mut self) -> Result<(), RuntimeError> { - match self.bytecode[self.ip.0] { + let bc = &self.wordlist.0[self.ip.word]; + match bc[self.ip.offset] { OpCode::Num(n) => self.stack.0.push(n), OpCode::Str(start, end) => eprintln!("got str: {} to {}", start, end), OpCode::Add => { @@ -85,19 +95,18 @@ impl Interp { self.stack.0.push(n2 - n1); }, OpCode::Ret => { - let ip = self.callstack.0.pop().ok_or(RuntimeError::StackUnderflow)?; - self.ip = ip; - }, - OpCode::Word(w) => { - let ip = self.wordlist.0.get(w).ok_or(RuntimeError::UndefinedWord)?; - self.callstack.0.push(self.ip); - self.ip.0 = ip.0 - 1; // we auto-increment at the end + self.ip = self.callstack.0.pop().ok_or(RuntimeError::StackUnderflow)?; }, - OpCode::WordI(i) => { + OpCode::Call(i) => { eprintln!("should jump to word based on index {}", i); + self.callstack.0.push(self.ip); + self.ip.word = i; + self.ip.offset = 0; + // skip the offset increment + return Ok(()) } } - self.ip.0 += 1; + self.ip.offset += 1; Ok(()) } } @@ -111,17 +120,20 @@ mod tests { fn simple_ticks() -> Result<(), RuntimeError> { let mut interp = Interp::new(); - interp.bytecode = ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add]); + interp.wordlist.0.push(ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add])); interp.tick()?; - assert_eq!(interp.ip.0, 1); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 1); assert_eq!(interp.stack.0.len(), 1); assert_eq!(interp.stack.0[0], 2, "first argument"); interp.tick()?; - assert_eq!(interp.ip.0, 2); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 2); assert_eq!(interp.stack.0.len(), 2); assert_eq!(interp.stack.0[1], 3, "second argument"); interp.tick()?; - assert_eq!(interp.ip.0, 3); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 3); assert_eq!(interp.stack.0.len(), 1); assert_eq!(interp.stack.0[0], 5, "result of addition"); @@ -131,42 +143,51 @@ mod tests { #[test] fn custom_word() -> Result<(), RuntimeError> { let mut interp = Interp::new(); - interp.bytecode = ByteCode(vec![ - OpCode::Num(2), OpCode::Num(3), OpCode::Word("sub"), OpCode::Num(-2), OpCode::Add, - // "sub" definition + interp.wordlist.0.push(ByteCode(vec![ + OpCode::Num(2), OpCode::Num(3), OpCode::Call(1), OpCode::Num(-2), OpCode::Add, + OpCode::Sub, OpCode::Ret, + ])); + // "sub" definition + interp.wordlist.0.push(ByteCode(vec![ OpCode::Sub, OpCode::Ret, - ]); - // 5 is offset of w - interp.wordlist.0.insert("sub", InstructionPointer(5)); + ])); + interp.tick()?; - assert_eq!(interp.ip.0, 1); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 1); assert_eq!(interp.stack.0.len(), 1); assert_eq!(interp.stack.0[0], 2, "first argument"); interp.tick()?; - assert_eq!(interp.ip.0, 2); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 2); assert_eq!(interp.stack.0.len(), 2); assert_eq!(interp.stack.0[1], 3, "second argument"); interp.tick()?; // call sub - assert_eq!(interp.ip.0, 5); + assert_eq!(interp.ip.word, 1); + assert_eq!(interp.ip.offset, 0); assert_eq!(interp.stack.0.len(), 2); interp.tick()?; // - - assert_eq!(interp.ip.0, 6); + assert_eq!(interp.ip.word, 1); + assert_eq!(interp.ip.offset, 1); assert_eq!(interp.stack.0.len(), 1); interp.tick()?; // ret - assert_eq!(interp.ip.0, 3); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 3); assert_eq!(interp.stack.0.len(), 1); assert_eq!(interp.stack.0[0], -1, "result of sub word"); interp.tick()?; // 2 - assert_eq!(interp.ip.0, 4); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 4); assert_eq!(interp.stack.0.len(), 2); assert_eq!(interp.stack.0[1], -2, "post sub arg"); interp.tick()?; // - - assert_eq!(interp.ip.0, 5); + assert_eq!(interp.ip.word, 0); + assert_eq!(interp.ip.offset, 5); assert_eq!(interp.stack.0.len(), 1); assert_eq!(interp.stack.0[0], -3, "add opcode result"); diff --git a/src/forth/parser.rs b/src/forth/parser.rs index 26a5686..9e444c9 100644 --- a/src/forth/parser.rs +++ b/src/forth/parser.rs @@ -1,4 +1,4 @@ -use super::interp::{ByteCode, OpCode, WordCatalog}; +use super::interp::{ByteCode, OpCode, WordCatalog, WordList}; use std::collections::HashMap; use std::iter::{Enumerate, Iterator}; @@ -25,14 +25,11 @@ impl std::error::Error for ParseError {} type ParseResult = Result; -// a list of all bytecode defs, with the main routine at ‘0’. -type ParserWordList = Vec; - #[derive(Debug)] pub struct Parser<'a> { text: &'a str, enumerator: Enumerate>, - wordlist: ParserWordList, + wordlist: WordList, wordalog: WordCatalog<'a>, namestack: Vec<&'a str>, } @@ -40,13 +37,13 @@ pub struct Parser<'a> { impl<'a> Parser<'a> { pub fn new(text: &'a str) -> Self { let enumerator = text.chars().enumerate(); - let mut wordlist = vec![]; + let mut wl = vec![]; // main routine is always the first entry. - wordlist.push(ByteCode(vec![])); + wl.push(ByteCode(vec![])); Self { text, enumerator, - wordlist, + wordlist: WordList(wl), wordalog: WordCatalog(HashMap::new()), namestack: vec![], } @@ -78,7 +75,7 @@ impl<'a> Parser<'a> { None => &0, Some(name) => self.wordalog.0.get(name).ok_or(ParseError::NameStackEmpty)?, }; - self.wordlist[*word_index].0.push(op); + self.wordlist.0[*word_index].0.push(op); Ok(()) } @@ -87,7 +84,7 @@ impl<'a> Parser<'a> { if let Ok(i) = word.parse::() { self.bc_push(OpCode::Num(i))?; } else if let Some(i) = self.wordalog.0.get(word) { - self.bc_push(OpCode::WordI(*i))?; + self.bc_push(OpCode::Call(*i))?; } else { match word { r#"s""# => { @@ -100,8 +97,8 @@ impl<'a> Parser<'a> { ":" => { let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; self.namestack.push(name); - self.wordalog.0.insert(name, self.wordlist.len()); - self.wordlist.push(ByteCode(vec![])); + self.wordalog.0.insert(name, self.wordlist.0.len()); + self.wordlist.0.push(ByteCode(vec![])); }, ";" => { self.bc_push(OpCode::Ret)?; @@ -131,7 +128,7 @@ mod tests { #[test] fn literal_num() { let p = parser_for("1\n"); - let main = &p.wordlist[0]; + let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Num(1)); } @@ -139,7 +136,7 @@ mod tests { #[test] fn literal_string() { let p = parser_for(r#"s" hello there""#); - let main = &p.wordlist[0]; + let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Str(3, 14)); } @@ -147,7 +144,7 @@ mod tests { #[test] fn add_opcode() { let p = parser_for("+\n"); - let main = &p.wordlist[0]; + let main = &p.wordlist.0[0]; eprintln!("main {:?}", main); assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Add); @@ -156,7 +153,7 @@ mod tests { #[test] fn sub_opcode() { let p = parser_for("-\n"); - let main = &p.wordlist[0]; + let main = &p.wordlist.0[0]; eprintln!("main {:?}", main); assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Sub); @@ -165,16 +162,16 @@ mod tests { #[test] fn def_word() { let p = parser_for(": add2 2 + ; 3 add2\n"); - let main = &p.wordlist[0]; + let main = &p.wordlist.0[0]; eprintln!("main {:?}", main); let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); - let add2 = &p.wordlist[*add2_index]; + let add2 = &p.wordlist.0[*add2_index]; eprintln!("add2 {:?}", add2); assert_eq!(main.len(), 2); assert_eq!(main[0], OpCode::Num(3)); - assert_eq!(main[1], OpCode::WordI(*add2_index)); + assert_eq!(main[1], OpCode::Call(*add2_index)); assert_eq!(add2.len(), 3); assert_eq!(add2[0], OpCode::Num(2)); assert_eq!(add2[1], OpCode::Add); -- cgit v1.3