// Single-file rendering of the patch
//   "parser now emits list of words and hashmap of string→index"
//   (commit 2eecdb1a4bae0fbced05f4c375cd6126929fb1fb).
//
// Commit message: the word list is just a vector of bytecode entries. To
// associate a word with an entry in the word list, look its name up in the
// word catalog.
//
// The patch created four files, which map onto the items below:
//   src/lib.rs          -> the crate root (this level)
//   src/forth/mod.rs    -> `mod forth`
//   src/forth/interp.rs -> `mod forth::interp`
//   src/forth/parser.rs -> `mod forth::parser`
//
// Generic arguments that were lost when the patch was rendered as text have
// been restored from how each type is used.

#[cfg(target_arch = "wasm32")]
use wasm_bindgen::prelude::*;

mod forth {
    // Crate-visible so the crate root can drive the interpreter and parser.
    pub(crate) mod interp {
        use std::collections::HashMap;
        use std::fmt;
        use std::ops::Index;

        /// One instruction of compiled Forth bytecode.
        #[derive(Clone, Debug, PartialEq)]
        pub enum OpCode {
            /// Push a literal number onto the data stack.
            Num(i32),
            /// A string literal, stored as `(start, end)` byte offsets into
            /// the source text (`end` is the offset of the closing quote).
            Str(usize, usize),
            /// Call a word by its index in the parser's word list.
            WordI(usize),
            /// Call a word by name through the interpreter's word list.
            Word(&'static str),
            /// Pop two numbers and push their sum.
            Add,
            /// Pop two numbers and push their difference.
            Sub,
            /// Return to the instruction after the most recent call.
            Ret,
        }

        /// A flat sequence of instructions.
        #[derive(Debug)]
        pub(super) struct ByteCode(pub(super) Vec<OpCode>);

        impl ByteCode {
            /// Number of instructions in this sequence.
            pub fn len(&self) -> usize {
                self.0.len()
            }
        }

        impl Index<usize> for ByteCode {
            type Output = OpCode;

            /// Returns the instruction at `index`.
            ///
            /// # Panics
            ///
            /// Panics if `index` is out of bounds.
            fn index(&self, index: usize) -> &Self::Output {
                &self.0[index]
            }
        }

        /// Offset of an instruction inside a [`ByteCode`].
        #[derive(Debug, Copy, Clone, PartialEq)]
        pub(super) struct InstructionPointer(pub(super) usize);

        /// Operand stack of the interpreter.
        #[derive(Debug)]
        pub(super) struct DataStack(pub(super) Vec<i32>);

        /// Return addresses of the calls currently in progress.
        #[derive(Debug)]
        pub(super) struct CallStack(pub(super) Vec<InstructionPointer>);

        /// Maps a word name to the offset of its first instruction.
        #[derive(Debug)]
        pub(super) struct WordList(pub(super) HashMap<&'static str, InstructionPointer>);

        /// Maps a word name to its index in the parser's word list.
        #[derive(Debug)]
        pub(super) struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>);

        /// A bytecode interpreter that executes one instruction per
        /// [`Interp::tick`].
        #[derive(Debug)]
        pub struct Interp {
            stack: DataStack,
            callstack: CallStack,
            wordlist: WordList,
            bytecode: ByteCode,
            ip: InstructionPointer,
        }

        /// Errors reported by [`Interp::tick`].
        #[derive(Debug, Clone, PartialEq, Eq)]
        pub enum RuntimeError {
            /// The data stack or call stack held too few entries.
            StackUnderflow,
            /// A `Word` instruction named a word that is not in the word list.
            UndefinedWord,
            /// The instruction pointer does not address an instruction.
            IpOutOfBounds,
        }

        impl fmt::Display for RuntimeError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::StackUnderflow => write!(f, "stack underflow"),
                    Self::UndefinedWord => write!(f, "undefined word"),
                    Self::IpOutOfBounds => write!(f, "instruction pointer out of bounds"),
                }
            }
        }

        impl std::error::Error for RuntimeError {}

        impl Default for Interp {
            fn default() -> Self {
                Self::new()
            }
        }

        impl Interp {
            /// Creates an interpreter with empty stacks, no words, no
            /// bytecode, and the instruction pointer at offset 0.
            pub fn new() -> Self {
                Self {
                    stack: DataStack(Vec::new()),
                    callstack: CallStack(Vec::new()),
                    wordlist: WordList(HashMap::new()),
                    bytecode: ByteCode(Vec::new()),
                    ip: InstructionPointer(0),
                }
            }

            /// Executes the instruction at the instruction pointer and then
            /// moves the pointer to the next instruction to run.
            ///
            /// # Errors
            ///
            /// * [`RuntimeError::IpOutOfBounds`] if the pointer is past the
            ///   end of the bytecode.
            /// * [`RuntimeError::StackUnderflow`] if `Add`/`Sub` finds fewer
            ///   than two operands, or `Ret` finds an empty call stack.
            /// * [`RuntimeError::UndefinedWord`] if `Word` names an unknown
            ///   word.
            ///
            /// On error neither the instruction pointer nor the data stack is
            /// modified.
            pub fn tick(&mut self) -> Result<(), RuntimeError> {
                let here = self.ip.0;
                let op = self
                    .bytecode
                    .0
                    .get(here)
                    .cloned()
                    .ok_or(RuntimeError::IpOutOfBounds)?;

                // Every arm yields the next instruction pointer. Computing
                // the jump target directly avoids the `target - 1` trick,
                // which underflows for a word placed at offset 0.
                let next = match op {
                    OpCode::Num(n) => {
                        self.stack.0.push(n);
                        here + 1
                    }
                    OpCode::Str(start, end) => {
                        eprintln!("got str: {} to {}", start, end);
                        here + 1
                    }
                    OpCode::Add => {
                        self.binary_op(i32::wrapping_add)?;
                        here + 1
                    }
                    OpCode::Sub => {
                        self.binary_op(i32::wrapping_sub)?;
                        here + 1
                    }
                    OpCode::Ret => {
                        let caller = self
                            .callstack
                            .0
                            .pop()
                            .ok_or(RuntimeError::StackUnderflow)?;
                        // Resume at the instruction after the call.
                        caller.0 + 1
                    }
                    OpCode::Word(name) => {
                        let target = *self
                            .wordlist
                            .0
                            .get(name)
                            .ok_or(RuntimeError::UndefinedWord)?;
                        self.callstack.0.push(self.ip);
                        target.0
                    }
                    OpCode::WordI(index) => {
                        eprintln!("should jump to word based on index {}", index);
                        here + 1
                    }
                };
                self.ip.0 = next;
                Ok(())
            }

            /// Pops `rhs` then `lhs` and pushes `apply(lhs, rhs)`.
            ///
            /// The depth is checked before anything is popped, so a failed
            /// operation leaves the stack untouched.
            fn binary_op(&mut self, apply: fn(i32, i32) -> i32) -> Result<(), RuntimeError> {
                let stack = &mut self.stack.0;
                if stack.len() < 2 {
                    return Err(RuntimeError::StackUnderflow);
                }
                let rhs = stack.pop().ok_or(RuntimeError::StackUnderflow)?;
                let lhs = stack.pop().ok_or(RuntimeError::StackUnderflow)?;
                stack.push(apply(lhs, rhs));
                Ok(())
            }
        }

        #[cfg(test)]
        mod tests {
            use super::*;

            #[test]
            fn simple_ticks() -> Result<(), RuntimeError> {
                let mut interp = Interp::new();
                interp.bytecode = ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add]);
                interp.tick()?;
                assert_eq!(interp.ip.0, 1);
                assert_eq!(interp.stack.0.len(), 1);
                assert_eq!(interp.stack.0[0], 2, "first argument");
                interp.tick()?;
                assert_eq!(interp.ip.0, 2);
                assert_eq!(interp.stack.0.len(), 2);
                assert_eq!(interp.stack.0[1], 3, "second argument");
                interp.tick()?;
                assert_eq!(interp.ip.0, 3);
                assert_eq!(interp.stack.0.len(), 1);
                assert_eq!(interp.stack.0[0], 5, "result of addition");

                Ok(())
            }

            #[test]
            fn custom_word() -> Result<(), RuntimeError> {
                let mut interp = Interp::new();
                interp.bytecode = ByteCode(vec![
                    OpCode::Num(2),
                    OpCode::Num(3),
                    OpCode::Word("sub"),
                    OpCode::Num(-2),
                    OpCode::Add,
                    // "sub" definition
                    OpCode::Sub,
                    OpCode::Ret,
                ]);
                // 5 is the offset of the "sub" definition
                interp.wordlist.0.insert("sub", InstructionPointer(5));
                interp.tick()?;
                assert_eq!(interp.ip.0, 1);
                assert_eq!(interp.stack.0.len(), 1);
                assert_eq!(interp.stack.0[0], 2, "first argument");
                interp.tick()?;
                assert_eq!(interp.ip.0, 2);
                assert_eq!(interp.stack.0.len(), 2);
                assert_eq!(interp.stack.0[1], 3, "second argument");

                interp.tick()?; // call sub
                assert_eq!(interp.ip.0, 5);
                assert_eq!(interp.stack.0.len(), 2);

                interp.tick()?; // -
                assert_eq!(interp.ip.0, 6);
                assert_eq!(interp.stack.0.len(), 1);

                interp.tick()?; // ret
                assert_eq!(interp.ip.0, 3);
                assert_eq!(interp.stack.0.len(), 1);
                assert_eq!(interp.stack.0[0], -1, "result of sub word");

                interp.tick()?; // -2
                assert_eq!(interp.ip.0, 4);
                assert_eq!(interp.stack.0.len(), 2);
                assert_eq!(interp.stack.0[1], -2, "post sub arg");

                interp.tick()?; // +
                assert_eq!(interp.ip.0, 5);
                assert_eq!(interp.stack.0.len(), 1);
                assert_eq!(interp.stack.0[0], -3, "add opcode result");

                Ok(())
            }

            #[test]
            fn tick_past_end_is_an_error() {
                let mut interp = Interp::new();
                assert_eq!(interp.tick(), Err(RuntimeError::IpOutOfBounds));
                assert_eq!(interp.ip.0, 0);
            }

            #[test]
            fn word_at_offset_zero() -> Result<(), RuntimeError> {
                let mut interp = Interp::new();
                interp.bytecode = ByteCode(vec![
                    // "sub" definition at offset 0
                    OpCode::Sub,
                    OpCode::Ret,
                    // main routine
                    OpCode::Num(5),
                    OpCode::Num(3),
                    OpCode::Word("sub"),
                ]);
                interp.wordlist.0.insert("sub", InstructionPointer(0));
                interp.ip = InstructionPointer(2);

                interp.tick()?; // 5
                interp.tick()?; // 3
                interp.tick()?; // call sub
                assert_eq!(interp.ip.0, 0);
                interp.tick()?; // -
                interp.tick()?; // ret
                assert_eq!(interp.ip.0, 5);
                assert_eq!(interp.stack.0, vec![2]);

                Ok(())
            }

            #[test]
            fn arithmetic_wraps() -> Result<(), RuntimeError> {
                let mut interp = Interp::new();
                interp.bytecode =
                    ByteCode(vec![OpCode::Num(i32::MAX), OpCode::Num(1), OpCode::Add]);
                interp.tick()?;
                interp.tick()?;
                interp.tick()?;
                assert_eq!(interp.stack.0, vec![i32::MIN]);

                Ok(())
            }

            #[test]
            fn underflow_leaves_state_untouched() -> Result<(), RuntimeError> {
                let mut interp = Interp::new();
                interp.bytecode = ByteCode(vec![OpCode::Num(7), OpCode::Add]);
                interp.tick()?;
                assert_eq!(interp.tick(), Err(RuntimeError::StackUnderflow));
                assert_eq!(interp.stack.0, vec![7]);
                assert_eq!(interp.ip.0, 1);

                Ok(())
            }
        }
    }

    // Crate-visible so the crate root can drive the interpreter and parser.
    pub(crate) mod parser {
        use super::interp::{ByteCode, OpCode, WordCatalog};

        use std::collections::HashMap;
        use std::str::CharIndices;

        /// Errors reported by [`Parser::parse`].
        #[derive(Debug, PartialEq, Eq)]
        pub enum ParseError {
            /// Input ended after `:` with no name, or inside a definition
            /// that was never closed with `;`.
            EOF,
            /// `;` appeared with no definition open, or the open
            /// definition's name has no catalog entry.
            NameStackEmpty,
            /// `s"` was not followed by a closing `"`.
            MissingQuote,
            /// A word that is neither a number, a defined word, nor a
            /// built-in.
            UnknownWord(String),
        }

        impl std::fmt::Display for ParseError {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    Self::EOF => write!(f, "premature end-of-file"),
                    Self::NameStackEmpty => write!(f, "name stack empty"),
                    Self::MissingQuote => write!(f, "missing ending quote"),
                    Self::UnknownWord(word) => write!(f, "unknown word: {}", word),
                }
            }
        }

        impl std::error::Error for ParseError {}

        type ParseResult<T> = Result<T, ParseError>;

        // a list of all bytecode defs, with the main routine at '0'.
        type ParserWordList = Vec<ByteCode>;

        /// Compiles Forth source text into a list of bytecode routines plus
        /// a catalog that maps each defined word's name to its index in
        /// that list.
        #[derive(Debug)]
        pub struct Parser<'a> {
            text: &'a str,
            // Yields `(byte offset, char)`, so offsets can slice `text`
            // directly even when the input is not ASCII.
            enumerator: CharIndices<'a>,
            wordlist: ParserWordList,
            wordalog: WordCatalog<'a>,
            // Names of the definitions currently being compiled, innermost
            // last. Empty while compiling the main routine.
            namestack: Vec<&'a str>,
        }

        impl<'a> Parser<'a> {
            /// Creates a parser over `text`. The main routine is always
            /// entry 0 of the word list and starts out empty.
            pub fn new(text: &'a str) -> Self {
                Self {
                    text,
                    enumerator: text.char_indices(),
                    wordlist: vec![ByteCode(Vec::new())],
                    wordalog: WordCatalog(HashMap::new()),
                    namestack: Vec::new(),
                }
            }

            /// Pulls the next whitespace-delimited word off the input.
            ///
            /// Returns the word with the byte offsets of its start and of
            /// the position just past it, or `None` when only whitespace
            /// (or nothing) remains. The delimiter that ends the word is
            /// consumed. A word that runs to the end of the input is
            /// returned with `end == text.len()`.
            fn next_word(&mut self) -> Option<(&'a str, usize, usize)> {
                let text = self.text;
                let (start, _) = self.enumerator.find(|&(_, c)| !c.is_whitespace())?;
                let end = self
                    .enumerator
                    .find(|&(_, c)| c.is_whitespace())
                    .map_or(text.len(), |(offset, _)| offset);
                Some((&text[start..end], start, end))
            }

            /// Appends `op` to the routine currently being compiled: the
            /// definition named on top of the name stack, or the main
            /// routine when the name stack is empty.
            fn bc_push(&mut self, op: OpCode) -> ParseResult<()> {
                let word_index = match self.namestack.last() {
                    None => 0,
                    Some(name) => *self
                        .wordalog
                        .0
                        .get(name)
                        .ok_or(ParseError::NameStackEmpty)?,
                };
                // Catalog indices are only ever taken from `wordlist.len()`
                // immediately before the matching push, so this is in
                // bounds.
                self.wordlist[word_index].0.push(op);
                Ok(())
            }

            /// Compiles the whole input.
            ///
            /// Each word is tried as a number, then as a previously defined
            /// word, then as one of the built-ins `s"`, `:`, `;`, `+`, `-`.
            ///
            /// # Errors
            ///
            /// * [`ParseError::UnknownWord`] for any other word.
            /// * [`ParseError::MissingQuote`] if `s"` has no closing `"`.
            /// * [`ParseError::EOF`] if `:` is not followed by a name, or a
            ///   definition is still open at the end of the input.
            /// * [`ParseError::NameStackEmpty`] if `;` appears outside a
            ///   definition.
            pub fn parse(&mut self) -> ParseResult<()> {
                while let Some((word, _start, _end)) = self.next_word() {
                    if let Ok(number) = word.parse::<i32>() {
                        self.bc_push(OpCode::Num(number))?;
                        continue;
                    }
                    if let Some(&index) = self.wordalog.0.get(word) {
                        self.bc_push(OpCode::WordI(index))?;
                        continue;
                    }
                    match word {
                        r#"s""# => {
                            // `next_word` already consumed the delimiter
                            // after `s"`, so the iterator sits on the first
                            // byte of the string body.
                            let s_start = self.text.len() - self.enumerator.as_str().len();
                            let (s_end, _) = self
                                .enumerator
                                .find(|&(_, c)| c == '"')
                                .ok_or(ParseError::MissingQuote)?;
                            self.bc_push(OpCode::Str(s_start, s_end))?;
                        }
                        ":" => {
                            let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?;
                            // Register the name before compiling the body
                            // so the body can refer to the word itself.
                            self.wordalog.0.insert(name, self.wordlist.len());
                            self.wordlist.push(ByteCode(Vec::new()));
                            self.namestack.push(name);
                        }
                        ";" => {
                            // Without an open definition the `Ret` would
                            // land in the main routine, where there is
                            // nothing to return to.
                            if self.namestack.is_empty() {
                                return Err(ParseError::NameStackEmpty);
                            }
                            self.bc_push(OpCode::Ret)?;
                            self.namestack.pop();
                        }
                        "+" => self.bc_push(OpCode::Add)?,
                        "-" => self.bc_push(OpCode::Sub)?,
                        other => return Err(ParseError::UnknownWord(String::from(other))),
                    }
                }
                if self.namestack.is_empty() {
                    Ok(())
                } else {
                    // A definition was opened with `:` but never closed.
                    Err(ParseError::EOF)
                }
            }
        }

        #[cfg(test)]
        mod tests {
            use super::*;

            fn parser_for(text: &str) -> Parser<'_> {
                let mut p = Parser::new(text);
                p.parse().expect("badparse");
                p
            }

            #[test]
            fn literal_num() {
                let p = parser_for("1\n");
                let main = &p.wordlist[0];
                assert_eq!(main.len(), 1);
                assert_eq!(main[0], OpCode::Num(1));
            }

            #[test]
            fn literal_string() {
                let p = parser_for(r#"s" hello there""#);
                let main = &p.wordlist[0];
                assert_eq!(main.len(), 1);
                assert_eq!(main[0], OpCode::Str(3, 14));
            }

            #[test]
            fn add_opcode() {
                let p = parser_for("+\n");
                let main = &p.wordlist[0];
                eprintln!("main {:?}", main);
                assert_eq!(main.len(), 1);
                assert_eq!(main[0], OpCode::Add);
            }

            #[test]
            fn sub_opcode() {
                let p = parser_for("-\n");
                let main = &p.wordlist[0];
                eprintln!("main {:?}", main);
                assert_eq!(main.len(), 1);
                assert_eq!(main[0], OpCode::Sub);
            }

            #[test]
            fn def_word() {
                let p = parser_for(": add2 2 + ; 3 add2\n");
                let main = &p.wordlist[0];
                eprintln!("main {:?}", main);

                let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist");
                let add2 = &p.wordlist[*add2_index];
                eprintln!("add2 {:?}", add2);

                assert_eq!(main.len(), 2);
                assert_eq!(main[0], OpCode::Num(3));
                assert_eq!(main[1], OpCode::WordI(*add2_index));
                assert_eq!(add2.len(), 3);
                assert_eq!(add2[0], OpCode::Num(2));
                assert_eq!(add2[1], OpCode::Add);
                assert_eq!(add2[2], OpCode::Ret);
            }

            #[test]
            fn last_word_without_trailing_whitespace() {
                let p = parser_for("1 2 +");
                let main = &p.wordlist[0];
                assert_eq!(main.len(), 3);
                assert_eq!(main[0], OpCode::Num(1));
                assert_eq!(main[1], OpCode::Num(2));
                assert_eq!(main[2], OpCode::Add);
            }

            #[test]
            fn string_offsets_are_byte_offsets() {
                let text = "s\" h\u{e9}llo\" ";
                let p = parser_for(text);
                let main = &p.wordlist[0];
                assert_eq!(main.len(), 1);
                assert_eq!(main[0], OpCode::Str(3, 9));
                assert_eq!(&text[3..9], "h\u{e9}llo");
            }

            #[test]
            fn non_ascii_word_is_reported_not_panicked() {
                let mut p = Parser::new("\u{e9}t\u{e9} ");
                assert_eq!(
                    p.parse(),
                    Err(ParseError::UnknownWord(String::from("\u{e9}t\u{e9}")))
                );
            }

            #[test]
            fn semicolon_outside_definition() {
                let mut p = Parser::new("1 ; ");
                assert_eq!(p.parse(), Err(ParseError::NameStackEmpty));
            }

            #[test]
            fn unterminated_definition() {
                let mut p = Parser::new(": add2 2 + ");
                assert_eq!(p.parse(), Err(ParseError::EOF));
            }
        }
    }
}

// The items below bind to browser APIs. They are gated on `wasm32` so the
// interpreter and parser above can be built and tested on the host without
// a browser environment.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
extern "C" {
    #[wasm_bindgen(js_namespace = console)]
    fn debug(s: &str);
}

/// Entry point run when the wasm module starts: appends a `<p>` element
/// with the text "hi there" to the document body and logs a message through
/// `console.debug`.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen(start)]
pub fn run() -> Result<(), JsValue> {
    let window = web_sys::window().expect("no global `window` exists");
    let document = window.document().expect("should have `document` on window");
    let body = document.body().expect("document should have `body`");

    let val = document.create_element("p")?;
    val.set_text_content(Some("hi there"));
    body.append_child(&val)?;

    debug("done in rust's run");

    Ok(())
}