summaryrefslogtreecommitdiffstats
path: root/src/forth/parser.rs
diff options
context:
space:
mode:
authorBrian Cully <bjc@spork.org>2025-08-24 14:22:48 -0400
committerBrian Cully <bjc@spork.org>2025-08-24 15:36:55 -0400
commit0efb15a9eb706896cdabb9ca5d2b0c295c2dffcf (patch)
treed4bb89acd2ab1dd45ee40d9dffed5a4b73554b12 /src/forth/parser.rs
parent57f32dc1d36650a5f282f1ba3906e9772b89709d (diff)
downloadautomathon-0efb15a9eb706896cdabb9ca5d2b0c295c2dffcf.tar.gz
automathon-0efb15a9eb706896cdabb9ca5d2b0c295c2dffcf.zip
rename parser → compiler, interp → vm
reflects reality better
Diffstat (limited to 'src/forth/parser.rs')
-rw-r--r--src/forth/parser.rs384
1 files changed, 0 insertions, 384 deletions
diff --git a/src/forth/parser.rs b/src/forth/parser.rs
deleted file mode 100644
index c934c4a..0000000
--- a/src/forth/parser.rs
+++ /dev/null
@@ -1,384 +0,0 @@
-use super::interp::{ByteCode, OpCode, WordList};
-
-use std::collections::HashMap;
-use std::iter::{Enumerate, Iterator};
-use std::str::Chars;
-
-#[derive(Debug)]
-pub enum ParseError {
- EOF,
- DefStackEmpty,
- MissingIf,
- MissingQuote,
- UnknownWord(String),
-}
-impl std::fmt::Display for ParseError {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- Self::EOF => write!(f, "premature end-of-file"),
- Self::DefStackEmpty => write!(f, "def stack empty"),
- Self::MissingIf => write!(f, "missing if"),
- Self::MissingQuote => write!(f, "missing ending quote"),
- Self::UnknownWord(word) => write!(f, "unknown word: {}", word),
- }
- }
-}
-impl std::error::Error for ParseError {}
-
-type ParseResult<T> = Result<T, ParseError>;
-
-// todo: the annotations should be directly tied to the wordlist so
-// they can't get out of sync.
-#[derive(Debug)]
-pub struct Annotation {
- // (start, end) index in program text
- pub loc: (usize, usize),
-}
-
-#[derive(Debug)]
-pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>);
-
-#[derive(Debug)]
-pub enum IfClauses {
- True(usize),
- TrueFalse(usize, usize),
-}
-
-#[derive(Debug)]
-pub struct Parser<'a> {
- text: &'a str,
- enumerator: Enumerate<Chars<'a>>,
- // list of all word definitions, in the order defined. the main
- // routine is always in the first entry.
- // todo: don't be pub, have a method to extract a wordlist
- pub wordlist: WordList,
- pub annotations: Vec<Vec<Annotation>>,
- // catalog of word to word index in `wordlist`
- pub wordalog: WordCatalog<'a>,
- // holds a stack of indices into `wordlist` that are currently
- // being defined, with the top of stack being the most recent
- // definition.
- defstack: Vec<usize>,
- // number of clauses currently being defined for a particular ‘if’
- // construct. a stack is needed in order to allow for nesting.
- if_stack: Vec<(IfClauses, Annotation)>,
-}
-
-impl<'a> Parser<'a> {
- pub fn new(text: &'a str) -> Self {
- let mut wl = vec![];
- // main routine is always the first entry.
- wl.push(ByteCode(vec![]));
- Self {
- text,
- enumerator: text.chars().enumerate(),
- wordlist: WordList(wl),
- annotations: vec![vec![]],
- wordalog: WordCatalog(HashMap::new()),
- defstack: vec![],
- if_stack: vec![],
- }
- }
-
- // pull the next, whitespace-delimited word off the input stream.
- fn next_word(&mut self) -> Option<(&'a str, usize, usize)> {
- let (start, _c) =
- self.enumerator.by_ref()
- .find(|(_i, c)| !c.is_whitespace())?;
- let (end, _c) =
- self.enumerator.by_ref()
- .find(|(_i, c)| c.is_whitespace())?;
- let word = &self.text[start..end];
- Some((word, start, end))
- }
-
- // currently ‘active’ bytecode being built.
- fn bc_mut(&mut self) -> &mut ByteCode {
- let word_index = self.defstack.last().unwrap_or(&0);
- &mut self.wordlist.0[*word_index]
- }
-
- fn bc(&self) -> &ByteCode {
- let word_index = self.defstack.last().unwrap_or(&0);
- &self.wordlist.0[*word_index]
- }
-
- fn anno_mut(&mut self) -> &mut Vec<Annotation> {
- let word_index = self.defstack.last().unwrap_or(&0);
- &mut self.annotations[*word_index]
- }
-
- // push `op` onto the currently building bytecode, as determined
- // by the top of the `namestack`.
- fn bc_push(&mut self, op: OpCode, anno: Annotation) {
- self.bc_mut().0.push(op);
- self.anno_mut().push(anno);
- }
-
- pub fn parse(&mut self) -> ParseResult<()> {
- while let Some((word, start, end)) = self.next_word() {
- let anno = Annotation { loc: (start, end) };
- if let Ok(i) = word.parse::<i32>() {
- self.bc_push(OpCode::Num(i), anno);
- } else if let Some(i) = self.wordalog.0.get(word) {
- self.bc_push(OpCode::Call(*i), anno);
- } else {
- match word {
- r#"s""# => {
- let (s_end, _) =
- self.enumerator
- .find(|(_i, c)| return *c == '"')
- .ok_or(ParseError::MissingQuote)?;
- self.bc_push(OpCode::Str(end+1, s_end), anno);
- },
- ":" => {
- let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?;
- self.wordalog.0.insert(name, self.wordlist.0.len());
- self.defstack.push(self.wordlist.0.len());
- self.wordlist.0.push(ByteCode(vec![]));
- self.annotations.push(vec![]);
- },
- ";" => {
- match self.bc().0.last() {
- Some(OpCode::Call(i)) => {
- let k = *i;
- self.bc_mut().0.pop();
- self.bc_mut().0.push(OpCode::TCall(k));
- },
- Some(OpCode::If(t, f)) => {
- let t = *t;
- let f = *f;
- self.bc_mut().0.pop();
- self.bc_mut().0.push(OpCode::TIf(t, f));
- // technically only needed if ‘f’ is None, but whatever.
- self.bc_push(OpCode::Ret, anno);
- },
- _ => self.bc_push(OpCode::Ret, anno),
- }
- self.defstack.pop().ok_or(ParseError::DefStackEmpty)?;
- },
- "if" => {
- let i = self.wordlist.0.len();
- self.wordlist.0.push(ByteCode(vec![]));
- self.annotations.push(vec![]);
- self.defstack.push(i);
-
- self.if_stack.push((IfClauses::True(i), anno));
- },
- "else" => {
- self.bc_push(OpCode::Ret, anno);
- self.defstack.pop();
-
- let i = self.wordlist.0.len();
- self.wordlist.0.push(ByteCode(vec![]));
- self.annotations.push(vec![]);
- self.defstack.push(i);
-
- let (true_clause, anno) = match self.if_stack.pop() {
- None => return Err(ParseError::MissingIf),
- Some((IfClauses::TrueFalse(_, _), _)) => return Err(ParseError::MissingIf),
- Some((IfClauses::True(cl), anno)) => (cl, anno),
- };
- self.if_stack.push((IfClauses::TrueFalse(true_clause, i), anno));
- },
- "then" => {
- self.bc_push(OpCode::Ret, anno);
- self.defstack.pop();
-
- match self.if_stack.pop() {
- None => return Err(ParseError::MissingIf),
- Some((IfClauses::True(true_clause), anno)) => {
- self.bc_push(OpCode::If(true_clause, None), anno);
- },
- Some((IfClauses::TrueFalse(true_clause, false_clause), anno)) => {
- self.bc_push(OpCode::If(true_clause, Some(false_clause)), anno);
- }
- }
- },
- "+" => self.bc_push(OpCode::Add, anno),
- "-" => self.bc_push(OpCode::Sub, anno),
- "*" => self.bc_push(OpCode::Mul, anno),
- "/" => self.bc_push(OpCode::Div, anno),
- "dup" => self.bc_push(OpCode::Dup, anno),
- "drop" => self.bc_push(OpCode::Drop, anno),
- "=" => self.bc_push(OpCode::EQ, anno),
- ">" => self.bc_push(OpCode::GT, anno),
- ">=" => self.bc_push(OpCode::GTE, anno),
- "<" => self.bc_push(OpCode::LT, anno),
- "<=" => self.bc_push(OpCode::LTE, anno),
- other => return Err(ParseError::UnknownWord(String::from(other))),
- }
- }
- }
- Ok(())
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use super::super::interp::OpCode;
-
- fn parser_for(text: &str) -> Parser {
- let mut p = Parser::new(text);
- p.parse().expect("badparse");
- p
- }
-
- fn eprintwordlist(wordlist: &WordList) {
- for i in 0..wordlist.0.len() {
- eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]);
- }
- }
-
- #[test]
- fn literal_num() {
- let p = parser_for("1\n");
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Num(1));
- }
-
- #[test]
- fn literal_string() {
- let p = parser_for(r#"s" hello there""#);
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Str(3, 14));
- }
-
- #[test]
- fn add_opcode() {
- let p = parser_for("+\n");
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Add);
- }
-
- #[test]
- fn sub_opcode() {
- let p = parser_for("-\n");
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Sub);
- }
-
- #[test]
- fn def_word() {
- let p = parser_for(": add2 2 + ; 3 add2\n");
- let main = &p.wordlist.0[0];
- let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist");
- let add2 = &p.wordlist.0[*add2_index];
- assert_eq!(main.len(), 2);
- assert_eq!(main[0], OpCode::Num(3));
- assert_eq!(main[1], OpCode::Call(*add2_index));
- assert_eq!(add2.len(), 3);
- assert_eq!(add2[0], OpCode::Num(2));
- assert_eq!(add2[1], OpCode::Add);
- assert_eq!(add2[2], OpCode::Ret);
- }
-
- #[test]
- fn if_then() {
- let p = parser_for("if 1 2 + then\n");
- eprintwordlist(&p.wordlist);
- assert_eq!(p.wordlist.0.len(), 2);
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::If(1, None));
- let tr = &p.wordlist.0[1];
- assert_eq!(tr.len(), 4);
- assert_eq!(tr[0], OpCode::Num(1));
- assert_eq!(tr[1], OpCode::Num(2));
- assert_eq!(tr[2], OpCode::Add);
- assert_eq!(tr[3], OpCode::Ret);
- }
-
- #[test]
- fn if_else_then() {
- let p = parser_for("if 1 2 + else 1 2 - then\n");
- eprintwordlist(&p.wordlist);
- assert_eq!(p.wordlist.0.len(), 3, "wordlist length");
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::If(1, Some(2)));
- let tr = &p.wordlist.0[1];
- assert_eq!(tr.len(), 4);
- assert_eq!(tr[0], OpCode::Num(1));
- assert_eq!(tr[1], OpCode::Num(2));
- assert_eq!(tr[2], OpCode::Add);
- assert_eq!(tr[3], OpCode::Ret);
- let fl = &p.wordlist.0[2];
- assert_eq!(fl.len(), 4);
- assert_eq!(fl[0], OpCode::Num(1));
- assert_eq!(fl[1], OpCode::Num(2));
- assert_eq!(fl[2], OpCode::Sub);
- assert_eq!(fl[3], OpCode::Ret);
- }
-
- #[test]
- fn tail_call() {
- let p = parser_for(": first + ; : second 3 first ; 1 second\n");
- assert_eq!(p.wordlist.0.len(), 3, "wordlist length");
- eprintwordlist(&p.wordlist);
- }
-
- #[test]
- fn tail_if() {
- let p = parser_for(": t if 1 2 + then ; t\n");
- eprintwordlist(&p.wordlist);
- assert_eq!(p.wordlist.0.len(), 3);
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Call(1));
- let t = &p.wordlist.0[1];
- assert_eq!(t.len(), 2);
- assert_eq!(t[0], OpCode::TIf(2, None));
- assert_eq!(t[1], OpCode::Ret);
- let tr = &p.wordlist.0[2];
- assert_eq!(tr.len(), 4);
- assert_eq!(tr[0], OpCode::Num(1));
- assert_eq!(tr[1], OpCode::Num(2));
- assert_eq!(tr[2], OpCode::Add);
- assert_eq!(tr[3], OpCode::Ret);
- }
-
- #[test]
- fn tail_if_else() {
- let p = parser_for(": t if 1 2 + else 1 2 - then ; t\n");
- eprintwordlist(&p.wordlist);
- assert_eq!(p.wordlist.0.len(), 4, "wordlist length");
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Call(1));
- let t = &p.wordlist.0[1];
- assert_eq!(t.len(), 2);
- assert_eq!(t[0], OpCode::TIf(2, Some(3)));
- assert_eq!(t[1], OpCode::Ret);
- let tr = &p.wordlist.0[2];
- assert_eq!(tr.len(), 4);
- assert_eq!(tr[0], OpCode::Num(1));
- assert_eq!(tr[1], OpCode::Num(2));
- assert_eq!(tr[2], OpCode::Add);
- assert_eq!(tr[3], OpCode::Ret);
- let fl = &p.wordlist.0[3];
- assert_eq!(fl.len(), 4);
- assert_eq!(fl[0], OpCode::Num(1));
- assert_eq!(fl[1], OpCode::Num(2));
- assert_eq!(fl[2], OpCode::Sub);
- assert_eq!(fl[3], OpCode::Ret);
- }
-
- #[test]
- fn recursion() {
- let p = parser_for(": foo foo ; foo\n");
- eprintwordlist(&p.wordlist);
- assert_eq!(p.wordlist.0.len(), 2);
- let main = &p.wordlist.0[0];
- assert_eq!(main.len(), 1);
- assert_eq!(main[0], OpCode::Call(1));
- let foo = &p.wordlist.0[1];
- assert_eq!(foo.len(), 1);
- assert_eq!(foo[0], OpCode::TCall(1));
- }
-}