summaryrefslogtreecommitdiffstats
path: root/src/forth/parser.rs
diff options
context:
space:
mode:
authorBrian Cully <bjc@spork.org>2025-08-21 11:08:27 -0400
committerBrian Cully <bjc@spork.org>2025-08-21 12:33:12 -0400
commitc4b52f5f0770d3a83d67815b18e3cd76b01f0258 (patch)
tree3dc982c60e7c1103af472ebdb515c358b51152e4 /src/forth/parser.rs
parentbd33129b72372512a4b8a570c101da01e8877fff (diff)
downloadautomathon-c4b52f5f0770d3a83d67815b18e3cd76b01f0258.tar.gz
automathon-c4b52f5f0770d3a83d67815b18e3cd76b01f0258.zip
add tail call elimination for call and if.
Diffstat (limited to 'src/forth/parser.rs')
-rw-r--r--src/forth/parser.rs245
1 files changed, 212 insertions, 33 deletions
diff --git a/src/forth/parser.rs b/src/forth/parser.rs
index 9e444c9..ddc48a9 100644
--- a/src/forth/parser.rs
+++ b/src/forth/parser.rs
@@ -7,7 +7,8 @@ use std::str::Chars;
#[derive(Debug)]
pub enum ParseError {
EOF,
- NameStackEmpty,
+ DefStackEmpty,
+ MissingIf,
MissingQuote,
UnknownWord(String),
}
@@ -15,7 +16,8 @@ impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::EOF => write!(f, "premature end-of-file"),
- Self::NameStackEmpty => write!(f, "name stack empty"),
+ Self::DefStackEmpty => write!(f, "def stack empty"),
+ Self::MissingIf => write!(f, "missing if"),
Self::MissingQuote => write!(f, "missing ending quote"),
Self::UnknownWord(word) => write!(f, "unknown word: {}", word),
}
@@ -26,56 +28,73 @@ impl std::error::Error for ParseError {}
type ParseResult<T> = Result<T, ParseError>;
#[derive(Debug)]
+pub enum IfClauses {
+ True(usize),
+ TrueFalse(usize, usize),
+}
+
+#[derive(Debug)]
pub struct Parser<'a> {
text: &'a str,
enumerator: Enumerate<Chars<'a>>,
+ // list of all word definitions, in the order defined. the main
+ // routine is always in the first entry.
wordlist: WordList,
+ // catalog of word to word index in `wordlist`
wordalog: WordCatalog<'a>,
- namestack: Vec<&'a str>,
+ // holds a stack of indices into `wordlist` that are currently
+ // being defined, with the top of stack being the most recent
+ // definition.
+ defstack: Vec<usize>,
+ // number of clauses currently being defined for a particular ‘if’
+ // construct. a stack is needed in order to allow for nesting.
+ if_stack: Vec<IfClauses>,
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self {
- let enumerator = text.chars().enumerate();
let mut wl = vec![];
// main routine is always the first entry.
wl.push(ByteCode(vec![]));
Self {
text,
- enumerator,
+ enumerator: text.chars().enumerate(),
wordlist: WordList(wl),
wordalog: WordCatalog(HashMap::new()),
- namestack: vec![],
+ defstack: vec![],
+ if_stack: vec![],
}
}
// pull the next, whitespace-delimited word off the input stream.
fn next_word(&mut self) -> Option<(&'a str, usize, usize)> {
- let mut start = 0;
- let chars =
+ let (start, _c) =
self.enumerator.by_ref()
- .skip_while(|(i, c)| {
- start = *i;
- return c.is_whitespace()
- });
- for (i, c) in chars {
- if c.is_whitespace() {
- let end = i;
- let word = self.text.get(start..end).unwrap();
- return Some((word, start, end))
- }
- }
- None
+ .find(|(_i, c)| !c.is_whitespace())?;
+ let (end, _c) =
+ self.enumerator.by_ref()
+ .find(|(_i, c)| c.is_whitespace())?;
+ let word = self.text.get(start..end).unwrap();
+ Some((word, start, end))
+ }
+
+ // currently ‘active’ bytecode being built.
+ fn bc_mut(&mut self) -> &mut ByteCode {
+ let word_index = self.defstack.last().unwrap_or(&0);
+ &mut self.wordlist.0[*word_index]
+ }
+
+ fn bc(&self) -> &ByteCode {
+ let word_index = self.defstack.last().unwrap_or(&0);
+ &self.wordlist.0[*word_index]
}
// push `op` onto the currently building bytecode, as determined
// by the top of the `namestack`.
fn bc_push(&mut self, op: OpCode) -> ParseResult<()> {
- let word_index = match self.namestack.last() {
- None => &0,
- Some(name) => self.wordalog.0.get(name).ok_or(ParseError::NameStackEmpty)?,
- };
- self.wordlist.0[*word_index].0.push(op);
+ // let word_index = self.defstack.last().unwrap_or(&0);
+ // self.wordlist.0[*word_index].0.push(op);
+ self.bc_mut().0.push(op);
Ok(())
}
@@ -96,13 +115,69 @@ impl<'a> Parser<'a> {
},
":" => {
let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?;
- self.namestack.push(name);
self.wordalog.0.insert(name, self.wordlist.0.len());
+ self.defstack.push(self.wordlist.0.len());
self.wordlist.0.push(ByteCode(vec![]));
},
";" => {
+ match self.bc().0.last() {
+ Some(OpCode::Call(i)) => {
+ let k = *i;
+ self.bc_mut().0.pop();
+ self.bc_mut().0.push(OpCode::TCall(k));
+ },
+ Some(OpCode::If(t, f)) => {
+ let t = *t;
+ let f = *f;
+ self.bc_mut().0.pop();
+ self.bc_mut().0.push(OpCode::TIf(t, f));
+ // technically only needed if ‘f’ is None, but whatever.
+ self.bc_mut().0.push(OpCode::Ret);
+ },
+ _ => self.bc_push(OpCode::Ret)?,
+ }
+ self.defstack.pop().ok_or(ParseError::DefStackEmpty)?;
+ },
+ "if" => {
+ eprintln!("got if, if_stack: {:?}", self.if_stack);
+ let i = self.wordlist.0.len();
+ self.wordlist.0.push(ByteCode(vec![]));
+ self.defstack.push(i);
+
+ self.if_stack.push(IfClauses::True(i));
+ },
+ "else" => {
+ eprintln!("got else, if_stack: {:?}", self.if_stack);
self.bc_push(OpCode::Ret)?;
- self.namestack.pop();
+ self.defstack.pop();
+
+ let i = self.wordlist.0.len();
+ self.wordlist.0.push(ByteCode(vec![]));
+ self.defstack.push(i);
+
+ let true_clause = match self.if_stack.pop() {
+ None => return Err(ParseError::MissingIf),
+ Some(IfClauses::TrueFalse(_, _)) => return Err(ParseError::MissingIf),
+ Some(IfClauses::True(cl)) => cl,
+ };
+ self.if_stack.push(IfClauses::TrueFalse(true_clause, i));
+ },
+ "then" => {
+ eprintln!("got then, if_stack: {:?}", self.if_stack);
+ self.bc_push(OpCode::Ret)?;
+ self.defstack.pop();
+
+ match self.if_stack.pop() {
+ None => return Err(ParseError::MissingIf),
+ Some(IfClauses::True(true_clause)) => {
+ eprintln!("single clause true at {}", true_clause);
+ self.bc_push(OpCode::If(true_clause, None))?;
+ },
+ Some(IfClauses::TrueFalse(true_clause, false_clause)) => {
+ eprintln!("two clause true at {} and {}", true_clause, false_clause);
+ self.bc_push(OpCode::If(true_clause, Some(false_clause)))?;
+ }
+ }
},
"+" => self.bc_push(OpCode::Add)?,
"-" => self.bc_push(OpCode::Sub)?,
@@ -125,6 +200,12 @@ mod tests {
p
}
+ fn eprintwordlist(wordlist: &WordList) {
+ for i in 0..wordlist.0.len() {
+ eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]);
+ }
+ }
+
#[test]
fn literal_num() {
let p = parser_for("1\n");
@@ -145,7 +226,6 @@ mod tests {
fn add_opcode() {
let p = parser_for("+\n");
let main = &p.wordlist.0[0];
- eprintln!("main {:?}", main);
assert_eq!(main.len(), 1);
assert_eq!(main[0], OpCode::Add);
}
@@ -154,7 +234,6 @@ mod tests {
fn sub_opcode() {
let p = parser_for("-\n");
let main = &p.wordlist.0[0];
- eprintln!("main {:?}", main);
assert_eq!(main.len(), 1);
assert_eq!(main[0], OpCode::Sub);
}
@@ -163,12 +242,8 @@ mod tests {
fn def_word() {
let p = parser_for(": add2 2 + ; 3 add2\n");
let main = &p.wordlist.0[0];
- eprintln!("main {:?}", main);
-
let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist");
let add2 = &p.wordlist.0[*add2_index];
- eprintln!("add2 {:?}", add2);
-
assert_eq!(main.len(), 2);
assert_eq!(main[0], OpCode::Num(3));
assert_eq!(main[1], OpCode::Call(*add2_index));
@@ -177,4 +252,108 @@ mod tests {
assert_eq!(add2[1], OpCode::Add);
assert_eq!(add2[2], OpCode::Ret);
}
+
+ #[test]
+ fn if_then() {
+ let p = parser_for("if 1 2 + then\n");
+ eprintwordlist(&p.wordlist);
+ assert_eq!(p.wordlist.0.len(), 2);
+ let main = &p.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::If(1, None));
+ let tr = &p.wordlist.0[1];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn if_else_then() {
+ let p = parser_for("if 1 2 + else 1 2 - then\n");
+ eprintwordlist(&p.wordlist);
+ assert_eq!(p.wordlist.0.len(), 3, "wordlist length");
+ let main = &p.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::If(1, Some(2)));
+ let tr = &p.wordlist.0[1];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ let fl = &p.wordlist.0[2];
+ assert_eq!(fl.len(), 4);
+ assert_eq!(fl[0], OpCode::Num(1));
+ assert_eq!(fl[1], OpCode::Num(2));
+ assert_eq!(fl[2], OpCode::Sub);
+ assert_eq!(fl[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn tail_call() {
+ let p = parser_for(": first + ; : second 3 first ; 1 second\n");
+ assert_eq!(p.wordlist.0.len(), 3, "wordlist length");
+ eprintwordlist(&p.wordlist);
+ }
+
+ #[test]
+ fn tail_if() {
+ let p = parser_for(": t if 1 2 + then ; t\n");
+ eprintwordlist(&p.wordlist);
+ assert_eq!(p.wordlist.0.len(), 3);
+ let main = &p.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let t = &p.wordlist.0[1];
+ assert_eq!(t.len(), 2);
+ assert_eq!(t[0], OpCode::TIf(2, None));
+ assert_eq!(t[1], OpCode::Ret);
+ let tr = &p.wordlist.0[2];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn tail_if_else() {
+ let p = parser_for(": t if 1 2 + else 1 2 - then ; t\n");
+ eprintwordlist(&p.wordlist);
+ assert_eq!(p.wordlist.0.len(), 4, "wordlist length");
+ let main = &p.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let t = &p.wordlist.0[1];
+ assert_eq!(t.len(), 2);
+ assert_eq!(t[0], OpCode::TIf(2, Some(3)));
+ assert_eq!(t[1], OpCode::Ret);
+ let tr = &p.wordlist.0[2];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ let fl = &p.wordlist.0[3];
+ assert_eq!(fl.len(), 4);
+ assert_eq!(fl[0], OpCode::Num(1));
+ assert_eq!(fl[1], OpCode::Num(2));
+ assert_eq!(fl[2], OpCode::Sub);
+ assert_eq!(fl[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn recursion() {
+ let p = parser_for(": foo foo ; foo\n");
+ eprintwordlist(&p.wordlist);
+ assert_eq!(p.wordlist.0.len(), 2);
+ let main = &p.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let foo = &p.wordlist.0[1];
+ assert_eq!(foo.len(), 1);
+ assert_eq!(foo[0], OpCode::TCall(1));
+ }
}