diff options
Diffstat (limited to 'src/forth/parser.rs')
| -rw-r--r-- | src/forth/parser.rs | 384 |
1 files changed, 0 insertions, 384 deletions
diff --git a/src/forth/parser.rs b/src/forth/parser.rs deleted file mode 100644 index c934c4a..0000000 --- a/src/forth/parser.rs +++ /dev/null @@ -1,384 +0,0 @@ -use super::interp::{ByteCode, OpCode, WordList}; - -use std::collections::HashMap; -use std::iter::{Enumerate, Iterator}; -use std::str::Chars; - -#[derive(Debug)] -pub enum ParseError { - EOF, - DefStackEmpty, - MissingIf, - MissingQuote, - UnknownWord(String), -} -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::EOF => write!(f, "premature end-of-file"), - Self::DefStackEmpty => write!(f, "def stack empty"), - Self::MissingIf => write!(f, "missing if"), - Self::MissingQuote => write!(f, "missing ending quote"), - Self::UnknownWord(word) => write!(f, "unknown word: {}", word), - } - } -} -impl std::error::Error for ParseError {} - -type ParseResult<T> = Result<T, ParseError>; - -// todo: the annotations should be directly tied to the wordlist so -// they can't get out of sync. -#[derive(Debug)] -pub struct Annotation { - // (start, end) index in program text - pub loc: (usize, usize), -} - -#[derive(Debug)] -pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); - -#[derive(Debug)] -pub enum IfClauses { - True(usize), - TrueFalse(usize, usize), -} - -#[derive(Debug)] -pub struct Parser<'a> { - text: &'a str, - enumerator: Enumerate<Chars<'a>>, - // list of all word definitions, in the order defined. the main - // routine is always in the first entry. - // todo: don't be pub, have a method to extract a wordlist - pub wordlist: WordList, - pub annotations: Vec<Vec<Annotation>>, - // catalog of word to word index in `wordlist` - pub wordalog: WordCatalog<'a>, - // holds a stack of indices into `wordlist` that are currently - // being defined, with the top of stack being the most recent - // definition. - defstack: Vec<usize>, - // number of clauses currently being defined for a particular ‘if’ - // construct. a stack is needed in order to allow for nesting. - if_stack: Vec<(IfClauses, Annotation)>, -} - -impl<'a> Parser<'a> { - pub fn new(text: &'a str) -> Self { - let mut wl = vec![]; - // main routine is always the first entry. - wl.push(ByteCode(vec![])); - Self { - text, - enumerator: text.chars().enumerate(), - wordlist: WordList(wl), - annotations: vec![vec![]], - wordalog: WordCatalog(HashMap::new()), - defstack: vec![], - if_stack: vec![], - } - } - - // pull the next, whitespace-delimited word off the input stream. - fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { - let (start, _c) = - self.enumerator.by_ref() - .find(|(_i, c)| !c.is_whitespace())?; - let (end, _c) = - self.enumerator.by_ref() - .find(|(_i, c)| c.is_whitespace())?; - let word = &self.text[start..end]; - Some((word, start, end)) - } - - // currently ‘active’ bytecode being built. - fn bc_mut(&mut self) -> &mut ByteCode { - let word_index = self.defstack.last().unwrap_or(&0); - &mut self.wordlist.0[*word_index] - } - - fn bc(&self) -> &ByteCode { - let word_index = self.defstack.last().unwrap_or(&0); - &self.wordlist.0[*word_index] - } - - fn anno_mut(&mut self) -> &mut Vec<Annotation> { - let word_index = self.defstack.last().unwrap_or(&0); - &mut self.annotations[*word_index] - } - - // push `op` onto the currently building bytecode, as determined - // by the top of the `namestack`. - fn bc_push(&mut self, op: OpCode, anno: Annotation) { - self.bc_mut().0.push(op); - self.anno_mut().push(anno); - } - - pub fn parse(&mut self) -> ParseResult<()> { - while let Some((word, start, end)) = self.next_word() { - let anno = Annotation { loc: (start, end) }; - if let Ok(i) = word.parse::<i32>() { - self.bc_push(OpCode::Num(i), anno); - } else if let Some(i) = self.wordalog.0.get(word) { - self.bc_push(OpCode::Call(*i), anno); - } else { - match word { - r#"s""# => { - let (s_end, _) = - self.enumerator - .find(|(_i, c)| return *c == '"') - .ok_or(ParseError::MissingQuote)?; - self.bc_push(OpCode::Str(end+1, s_end), anno); - }, - ":" => { - let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; - self.wordalog.0.insert(name, self.wordlist.0.len()); - self.defstack.push(self.wordlist.0.len()); - self.wordlist.0.push(ByteCode(vec![])); - self.annotations.push(vec![]); - }, - ";" => { - match self.bc().0.last() { - Some(OpCode::Call(i)) => { - let k = *i; - self.bc_mut().0.pop(); - self.bc_mut().0.push(OpCode::TCall(k)); - }, - Some(OpCode::If(t, f)) => { - let t = *t; - let f = *f; - self.bc_mut().0.pop(); - self.bc_mut().0.push(OpCode::TIf(t, f)); - // technically only needed if ‘f’ is None, but whatever. - self.bc_push(OpCode::Ret, anno); - }, - _ => self.bc_push(OpCode::Ret, anno), - } - self.defstack.pop().ok_or(ParseError::DefStackEmpty)?; - }, - "if" => { - let i = self.wordlist.0.len(); - self.wordlist.0.push(ByteCode(vec![])); - self.annotations.push(vec![]); - self.defstack.push(i); - - self.if_stack.push((IfClauses::True(i), anno)); - }, - "else" => { - self.bc_push(OpCode::Ret, anno); - self.defstack.pop(); - - let i = self.wordlist.0.len(); - self.wordlist.0.push(ByteCode(vec![])); - self.annotations.push(vec![]); - self.defstack.push(i); - - let (true_clause, anno) = match self.if_stack.pop() { - None => return Err(ParseError::MissingIf), - Some((IfClauses::TrueFalse(_, _), _)) => return Err(ParseError::MissingIf), - Some((IfClauses::True(cl), anno)) => (cl, anno), - }; - self.if_stack.push((IfClauses::TrueFalse(true_clause, i), anno)); - }, - "then" => { - self.bc_push(OpCode::Ret, anno); - self.defstack.pop(); - - match self.if_stack.pop() { - None => return Err(ParseError::MissingIf), - Some((IfClauses::True(true_clause), anno)) => { - self.bc_push(OpCode::If(true_clause, None), anno); - }, - Some((IfClauses::TrueFalse(true_clause, false_clause), anno)) => { - self.bc_push(OpCode::If(true_clause, Some(false_clause)), anno); - } - } - }, - "+" => self.bc_push(OpCode::Add, anno), - "-" => self.bc_push(OpCode::Sub, anno), - "*" => self.bc_push(OpCode::Mul, anno), - "/" => self.bc_push(OpCode::Div, anno), - "dup" => self.bc_push(OpCode::Dup, anno), - "drop" => self.bc_push(OpCode::Drop, anno), - "=" => self.bc_push(OpCode::EQ, anno), - ">" => self.bc_push(OpCode::GT, anno), - ">=" => self.bc_push(OpCode::GTE, anno), - "<" => self.bc_push(OpCode::LT, anno), - "<=" => self.bc_push(OpCode::LTE, anno), - other => return Err(ParseError::UnknownWord(String::from(other))), - } - } - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use super::super::interp::OpCode; - - fn parser_for(text: &str) -> Parser { - let mut p = Parser::new(text); - p.parse().expect("badparse"); - p - } - - fn eprintwordlist(wordlist: &WordList) { - for i in 0..wordlist.0.len() { - eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]); - } - } - - #[test] - fn literal_num() { - let p = parser_for("1\n"); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Num(1)); - } - - #[test] - fn literal_string() { - let p = parser_for(r#"s" hello there""#); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Str(3, 14)); - } - - #[test] - fn add_opcode() { - let p = parser_for("+\n"); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Add); - } - - #[test] - fn sub_opcode() { - let p = parser_for("-\n"); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Sub); - } - - #[test] - fn def_word() { - let p = parser_for(": add2 2 + ; 3 add2\n"); - let main = &p.wordlist.0[0]; - let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); - let add2 = &p.wordlist.0[*add2_index]; - assert_eq!(main.len(), 2); - assert_eq!(main[0], OpCode::Num(3)); - assert_eq!(main[1], OpCode::Call(*add2_index)); - assert_eq!(add2.len(), 3); - assert_eq!(add2[0], OpCode::Num(2)); - assert_eq!(add2[1], OpCode::Add); - assert_eq!(add2[2], OpCode::Ret); - } - - #[test] - fn if_then() { - let p = parser_for("if 1 2 + then\n"); - eprintwordlist(&p.wordlist); - assert_eq!(p.wordlist.0.len(), 2); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::If(1, None)); - let tr = &p.wordlist.0[1]; - assert_eq!(tr.len(), 4); - assert_eq!(tr[0], OpCode::Num(1)); - assert_eq!(tr[1], OpCode::Num(2)); - assert_eq!(tr[2], OpCode::Add); - assert_eq!(tr[3], OpCode::Ret); - } - - #[test] - fn if_else_then() { - let p = parser_for("if 1 2 + else 1 2 - then\n"); - eprintwordlist(&p.wordlist); - assert_eq!(p.wordlist.0.len(), 3, "wordlist length"); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::If(1, Some(2))); - let tr = &p.wordlist.0[1]; - assert_eq!(tr.len(), 4); - assert_eq!(tr[0], OpCode::Num(1)); - assert_eq!(tr[1], OpCode::Num(2)); - assert_eq!(tr[2], OpCode::Add); - assert_eq!(tr[3], OpCode::Ret); - let fl = &p.wordlist.0[2]; - assert_eq!(fl.len(), 4); - assert_eq!(fl[0], OpCode::Num(1)); - assert_eq!(fl[1], OpCode::Num(2)); - assert_eq!(fl[2], OpCode::Sub); - assert_eq!(fl[3], OpCode::Ret); - } - - #[test] - fn tail_call() { - let p = parser_for(": first + ; : second 3 first ; 1 second\n"); - assert_eq!(p.wordlist.0.len(), 3, "wordlist length"); - eprintwordlist(&p.wordlist); - } - - #[test] - fn tail_if() { - let p = parser_for(": t if 1 2 + then ; t\n"); - eprintwordlist(&p.wordlist); - assert_eq!(p.wordlist.0.len(), 3); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Call(1)); - let t = &p.wordlist.0[1]; - assert_eq!(t.len(), 2); - assert_eq!(t[0], OpCode::TIf(2, None)); - assert_eq!(t[1], OpCode::Ret); - let tr = &p.wordlist.0[2]; - assert_eq!(tr.len(), 4); - assert_eq!(tr[0], OpCode::Num(1)); - assert_eq!(tr[1], OpCode::Num(2)); - assert_eq!(tr[2], OpCode::Add); - assert_eq!(tr[3], OpCode::Ret); - } - - #[test] - fn tail_if_else() { - let p = parser_for(": t if 1 2 + else 1 2 - then ; t\n"); - eprintwordlist(&p.wordlist); - assert_eq!(p.wordlist.0.len(), 4, "wordlist length"); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Call(1)); - let t = &p.wordlist.0[1]; - assert_eq!(t.len(), 2); - assert_eq!(t[0], OpCode::TIf(2, Some(3))); - assert_eq!(t[1], OpCode::Ret); - let tr = &p.wordlist.0[2]; - assert_eq!(tr.len(), 4); - assert_eq!(tr[0], OpCode::Num(1)); - assert_eq!(tr[1], OpCode::Num(2)); - assert_eq!(tr[2], OpCode::Add); - assert_eq!(tr[3], OpCode::Ret); - let fl = &p.wordlist.0[3]; - assert_eq!(fl.len(), 4); - assert_eq!(fl[0], OpCode::Num(1)); - assert_eq!(fl[1], OpCode::Num(2)); - assert_eq!(fl[2], OpCode::Sub); - assert_eq!(fl[3], OpCode::Ret); - } - - #[test] - fn recursion() { - let p = parser_for(": foo foo ; foo\n"); - eprintwordlist(&p.wordlist); - assert_eq!(p.wordlist.0.len(), 2); - let main = &p.wordlist.0[0]; - assert_eq!(main.len(), 1); - assert_eq!(main[0], OpCode::Call(1)); - let foo = &p.wordlist.0[1]; - assert_eq!(foo.len(), 1); - assert_eq!(foo[0], OpCode::TCall(1)); - } -} |
