use super::interp::{ByteCode, OpCode, WordList}; use log::debug; use std::collections::HashMap; use std::iter::{Enumerate, Iterator}; use std::str::Chars; #[derive(Debug)] pub enum ParseError { EOF, DefStackEmpty, MissingIf, MissingQuote, UnknownWord(String), } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::EOF => write!(f, "premature end-of-file"), Self::DefStackEmpty => write!(f, "def stack empty"), Self::MissingIf => write!(f, "missing if"), Self::MissingQuote => write!(f, "missing ending quote"), Self::UnknownWord(word) => write!(f, "unknown word: {}", word), } } } impl std::error::Error for ParseError {} type ParseResult = Result; #[derive(Debug)] pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); #[derive(Debug)] pub enum IfClauses { True(usize), TrueFalse(usize, usize), } #[derive(Debug)] pub struct Parser<'a> { text: &'a str, enumerator: Enumerate>, // list of all word definitions, in the order defined. the main // routine is always in the first entry. // todo: don't be pub, have a method to extract a wordlist pub wordlist: WordList, // catalog of word to word index in `wordlist` pub wordalog: WordCatalog<'a>, // holds a stack of indices into `wordlist` that are currently // being defined, with the top of stack being the most recent // definition. defstack: Vec, // number of clauses currently being defined for a particular ‘if’ // construct. a stack is needed in order to allow for nesting. if_stack: Vec, } impl<'a> Parser<'a> { pub fn new(text: &'a str) -> Self { let mut wl = vec![]; // main routine is always the first entry. wl.push(ByteCode(vec![])); Self { text, enumerator: text.chars().enumerate(), wordlist: WordList(wl), wordalog: WordCatalog(HashMap::new()), defstack: vec![], if_stack: vec![], } } // pull the next, whitespace-delimited word off the input stream. fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { let (start, _c) = self.enumerator.by_ref() .find(|(_i, c)| !c.is_whitespace())?; let (end, _c) = self.enumerator.by_ref() .find(|(_i, c)| c.is_whitespace())?; let word = &self.text[start..end]; debug!("Parser::next_word → ‘{}’ ({} → {})", word, start, end); Some((word, start, end)) } // currently ‘active’ bytecode being built. fn bc_mut(&mut self) -> &mut ByteCode { let word_index = self.defstack.last().unwrap_or(&0); &mut self.wordlist.0[*word_index] } fn bc(&self) -> &ByteCode { let word_index = self.defstack.last().unwrap_or(&0); &self.wordlist.0[*word_index] } // push `op` onto the currently building bytecode, as determined // by the top of the `namestack`. fn bc_push(&mut self, op: OpCode) -> ParseResult<()> { // let word_index = self.defstack.last().unwrap_or(&0); // self.wordlist.0[*word_index].0.push(op); self.bc_mut().0.push(op); Ok(()) } pub fn parse(&mut self) -> ParseResult<()> { while let Some((word, _start, end)) = self.next_word() { if let Ok(i) = word.parse::() { self.bc_push(OpCode::Num(i))?; } else if let Some(i) = self.wordalog.0.get(word) { self.bc_push(OpCode::Call(*i))?; } else { match word { r#"s""# => { let (s_end, _) = self.enumerator .find(|(_i, c)| return *c == '"') .ok_or(ParseError::MissingQuote)?; self.bc_push(OpCode::Str(end+1, s_end))?; }, ":" => { let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; self.wordalog.0.insert(name, self.wordlist.0.len()); self.defstack.push(self.wordlist.0.len()); self.wordlist.0.push(ByteCode(vec![])); }, ";" => { match self.bc().0.last() { Some(OpCode::Call(i)) => { let k = *i; self.bc_mut().0.pop(); self.bc_mut().0.push(OpCode::TCall(k)); }, Some(OpCode::If(t, f)) => { let t = *t; let f = *f; self.bc_mut().0.pop(); self.bc_mut().0.push(OpCode::TIf(t, f)); // technically only needed if ‘f’ is None, but whatever. self.bc_mut().0.push(OpCode::Ret); }, _ => self.bc_push(OpCode::Ret)?, } self.defstack.pop().ok_or(ParseError::DefStackEmpty)?; }, "if" => { let i = self.wordlist.0.len(); self.wordlist.0.push(ByteCode(vec![])); self.defstack.push(i); self.if_stack.push(IfClauses::True(i)); }, "else" => { self.bc_push(OpCode::Ret)?; self.defstack.pop(); let i = self.wordlist.0.len(); self.wordlist.0.push(ByteCode(vec![])); self.defstack.push(i); let true_clause = match self.if_stack.pop() { None => return Err(ParseError::MissingIf), Some(IfClauses::TrueFalse(_, _)) => return Err(ParseError::MissingIf), Some(IfClauses::True(cl)) => cl, }; self.if_stack.push(IfClauses::TrueFalse(true_clause, i)); }, "then" => { self.bc_push(OpCode::Ret)?; self.defstack.pop(); match self.if_stack.pop() { None => return Err(ParseError::MissingIf), Some(IfClauses::True(true_clause)) => { self.bc_push(OpCode::If(true_clause, None))?; }, Some(IfClauses::TrueFalse(true_clause, false_clause)) => { self.bc_push(OpCode::If(true_clause, Some(false_clause)))?; } } }, "+" => self.bc_push(OpCode::Add)?, "-" => self.bc_push(OpCode::Sub)?, "*" => self.bc_push(OpCode::Mul)?, "/" => self.bc_push(OpCode::Div)?, "dup" => self.bc_push(OpCode::Dup)?, "drop" => self.bc_push(OpCode::Drop)?, "=" => self.bc_push(OpCode::EQ)?, ">" => self.bc_push(OpCode::GT)?, ">=" => self.bc_push(OpCode::GTE)?, "<" => self.bc_push(OpCode::LT)?, "<=" => self.bc_push(OpCode::LTE)?, other => return Err(ParseError::UnknownWord(String::from(other))), } } } Ok(()) } } #[cfg(test)] mod tests { use super::*; use super::super::interp::OpCode; fn parser_for(text: &str) -> Parser { let mut p = Parser::new(text); p.parse().expect("badparse"); p } fn eprintwordlist(wordlist: &WordList) { for i in 0..wordlist.0.len() { eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]); } } #[test] fn literal_num() { let p = parser_for("1\n"); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Num(1)); } #[test] fn literal_string() { let p = parser_for(r#"s" hello there""#); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Str(3, 14)); } #[test] fn add_opcode() { let p = parser_for("+\n"); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Add); } #[test] fn sub_opcode() { let p = parser_for("-\n"); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Sub); } #[test] fn def_word() { let p = parser_for(": add2 2 + ; 3 add2\n"); let main = &p.wordlist.0[0]; let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); let add2 = &p.wordlist.0[*add2_index]; assert_eq!(main.len(), 2); assert_eq!(main[0], OpCode::Num(3)); assert_eq!(main[1], OpCode::Call(*add2_index)); assert_eq!(add2.len(), 3); assert_eq!(add2[0], OpCode::Num(2)); assert_eq!(add2[1], OpCode::Add); assert_eq!(add2[2], OpCode::Ret); } #[test] fn if_then() { let p = parser_for("if 1 2 + then\n"); eprintwordlist(&p.wordlist); assert_eq!(p.wordlist.0.len(), 2); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::If(1, None)); let tr = &p.wordlist.0[1]; assert_eq!(tr.len(), 4); assert_eq!(tr[0], OpCode::Num(1)); assert_eq!(tr[1], OpCode::Num(2)); assert_eq!(tr[2], OpCode::Add); assert_eq!(tr[3], OpCode::Ret); } #[test] fn if_else_then() { let p = parser_for("if 1 2 + else 1 2 - then\n"); eprintwordlist(&p.wordlist); assert_eq!(p.wordlist.0.len(), 3, "wordlist length"); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::If(1, Some(2))); let tr = &p.wordlist.0[1]; assert_eq!(tr.len(), 4); assert_eq!(tr[0], OpCode::Num(1)); assert_eq!(tr[1], OpCode::Num(2)); assert_eq!(tr[2], OpCode::Add); assert_eq!(tr[3], OpCode::Ret); let fl = &p.wordlist.0[2]; assert_eq!(fl.len(), 4); assert_eq!(fl[0], OpCode::Num(1)); assert_eq!(fl[1], OpCode::Num(2)); assert_eq!(fl[2], OpCode::Sub); assert_eq!(fl[3], OpCode::Ret); } #[test] fn tail_call() { let p = parser_for(": first + ; : second 3 first ; 1 second\n"); assert_eq!(p.wordlist.0.len(), 3, "wordlist length"); eprintwordlist(&p.wordlist); } #[test] fn tail_if() { let p = parser_for(": t if 1 2 + then ; t\n"); eprintwordlist(&p.wordlist); assert_eq!(p.wordlist.0.len(), 3); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Call(1)); let t = &p.wordlist.0[1]; assert_eq!(t.len(), 2); assert_eq!(t[0], OpCode::TIf(2, None)); assert_eq!(t[1], OpCode::Ret); let tr = &p.wordlist.0[2]; assert_eq!(tr.len(), 4); assert_eq!(tr[0], OpCode::Num(1)); assert_eq!(tr[1], OpCode::Num(2)); assert_eq!(tr[2], OpCode::Add); assert_eq!(tr[3], OpCode::Ret); } #[test] fn tail_if_else() { let p = parser_for(": t if 1 2 + else 1 2 - then ; t\n"); eprintwordlist(&p.wordlist); assert_eq!(p.wordlist.0.len(), 4, "wordlist length"); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Call(1)); let t = &p.wordlist.0[1]; assert_eq!(t.len(), 2); assert_eq!(t[0], OpCode::TIf(2, Some(3))); assert_eq!(t[1], OpCode::Ret); let tr = &p.wordlist.0[2]; assert_eq!(tr.len(), 4); assert_eq!(tr[0], OpCode::Num(1)); assert_eq!(tr[1], OpCode::Num(2)); assert_eq!(tr[2], OpCode::Add); assert_eq!(tr[3], OpCode::Ret); let fl = &p.wordlist.0[3]; assert_eq!(fl.len(), 4); assert_eq!(fl[0], OpCode::Num(1)); assert_eq!(fl[1], OpCode::Num(2)); assert_eq!(fl[2], OpCode::Sub); assert_eq!(fl[3], OpCode::Ret); } #[test] fn recursion() { let p = parser_for(": foo foo ; foo\n"); eprintwordlist(&p.wordlist); assert_eq!(p.wordlist.0.len(), 2); let main = &p.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Call(1)); let foo = &p.wordlist.0[1]; assert_eq!(foo.len(), 1); assert_eq!(foo[0], OpCode::TCall(1)); } }