use super::interp::{ByteCode, OpCode, WordCatalog}; use std::collections::HashMap; use std::iter::{Enumerate, Iterator}; use std::str::Chars; #[derive(Debug)] pub enum ParseError { EOF, NameStackEmpty, MissingQuote, UnknownWord(String), } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::EOF => write!(f, "premature end-of-file"), Self::NameStackEmpty => write!(f, "name stack empty"), Self::MissingQuote => write!(f, "missing ending quote"), Self::UnknownWord(word) => write!(f, "unknown word: {}", word), } } } impl std::error::Error for ParseError {} type ParseResult = Result; // a list of all bytecode defs, with the main routine at ‘0’. type ParserWordList = Vec; #[derive(Debug)] pub struct Parser<'a> { text: &'a str, enumerator: Enumerate>, wordlist: ParserWordList, wordalog: WordCatalog<'a>, namestack: Vec<&'a str>, } impl<'a> Parser<'a> { pub fn new(text: &'a str) -> Self { let enumerator = text.chars().enumerate(); let mut wordlist = vec![]; // main routine is always the first entry. wordlist.push(ByteCode(vec![])); Self { text, enumerator, wordlist, wordalog: WordCatalog(HashMap::new()), namestack: vec![], } } // pull the next, whitespace-delimited word off the input stream. fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { let mut start = 0; let chars = self.enumerator.by_ref() .skip_while(|(i, c)| { start = *i; return c.is_whitespace() }); for (i, c) in chars { if c.is_whitespace() { let end = i; let word = self.text.get(start..end).unwrap(); return Some((word, start, end)) } } None } // push `op` onto the currently building bytecode, as determined // by the top of the `namestack`. fn bc_push(&mut self, op: OpCode) -> ParseResult<()> { let word_index = match self.namestack.last() { None => &0, Some(name) => self.wordalog.0.get(name).ok_or(ParseError::NameStackEmpty)?, }; self.wordlist[*word_index].0.push(op); Ok(()) } fn parse(&mut self) -> ParseResult<()> { while let Some((word, _start, end)) = self.next_word() { if let Ok(i) = word.parse::() { self.bc_push(OpCode::Num(i))?; } else if let Some(i) = self.wordalog.0.get(word) { self.bc_push(OpCode::WordI(*i))?; } else { match word { r#"s""# => { let (s_end, _) = self.enumerator .find(|(_i, c)| return *c == '"') .ok_or(ParseError::MissingQuote)?; self.bc_push(OpCode::Str(end+1, s_end))?; }, ":" => { let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; self.namestack.push(name); self.wordalog.0.insert(name, self.wordlist.len()); self.wordlist.push(ByteCode(vec![])); }, ";" => { self.bc_push(OpCode::Ret)?; self.namestack.pop(); }, "+" => self.bc_push(OpCode::Add)?, "-" => self.bc_push(OpCode::Sub)?, other => return Err(ParseError::UnknownWord(String::from(other))), } } } Ok(()) } } #[cfg(test)] mod tests { use super::*; use super::super::interp::OpCode; fn parser_for(text: &str) -> Parser { let mut p = Parser::new(text); p.parse().expect("badparse"); p } #[test] fn literal_num() { let p = parser_for("1\n"); let main = &p.wordlist[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Num(1)); } #[test] fn literal_string() { let p = parser_for(r#"s" hello there""#); let main = &p.wordlist[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Str(3, 14)); } #[test] fn add_opcode() { let p = parser_for("+\n"); let main = &p.wordlist[0]; eprintln!("main {:?}", main); assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Add); } #[test] fn sub_opcode() { let p = parser_for("-\n"); let main = &p.wordlist[0]; eprintln!("main {:?}", main); assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::Sub); } #[test] fn def_word() { let p = parser_for(": add2 2 + ; 3 add2\n"); let main = &p.wordlist[0]; eprintln!("main {:?}", main); let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); let add2 = &p.wordlist[*add2_index]; eprintln!("add2 {:?}", add2); assert_eq!(main.len(), 2); assert_eq!(main[0], OpCode::Num(3)); assert_eq!(main[1], OpCode::WordI(*add2_index)); assert_eq!(add2.len(), 3); assert_eq!(add2[0], OpCode::Num(2)); assert_eq!(add2[1], OpCode::Add); assert_eq!(add2[2], OpCode::Ret); } }