diff options
Diffstat (limited to 'src/forth/parser.rs')
| -rw-r--r-- | src/forth/parser.rs | 183 |
1 files changed, 183 insertions, 0 deletions
diff --git a/src/forth/parser.rs b/src/forth/parser.rs new file mode 100644 index 0000000..26a5686 --- /dev/null +++ b/src/forth/parser.rs @@ -0,0 +1,183 @@ +use super::interp::{ByteCode, OpCode, WordCatalog}; + +use std::collections::HashMap; +use std::iter::{Enumerate, Iterator}; +use std::str::Chars; + +#[derive(Debug)] +pub enum ParseError { + EOF, + NameStackEmpty, + MissingQuote, + UnknownWord(String), +} +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::EOF => write!(f, "premature end-of-file"), + Self::NameStackEmpty => write!(f, "name stack empty"), + Self::MissingQuote => write!(f, "missing ending quote"), + Self::UnknownWord(word) => write!(f, "unknown word: {}", word), + } + } +} +impl std::error::Error for ParseError {} + +type ParseResult<T> = Result<T, ParseError>; + +// a list of all bytecode defs, with the main routine at ‘0’. +type ParserWordList = Vec<ByteCode>; + +#[derive(Debug)] +pub struct Parser<'a> { + text: &'a str, + enumerator: Enumerate<Chars<'a>>, + wordlist: ParserWordList, + wordalog: WordCatalog<'a>, + namestack: Vec<&'a str>, +} + +impl<'a> Parser<'a> { + pub fn new(text: &'a str) -> Self { + let enumerator = text.chars().enumerate(); + let mut wordlist = vec![]; + // main routine is always the first entry. + wordlist.push(ByteCode(vec![])); + Self { + text, + enumerator, + wordlist, + wordalog: WordCatalog(HashMap::new()), + namestack: vec![], + } + } + + // pull the next, whitespace-delimited word off the input stream. + fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { + let mut start = 0; + let chars = + self.enumerator.by_ref() + .skip_while(|(i, c)| { + start = *i; + return c.is_whitespace() + }); + for (i, c) in chars { + if c.is_whitespace() { + let end = i; + let word = self.text.get(start..end).unwrap(); + return Some((word, start, end)) + } + } + None + } + + // push `op` onto the currently building bytecode, as determined + // by the top of the `namestack`. + fn bc_push(&mut self, op: OpCode) -> ParseResult<()> { + let word_index = match self.namestack.last() { + None => &0, + Some(name) => self.wordalog.0.get(name).ok_or(ParseError::NameStackEmpty)?, + }; + self.wordlist[*word_index].0.push(op); + Ok(()) + } + + fn parse(&mut self) -> ParseResult<()> { + while let Some((word, _start, end)) = self.next_word() { + if let Ok(i) = word.parse::<i32>() { + self.bc_push(OpCode::Num(i))?; + } else if let Some(i) = self.wordalog.0.get(word) { + self.bc_push(OpCode::WordI(*i))?; + } else { + match word { + r#"s""# => { + let (s_end, _) = + self.enumerator + .find(|(_i, c)| return *c == '"') + .ok_or(ParseError::MissingQuote)?; + self.bc_push(OpCode::Str(end+1, s_end))?; + }, + ":" => { + let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; + self.namestack.push(name); + self.wordalog.0.insert(name, self.wordlist.len()); + self.wordlist.push(ByteCode(vec![])); + }, + ";" => { + self.bc_push(OpCode::Ret)?; + self.namestack.pop(); + }, + "+" => self.bc_push(OpCode::Add)?, + "-" => self.bc_push(OpCode::Sub)?, + other => return Err(ParseError::UnknownWord(String::from(other))), + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::interp::OpCode; + + fn parser_for(text: &str) -> Parser { + let mut p = Parser::new(text); + p.parse().expect("badparse"); + p + } + + #[test] + fn literal_num() { + let p = parser_for("1\n"); + let main = &p.wordlist[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Num(1)); + } + + #[test] + fn literal_string() { + let p = parser_for(r#"s" hello there""#); + let main = &p.wordlist[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Str(3, 14)); + } + + #[test] + fn add_opcode() { + let p = parser_for("+\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Add); + } + + #[test] + fn sub_opcode() { + let p = parser_for("-\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Sub); + } + + #[test] + fn def_word() { + let p = parser_for(": add2 2 + ; 3 add2\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + + let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); + let add2 = &p.wordlist[*add2_index]; + eprintln!("add2 {:?}", add2); + + assert_eq!(main.len(), 2); + assert_eq!(main[0], OpCode::Num(3)); + assert_eq!(main[1], OpCode::WordI(*add2_index)); + assert_eq!(add2.len(), 3); + assert_eq!(add2[0], OpCode::Num(2)); + assert_eq!(add2[1], OpCode::Add); + assert_eq!(add2[2], OpCode::Ret); + } +} |
