diff options
Diffstat (limited to 'src/forth/compiler.rs')
| -rw-r--r-- | src/forth/compiler.rs | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/src/forth/compiler.rs b/src/forth/compiler.rs new file mode 100644 index 0000000..9120506 --- /dev/null +++ b/src/forth/compiler.rs @@ -0,0 +1,384 @@ +use super::vm::{ByteCode, OpCode, WordList}; + +use std::collections::HashMap; +use std::iter::{Enumerate, Iterator}; +use std::str::Chars; + +#[derive(Debug)] +pub enum CompileError { + EOF, + DefStackEmpty, + MissingIf, + MissingQuote, + UnknownWord(String), +} +impl std::fmt::Display for CompileError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::EOF => write!(f, "premature end-of-file"), + Self::DefStackEmpty => write!(f, "def stack empty"), + Self::MissingIf => write!(f, "missing if"), + Self::MissingQuote => write!(f, "missing ending quote"), + Self::UnknownWord(word) => write!(f, "unknown word: {}", word), + } + } +} +impl std::error::Error for CompileError {} + +type CompileResult<T> = Result<T, CompileError>; + +// todo: the annotations should be directly tied to the wordlist so +// they can't get out of sync. +#[derive(Debug)] +pub struct Annotation { + // (start, end) index in program text + pub loc: (usize, usize), +} + +#[derive(Debug)] +pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); + +#[derive(Debug)] +pub enum IfClauses { + True(usize), + TrueFalse(usize, usize), +} + +#[derive(Debug)] +pub struct Compiler<'a> { + text: &'a str, + enumerator: Enumerate<Chars<'a>>, + // list of all word definitions, in the order defined. the main + // routine is always in the first entry. + // todo: don't be pub, have a method to extract a wordlist + pub wordlist: WordList, + pub annotations: Vec<Vec<Annotation>>, + // catalog of word to word index in `wordlist` + pub wordalog: WordCatalog<'a>, + // holds a stack of indices into `wordlist` that are currently + // being defined, with the top of stack being the most recent + // definition. + defstack: Vec<usize>, + // number of clauses currently being defined for a particular ‘if’ + // construct. a stack is needed in order to allow for nesting. + if_stack: Vec<(IfClauses, Annotation)>, +} + +impl<'a> Compiler<'a> { + pub fn new(text: &'a str) -> Self { + let mut wl = vec![]; + // main routine is always the first entry. + wl.push(ByteCode(vec![])); + Self { + text, + enumerator: text.chars().enumerate(), + wordlist: WordList(wl), + annotations: vec![vec![]], + wordalog: WordCatalog(HashMap::new()), + defstack: vec![], + if_stack: vec![], + } + } + + // pull the next, whitespace-delimited word off the input stream. + fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { + let (start, _c) = + self.enumerator.by_ref() + .find(|(_i, c)| !c.is_whitespace())?; + let (end, _c) = + self.enumerator.by_ref() + .find(|(_i, c)| c.is_whitespace())?; + let word = &self.text[start..end]; + Some((word, start, end)) + } + + // currently ‘active’ bytecode being built. + fn bc_mut(&mut self) -> &mut ByteCode { + let word_index = self.defstack.last().unwrap_or(&0); + &mut self.wordlist.0[*word_index] + } + + fn bc(&self) -> &ByteCode { + let word_index = self.defstack.last().unwrap_or(&0); + &self.wordlist.0[*word_index] + } + + fn anno_mut(&mut self) -> &mut Vec<Annotation> { + let word_index = self.defstack.last().unwrap_or(&0); + &mut self.annotations[*word_index] + } + + // push `op` onto the currently building bytecode, as determined + // by the top of the `namestack`. + fn bc_push(&mut self, op: OpCode, anno: Annotation) { + self.bc_mut().0.push(op); + self.anno_mut().push(anno); + } + + pub fn compile(&mut self) -> CompileResult<()> { + while let Some((word, start, end)) = self.next_word() { + let anno = Annotation { loc: (start, end) }; + if let Ok(i) = word.parse::<i32>() { + self.bc_push(OpCode::Num(i), anno); + } else if let Some(i) = self.wordalog.0.get(word) { + self.bc_push(OpCode::Call(*i), anno); + } else { + match word { + r#"s""# => { + let (s_end, _) = + self.enumerator + .find(|(_i, c)| return *c == '"') + .ok_or(CompileError::MissingQuote)?; + self.bc_push(OpCode::Str(end+1, s_end), anno); + }, + ":" => { + let (name, _, _) = self.next_word().ok_or(CompileError::EOF)?; + self.wordalog.0.insert(name, self.wordlist.0.len()); + self.defstack.push(self.wordlist.0.len()); + self.wordlist.0.push(ByteCode(vec![])); + self.annotations.push(vec![]); + }, + ";" => { + match self.bc().0.last() { + Some(OpCode::Call(i)) => { + let k = *i; + self.bc_mut().0.pop(); + self.bc_mut().0.push(OpCode::TCall(k)); + }, + Some(OpCode::If(t, f)) => { + let t = *t; + let f = *f; + self.bc_mut().0.pop(); + self.bc_mut().0.push(OpCode::TIf(t, f)); + // technically only needed if ‘f’ is None, but whatever. + self.bc_push(OpCode::Ret, anno); + }, + _ => self.bc_push(OpCode::Ret, anno), + } + self.defstack.pop().ok_or(CompileError::DefStackEmpty)?; + }, + "if" => { + let i = self.wordlist.0.len(); + self.wordlist.0.push(ByteCode(vec![])); + self.annotations.push(vec![]); + self.defstack.push(i); + + self.if_stack.push((IfClauses::True(i), anno)); + }, + "else" => { + self.bc_push(OpCode::Ret, anno); + self.defstack.pop(); + + let i = self.wordlist.0.len(); + self.wordlist.0.push(ByteCode(vec![])); + self.annotations.push(vec![]); + self.defstack.push(i); + + let (true_clause, anno) = match self.if_stack.pop() { + None => return Err(CompileError::MissingIf), + Some((IfClauses::TrueFalse(_, _), _)) => return Err(CompileError::MissingIf), + Some((IfClauses::True(cl), anno)) => (cl, anno), + }; + self.if_stack.push((IfClauses::TrueFalse(true_clause, i), anno)); + }, + "then" => { + self.bc_push(OpCode::Ret, anno); + self.defstack.pop(); + + match self.if_stack.pop() { + None => return Err(CompileError::MissingIf), + Some((IfClauses::True(true_clause), anno)) => { + self.bc_push(OpCode::If(true_clause, None), anno); + }, + Some((IfClauses::TrueFalse(true_clause, false_clause), anno)) => { + self.bc_push(OpCode::If(true_clause, Some(false_clause)), anno); + } + } + }, + "+" => self.bc_push(OpCode::Add, anno), + "-" => self.bc_push(OpCode::Sub, anno), + "*" => self.bc_push(OpCode::Mul, anno), + "/" => self.bc_push(OpCode::Div, anno), + "dup" => self.bc_push(OpCode::Dup, anno), + "drop" => self.bc_push(OpCode::Drop, anno), + "=" => self.bc_push(OpCode::EQ, anno), + ">" => self.bc_push(OpCode::GT, anno), + ">=" => self.bc_push(OpCode::GTE, anno), + "<" => self.bc_push(OpCode::LT, anno), + "<=" => self.bc_push(OpCode::LTE, anno), + other => return Err(CompileError::UnknownWord(String::from(other))), + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::vm::OpCode; + + fn compiler_for(text: &str) -> Compiler { + let mut p = Compiler::new(text); + p.compile().expect("badparse"); + p + } + + fn eprintwordlist(wordlist: &WordList) { + for i in 0..wordlist.0.len() { + eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]); + } + } + + #[test] + fn literal_num() { + let comp = compiler_for("1\n"); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Num(1)); + } + + #[test] + fn literal_string() { + let comp = compiler_for(r#"s" hello there""#); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Str(3, 14)); + } + + #[test] + fn add_opcode() { + let comp = compiler_for("+\n"); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Add); + } + + #[test] + fn sub_opcode() { + let comp = compiler_for("-\n"); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Sub); + } + + #[test] + fn def_word() { + let comp = compiler_for(": add2 2 + ; 3 add2\n"); + let main = &comp.wordlist.0[0]; + let add2_index = comp.wordalog.0.get("add2").expect("add2 has entry in wordlist"); + let add2 = &comp.wordlist.0[*add2_index]; + assert_eq!(main.len(), 2); + assert_eq!(main[0], OpCode::Num(3)); + assert_eq!(main[1], OpCode::Call(*add2_index)); + assert_eq!(add2.len(), 3); + assert_eq!(add2[0], OpCode::Num(2)); + assert_eq!(add2[1], OpCode::Add); + assert_eq!(add2[2], OpCode::Ret); + } + + #[test] + fn if_then() { + let comp = compiler_for("if 1 2 + then\n"); + eprintwordlist(&comp.wordlist); + assert_eq!(comp.wordlist.0.len(), 2); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::If(1, None)); + let tr = &comp.wordlist.0[1]; + assert_eq!(tr.len(), 4); + assert_eq!(tr[0], OpCode::Num(1)); + assert_eq!(tr[1], OpCode::Num(2)); + assert_eq!(tr[2], OpCode::Add); + assert_eq!(tr[3], OpCode::Ret); + } + + #[test] + fn if_else_then() { + let comp = compiler_for("if 1 2 + else 1 2 - then\n"); + eprintwordlist(&comp.wordlist); + assert_eq!(comp.wordlist.0.len(), 3, "wordlist length"); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::If(1, Some(2))); + let tr = &comp.wordlist.0[1]; + assert_eq!(tr.len(), 4); + assert_eq!(tr[0], OpCode::Num(1)); + assert_eq!(tr[1], OpCode::Num(2)); + assert_eq!(tr[2], OpCode::Add); + assert_eq!(tr[3], OpCode::Ret); + let fl = &comp.wordlist.0[2]; + assert_eq!(fl.len(), 4); + assert_eq!(fl[0], OpCode::Num(1)); + assert_eq!(fl[1], OpCode::Num(2)); + assert_eq!(fl[2], OpCode::Sub); + assert_eq!(fl[3], OpCode::Ret); + } + + #[test] + fn tail_call() { + let comp = compiler_for(": first + ; : second 3 first ; 1 second\n"); + assert_eq!(comp.wordlist.0.len(), 3, "wordlist length"); + eprintwordlist(&comp.wordlist); + } + + #[test] + fn tail_if() { + let comp = compiler_for(": t if 1 2 + then ; t\n"); + eprintwordlist(&comp.wordlist); + assert_eq!(comp.wordlist.0.len(), 3); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Call(1)); + let t = &comp.wordlist.0[1]; + assert_eq!(t.len(), 2); + assert_eq!(t[0], OpCode::TIf(2, None)); + assert_eq!(t[1], OpCode::Ret); + let tr = &comp.wordlist.0[2]; + assert_eq!(tr.len(), 4); + assert_eq!(tr[0], OpCode::Num(1)); + assert_eq!(tr[1], OpCode::Num(2)); + assert_eq!(tr[2], OpCode::Add); + assert_eq!(tr[3], OpCode::Ret); + } + + #[test] + fn tail_if_else() { + let comp = compiler_for(": t if 1 2 + else 1 2 - then ; t\n"); + eprintwordlist(&comp.wordlist); + assert_eq!(comp.wordlist.0.len(), 4, "wordlist length"); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Call(1)); + let t = &comp.wordlist.0[1]; + assert_eq!(t.len(), 2); + assert_eq!(t[0], OpCode::TIf(2, Some(3))); + assert_eq!(t[1], OpCode::Ret); + let tr = &comp.wordlist.0[2]; + assert_eq!(tr.len(), 4); + assert_eq!(tr[0], OpCode::Num(1)); + assert_eq!(tr[1], OpCode::Num(2)); + assert_eq!(tr[2], OpCode::Add); + assert_eq!(tr[3], OpCode::Ret); + let fl = &comp.wordlist.0[3]; + assert_eq!(fl.len(), 4); + assert_eq!(fl[0], OpCode::Num(1)); + assert_eq!(fl[1], OpCode::Num(2)); + assert_eq!(fl[2], OpCode::Sub); + assert_eq!(fl[3], OpCode::Ret); + } + + #[test] + fn recursion() { + let comp = compiler_for(": foo foo ; foo\n"); + eprintwordlist(&comp.wordlist); + assert_eq!(comp.wordlist.0.len(), 2); + let main = &comp.wordlist.0[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Call(1)); + let foo = &comp.wordlist.0[1]; + assert_eq!(foo.len(), 1); + assert_eq!(foo[0], OpCode::TCall(1)); + } +} |
