use super::vm::{ByteCode, DataStackType, OpCode, WordList}; use std::collections::HashMap; use std::iter::{Enumerate, Iterator}; use std::str::Chars; #[derive(Debug)] pub enum CompileError { EOF, DefStackEmpty, MissingIf, MissingQuote, MissingParen, UnknownWord(String), } impl std::fmt::Display for CompileError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::EOF => write!(f, "premature end-of-file"), Self::DefStackEmpty => write!(f, "def stack empty"), Self::MissingIf => write!(f, "missing if"), Self::MissingQuote => write!(f, "missing ending quote"), Self::MissingParen => write!(f, "missing ending parenthesis"), Self::UnknownWord(word) => write!(f, "unknown word: {word}"), } } } impl std::error::Error for CompileError {} type CompileResult = Result; // todo: the annotations should be directly tied to the wordlist so // they can't get out of sync. #[derive(Debug)] pub struct Annotation { // (start, end) index in program text pub loc: (usize, usize), } #[derive(Debug)] pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); #[derive(Debug)] pub enum IfClauses { True(usize), TrueFalse(usize, usize), } #[derive(Debug)] pub struct Compiler<'a> { text: &'a str, enumerator: Enumerate>, // list of all word definitions, in the order defined. the main // routine is always in the first entry. // todo: don't be pub, have a method to extract a wordlist pub wordlist: WordList, pub annotations: Vec>, // catalog of word to word index in `wordlist` pub wordalog: WordCatalog<'a>, // holds a stack of indices into `wordlist` that are currently // being defined, with the top of stack being the most recent // definition. defstack: Vec, // number of clauses currently being defined for a particular ‘if’ // construct. a stack is needed in order to allow for nesting. if_stack: Vec<(IfClauses, Annotation)>, } impl<'a> Compiler<'a> { pub fn new(text: &'a str) -> Self { Self { text, enumerator: text.chars().enumerate(), // we always need something at index 0 wordlist: WordList(vec![ByteCode(vec![])]), annotations: vec![vec![]], wordalog: WordCatalog(HashMap::new()), defstack: vec![], if_stack: vec![], } } // pull the next, whitespace-delimited word off the input stream. fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { let (start, _c) = self .enumerator .by_ref() .find(|(_i, c)| !c.is_whitespace())?; let (end, _c) = self.enumerator.by_ref().find(|(_i, c)| c.is_whitespace())?; let word = &self.text[start..end]; Some((word, start, end)) } // currently ‘active’ bytecode being built. fn bc_mut(&mut self) -> &mut ByteCode { let word_index = self.defstack.last().unwrap_or(&0); &mut self.wordlist.0[*word_index] } fn bc(&self) -> &ByteCode { let word_index = self.defstack.last().unwrap_or(&0); &self.wordlist.0[*word_index] } fn anno_mut(&mut self) -> &mut Vec { let word_index = self.defstack.last().unwrap_or(&0); &mut self.annotations[*word_index] } // push `op` onto the currently building bytecode, as determined // by the top of the `namestack`. fn bc_push(&mut self, op: OpCode, anno: Annotation) { self.bc_mut().0.push(op); self.anno_mut().push(anno); } // substitute previous call/if with tail version // // only use where we might by pushing `OpCode::Ret`. honestly, // should todo: put this in bc_push so it's always applied // appropriately. fn sub_tcall(&mut self, anno: Annotation) { match self.bc().0.last() { Some(OpCode::Call(i)) => { let k = *i; self.bc_mut().0.pop(); self.bc_mut().0.push(OpCode::TCall(k)); } Some(OpCode::If(t, f)) => { let t = *t; let f = *f; self.bc_mut().0.pop(); self.bc_mut().0.push(OpCode::TIf(t, f)); // technically only needed if ‘f’ is None, but whatever. //self.bc_push(OpCode::Ret, anno); } _ => self.bc_push(OpCode::Ret, anno), } } pub fn compile(&mut self) -> CompileResult<()> { while let Some((word, start, end)) = self.next_word() { let anno = Annotation { loc: (start, end) }; if let Ok(i) = word.parse::() { self.bc_push(OpCode::Num(i), anno); } else if let Some(i) = self.wordalog.0.get(word) { self.bc_push(OpCode::Call(*i), anno); } else { match word { r#"s""# => { let (s_end, _) = self .enumerator .find(|(_i, c)| *c == '"') .ok_or(CompileError::MissingQuote)?; self.bc_push(OpCode::Str(end + 1, s_end), anno); } "(" => { self.enumerator .find(|(_i, c)| *c == ')') .ok_or(CompileError::MissingParen)?; } "\\" => { self.enumerator .find(|(_i, c)| *c == '\n') .ok_or(CompileError::EOF)?; } ":" => { let (name, _, _) = self.next_word().ok_or(CompileError::EOF)?; self.wordalog.0.insert(name, self.wordlist.0.len()); self.defstack.push(self.wordlist.0.len()); self.wordlist.0.push(ByteCode(vec![])); self.annotations.push(vec![]); } ";" => { self.sub_tcall(anno); self.defstack.pop().ok_or(CompileError::DefStackEmpty)?; } "if" => { let i = self.wordlist.0.len(); self.wordlist.0.push(ByteCode(vec![])); self.annotations.push(vec![]); self.defstack.push(i); self.if_stack.push((IfClauses::True(i), anno)); } "else" => { self.sub_tcall(anno); self.defstack.pop(); let i = self.wordlist.0.len(); self.wordlist.0.push(ByteCode(vec![])); self.annotations.push(vec![]); self.defstack.push(i); let (true_clause, anno) = match self.if_stack.pop() { None => return Err(CompileError::MissingIf), Some((IfClauses::TrueFalse(_, _), _)) => { return Err(CompileError::MissingIf); } Some((IfClauses::True(cl), anno)) => (cl, anno), }; self.if_stack .push((IfClauses::TrueFalse(true_clause, i), anno)); } "then" => { self.sub_tcall(anno); self.defstack.pop(); match self.if_stack.pop() { None => return Err(CompileError::MissingIf), Some((IfClauses::True(true_clause), anno)) => { self.bc_push(OpCode::If(true_clause, None), anno); } Some((IfClauses::TrueFalse(true_clause, false_clause), anno)) => { self.bc_push(OpCode::If(true_clause, Some(false_clause)), anno); } } } "+" => self.bc_push(OpCode::Add, anno), "-" => self.bc_push(OpCode::Sub, anno), "*" => self.bc_push(OpCode::Mul, anno), "/" => self.bc_push(OpCode::Div, anno), "dup" => self.bc_push(OpCode::Dup, anno), "drop" => self.bc_push(OpCode::Drop, anno), "rot" => self.bc_push(OpCode::Rot, anno), "swap" => self.bc_push(OpCode::Swap, anno), "=" => self.bc_push(OpCode::EQ, anno), ">" => self.bc_push(OpCode::GT, anno), ">=" => self.bc_push(OpCode::GTE, anno), "<" => self.bc_push(OpCode::LT, anno), "<=" => self.bc_push(OpCode::LTE, anno), other => return Err(CompileError::UnknownWord(String::from(other))), } } } Ok(()) } } #[cfg(test)] mod tests { use super::super::vm::OpCode; use super::*; fn compiler_for(text: &str) -> Compiler { let mut p = Compiler::new(text); p.compile().expect("badparse"); p } fn eprintwordlist(wordlist: &WordList) { for i in 0..wordlist.0.len() { eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]); } } #[test] fn literal_num() { let comp = compiler_for("1\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Num(1)]); } #[test] fn literal_string() { let comp = compiler_for(r#"s" hello there""#); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Str(3, 14)]); } #[test] fn add_opcode() { let comp = compiler_for("+\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Add]); } #[test] fn sub_opcode() { let comp = compiler_for("-\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Sub]); } #[test] fn rot_opcode() { let comp = compiler_for("rot\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Rot]); } #[test] fn swap_opcode() { let comp = compiler_for("swap\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Swap]); } #[test] fn def_word() { let comp = compiler_for(": add2 2 + ; 3 add2\n"); let main = &comp.wordlist.0[0]; let add2_index = comp .wordalog .0 .get("add2") .expect("add2 has entry in wordlist"); assert_eq!(main, vec![OpCode::Num(3), OpCode::Call(*add2_index)]); let add2 = &comp.wordlist.0[*add2_index]; assert_eq!(add2, vec![OpCode::Num(2), OpCode::Add, OpCode::Ret]); } #[test] fn if_then() { let comp = compiler_for("if 1 2 + then\n"); eprintwordlist(&comp.wordlist); assert_eq!(comp.wordlist.0.len(), 2); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::If(1, None)]); let tr = &comp.wordlist.0[1]; assert_eq!( tr, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Add, OpCode::Ret] ); } #[test] fn if_else_then() { let comp = compiler_for("if 1 2 + else 1 2 - then\n"); eprintwordlist(&comp.wordlist); assert_eq!(comp.wordlist.0.len(), 3, "wordlist length"); let main = &comp.wordlist.0[0]; assert_eq!(main.len(), 1); assert_eq!(main[0], OpCode::If(1, Some(2))); assert_eq!(main, vec![OpCode::If(1, Some(2))]); let tr = &comp.wordlist.0[1]; assert_eq!( tr, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Add, OpCode::Ret] ); let fl = &comp.wordlist.0[2]; assert_eq!( fl, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Sub, OpCode::Ret] ); } #[test] fn tail_call() { let comp = compiler_for(": first + ; : second 3 first ; 1 second\n"); assert_eq!(comp.wordlist.0.len(), 3, "wordlist length"); eprintwordlist(&comp.wordlist); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Num(1), OpCode::Call(2)]); let first = &comp.wordlist.0[1]; assert_eq!(first, vec![OpCode::Add, OpCode::Ret]); let second = &comp.wordlist.0[2]; assert_eq!(second, vec![OpCode::Num(3), OpCode::TCall(1)]); } #[test] fn tail_if() { let comp = compiler_for(": t if 1 2 + then ; t\n"); eprintwordlist(&comp.wordlist); assert_eq!(comp.wordlist.0.len(), 3); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Call(1)]); let t = &comp.wordlist.0[1]; assert_eq!(t, vec![OpCode::TIf(2, None)]); let tr = &comp.wordlist.0[2]; assert_eq!( tr, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Add, OpCode::Ret] ); } #[test] fn tail_if_else() { let comp = compiler_for(": t if 1 2 + else 1 2 - then ; t\n"); eprintwordlist(&comp.wordlist); assert_eq!(comp.wordlist.0.len(), 4, "wordlist length"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Call(1)]); let t = &comp.wordlist.0[1]; assert_eq!(t, vec![OpCode::TIf(2, Some(3))]); let tr = &comp.wordlist.0[2]; assert_eq!( tr, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Add, OpCode::Ret] ); let fl = &comp.wordlist.0[3]; assert_eq!( fl, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Sub, OpCode::Ret] ); } #[test] fn recursion() { let comp = compiler_for(": foo foo ; foo\n"); eprintwordlist(&comp.wordlist); assert_eq!(comp.wordlist.0.len(), 2); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Call(1)]); let foo = &comp.wordlist.0[1]; assert_eq!(foo, vec![OpCode::TCall(1)]); } #[test] fn tcall_if_then() { let prog = r#" : foo dup 1 > if foo then ; "#; let comp = compiler_for(prog); eprintwordlist(&comp.wordlist); let tbranch = &comp.wordlist.0[2]; eprintln!("true: {tbranch:?}"); assert_eq!(tbranch.0[0], OpCode::TCall(1)); } #[test] fn tcall_if_else_then() { let prog = r#" : foo dup 1 > if foo else foo then ; "#; let comp = compiler_for(prog); eprintwordlist(&comp.wordlist); let tbranch = &comp.wordlist.0[2]; eprintln!("true: {tbranch:?}"); assert_eq!(tbranch.0[0], OpCode::TCall(1)); let fbranch = &comp.wordlist.0[3]; eprintln!("false: {fbranch:?}"); assert_eq!(tbranch.0[0], OpCode::TCall(1)); } #[test] fn comment_paren() { let comp = compiler_for("1 ( foo ) 2 ( bar) 3\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Num(1), OpCode::Num(2), OpCode::Num(3)]); } #[test] fn comment_backslash() { let comp = compiler_for("1 \\ foobar\n2\n"); let main = &comp.wordlist.0[0]; assert_eq!(main, vec![OpCode::Num(1), OpCode::Num(2)]); } }