summaryrefslogtreecommitdiffstats
path: root/src/forth/compiler.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/forth/compiler.rs')
-rw-r--r--src/forth/compiler.rs384
1 files changed, 384 insertions, 0 deletions
diff --git a/src/forth/compiler.rs b/src/forth/compiler.rs
new file mode 100644
index 0000000..9120506
--- /dev/null
+++ b/src/forth/compiler.rs
@@ -0,0 +1,384 @@
+use super::vm::{ByteCode, OpCode, WordList};
+
+use std::collections::HashMap;
+use std::iter::{Enumerate, Iterator};
+use std::str::Chars;
+
+#[derive(Debug)]
+pub enum CompileError {
+ EOF,
+ DefStackEmpty,
+ MissingIf,
+ MissingQuote,
+ UnknownWord(String),
+}
+impl std::fmt::Display for CompileError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::EOF => write!(f, "premature end-of-file"),
+ Self::DefStackEmpty => write!(f, "def stack empty"),
+ Self::MissingIf => write!(f, "missing if"),
+ Self::MissingQuote => write!(f, "missing ending quote"),
+ Self::UnknownWord(word) => write!(f, "unknown word: {}", word),
+ }
+ }
+}
+impl std::error::Error for CompileError {}
+
+type CompileResult<T> = Result<T, CompileError>;
+
+// todo: the annotations should be directly tied to the wordlist so
+// they can't get out of sync.
+#[derive(Debug)]
+pub struct Annotation {
+ // (start, end) index in program text
+ pub loc: (usize, usize),
+}
+
+#[derive(Debug)]
+pub struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>);
+
+#[derive(Debug)]
+pub enum IfClauses {
+ True(usize),
+ TrueFalse(usize, usize),
+}
+
+#[derive(Debug)]
+pub struct Compiler<'a> {
+ text: &'a str,
+ enumerator: Enumerate<Chars<'a>>,
+ // list of all word definitions, in the order defined. the main
+ // routine is always in the first entry.
+ // todo: don't be pub, have a method to extract a wordlist
+ pub wordlist: WordList,
+ pub annotations: Vec<Vec<Annotation>>,
+ // catalog of word to word index in `wordlist`
+ pub wordalog: WordCatalog<'a>,
+ // holds a stack of indices into `wordlist` that are currently
+ // being defined, with the top of stack being the most recent
+ // definition.
+ defstack: Vec<usize>,
+ // number of clauses currently being defined for a particular ‘if’
+ // construct. a stack is needed in order to allow for nesting.
+ if_stack: Vec<(IfClauses, Annotation)>,
+}
+
+impl<'a> Compiler<'a> {
+ pub fn new(text: &'a str) -> Self {
+ let mut wl = vec![];
+ // main routine is always the first entry.
+ wl.push(ByteCode(vec![]));
+ Self {
+ text,
+ enumerator: text.chars().enumerate(),
+ wordlist: WordList(wl),
+ annotations: vec![vec![]],
+ wordalog: WordCatalog(HashMap::new()),
+ defstack: vec![],
+ if_stack: vec![],
+ }
+ }
+
+ // pull the next, whitespace-delimited word off the input stream.
+ fn next_word(&mut self) -> Option<(&'a str, usize, usize)> {
+ let (start, _c) =
+ self.enumerator.by_ref()
+ .find(|(_i, c)| !c.is_whitespace())?;
+ let (end, _c) =
+ self.enumerator.by_ref()
+ .find(|(_i, c)| c.is_whitespace())?;
+ let word = &self.text[start..end];
+ Some((word, start, end))
+ }
+
+ // currently ‘active’ bytecode being built.
+ fn bc_mut(&mut self) -> &mut ByteCode {
+ let word_index = self.defstack.last().unwrap_or(&0);
+ &mut self.wordlist.0[*word_index]
+ }
+
+ fn bc(&self) -> &ByteCode {
+ let word_index = self.defstack.last().unwrap_or(&0);
+ &self.wordlist.0[*word_index]
+ }
+
+ fn anno_mut(&mut self) -> &mut Vec<Annotation> {
+ let word_index = self.defstack.last().unwrap_or(&0);
+ &mut self.annotations[*word_index]
+ }
+
+ // push `op` onto the currently building bytecode, as determined
+ // by the top of the `namestack`.
+ fn bc_push(&mut self, op: OpCode, anno: Annotation) {
+ self.bc_mut().0.push(op);
+ self.anno_mut().push(anno);
+ }
+
+ pub fn compile(&mut self) -> CompileResult<()> {
+ while let Some((word, start, end)) = self.next_word() {
+ let anno = Annotation { loc: (start, end) };
+ if let Ok(i) = word.parse::<i32>() {
+ self.bc_push(OpCode::Num(i), anno);
+ } else if let Some(i) = self.wordalog.0.get(word) {
+ self.bc_push(OpCode::Call(*i), anno);
+ } else {
+ match word {
+ r#"s""# => {
+ let (s_end, _) =
+ self.enumerator
+ .find(|(_i, c)| return *c == '"')
+ .ok_or(CompileError::MissingQuote)?;
+ self.bc_push(OpCode::Str(end+1, s_end), anno);
+ },
+ ":" => {
+ let (name, _, _) = self.next_word().ok_or(CompileError::EOF)?;
+ self.wordalog.0.insert(name, self.wordlist.0.len());
+ self.defstack.push(self.wordlist.0.len());
+ self.wordlist.0.push(ByteCode(vec![]));
+ self.annotations.push(vec![]);
+ },
+ ";" => {
+ match self.bc().0.last() {
+ Some(OpCode::Call(i)) => {
+ let k = *i;
+ self.bc_mut().0.pop();
+ self.bc_mut().0.push(OpCode::TCall(k));
+ },
+ Some(OpCode::If(t, f)) => {
+ let t = *t;
+ let f = *f;
+ self.bc_mut().0.pop();
+ self.bc_mut().0.push(OpCode::TIf(t, f));
+ // technically only needed if ‘f’ is None, but whatever.
+ self.bc_push(OpCode::Ret, anno);
+ },
+ _ => self.bc_push(OpCode::Ret, anno),
+ }
+ self.defstack.pop().ok_or(CompileError::DefStackEmpty)?;
+ },
+ "if" => {
+ let i = self.wordlist.0.len();
+ self.wordlist.0.push(ByteCode(vec![]));
+ self.annotations.push(vec![]);
+ self.defstack.push(i);
+
+ self.if_stack.push((IfClauses::True(i), anno));
+ },
+ "else" => {
+ self.bc_push(OpCode::Ret, anno);
+ self.defstack.pop();
+
+ let i = self.wordlist.0.len();
+ self.wordlist.0.push(ByteCode(vec![]));
+ self.annotations.push(vec![]);
+ self.defstack.push(i);
+
+ let (true_clause, anno) = match self.if_stack.pop() {
+ None => return Err(CompileError::MissingIf),
+ Some((IfClauses::TrueFalse(_, _), _)) => return Err(CompileError::MissingIf),
+ Some((IfClauses::True(cl), anno)) => (cl, anno),
+ };
+ self.if_stack.push((IfClauses::TrueFalse(true_clause, i), anno));
+ },
+ "then" => {
+ self.bc_push(OpCode::Ret, anno);
+ self.defstack.pop();
+
+ match self.if_stack.pop() {
+ None => return Err(CompileError::MissingIf),
+ Some((IfClauses::True(true_clause), anno)) => {
+ self.bc_push(OpCode::If(true_clause, None), anno);
+ },
+ Some((IfClauses::TrueFalse(true_clause, false_clause), anno)) => {
+ self.bc_push(OpCode::If(true_clause, Some(false_clause)), anno);
+ }
+ }
+ },
+ "+" => self.bc_push(OpCode::Add, anno),
+ "-" => self.bc_push(OpCode::Sub, anno),
+ "*" => self.bc_push(OpCode::Mul, anno),
+ "/" => self.bc_push(OpCode::Div, anno),
+ "dup" => self.bc_push(OpCode::Dup, anno),
+ "drop" => self.bc_push(OpCode::Drop, anno),
+ "=" => self.bc_push(OpCode::EQ, anno),
+ ">" => self.bc_push(OpCode::GT, anno),
+ ">=" => self.bc_push(OpCode::GTE, anno),
+ "<" => self.bc_push(OpCode::LT, anno),
+ "<=" => self.bc_push(OpCode::LTE, anno),
+ other => return Err(CompileError::UnknownWord(String::from(other))),
+ }
+ }
+ }
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use super::super::vm::OpCode;
+
+ fn compiler_for(text: &str) -> Compiler {
+ let mut p = Compiler::new(text);
+ p.compile().expect("badparse");
+ p
+ }
+
+ fn eprintwordlist(wordlist: &WordList) {
+ for i in 0..wordlist.0.len() {
+ eprintln!("wordlist[{}]: {:?}", i, wordlist.0[i]);
+ }
+ }
+
+ #[test]
+ fn literal_num() {
+ let comp = compiler_for("1\n");
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Num(1));
+ }
+
+ #[test]
+ fn literal_string() {
+ let comp = compiler_for(r#"s" hello there""#);
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Str(3, 14));
+ }
+
+ #[test]
+ fn add_opcode() {
+ let comp = compiler_for("+\n");
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Add);
+ }
+
+ #[test]
+ fn sub_opcode() {
+ let comp = compiler_for("-\n");
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Sub);
+ }
+
+ #[test]
+ fn def_word() {
+ let comp = compiler_for(": add2 2 + ; 3 add2\n");
+ let main = &comp.wordlist.0[0];
+ let add2_index = comp.wordalog.0.get("add2").expect("add2 has entry in wordlist");
+ let add2 = &comp.wordlist.0[*add2_index];
+ assert_eq!(main.len(), 2);
+ assert_eq!(main[0], OpCode::Num(3));
+ assert_eq!(main[1], OpCode::Call(*add2_index));
+ assert_eq!(add2.len(), 3);
+ assert_eq!(add2[0], OpCode::Num(2));
+ assert_eq!(add2[1], OpCode::Add);
+ assert_eq!(add2[2], OpCode::Ret);
+ }
+
+ #[test]
+ fn if_then() {
+ let comp = compiler_for("if 1 2 + then\n");
+ eprintwordlist(&comp.wordlist);
+ assert_eq!(comp.wordlist.0.len(), 2);
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::If(1, None));
+ let tr = &comp.wordlist.0[1];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn if_else_then() {
+ let comp = compiler_for("if 1 2 + else 1 2 - then\n");
+ eprintwordlist(&comp.wordlist);
+ assert_eq!(comp.wordlist.0.len(), 3, "wordlist length");
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::If(1, Some(2)));
+ let tr = &comp.wordlist.0[1];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ let fl = &comp.wordlist.0[2];
+ assert_eq!(fl.len(), 4);
+ assert_eq!(fl[0], OpCode::Num(1));
+ assert_eq!(fl[1], OpCode::Num(2));
+ assert_eq!(fl[2], OpCode::Sub);
+ assert_eq!(fl[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn tail_call() {
+ let comp = compiler_for(": first + ; : second 3 first ; 1 second\n");
+ assert_eq!(comp.wordlist.0.len(), 3, "wordlist length");
+ eprintwordlist(&comp.wordlist);
+ }
+
+ #[test]
+ fn tail_if() {
+ let comp = compiler_for(": t if 1 2 + then ; t\n");
+ eprintwordlist(&comp.wordlist);
+ assert_eq!(comp.wordlist.0.len(), 3);
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let t = &comp.wordlist.0[1];
+ assert_eq!(t.len(), 2);
+ assert_eq!(t[0], OpCode::TIf(2, None));
+ assert_eq!(t[1], OpCode::Ret);
+ let tr = &comp.wordlist.0[2];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn tail_if_else() {
+ let comp = compiler_for(": t if 1 2 + else 1 2 - then ; t\n");
+ eprintwordlist(&comp.wordlist);
+ assert_eq!(comp.wordlist.0.len(), 4, "wordlist length");
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let t = &comp.wordlist.0[1];
+ assert_eq!(t.len(), 2);
+ assert_eq!(t[0], OpCode::TIf(2, Some(3)));
+ assert_eq!(t[1], OpCode::Ret);
+ let tr = &comp.wordlist.0[2];
+ assert_eq!(tr.len(), 4);
+ assert_eq!(tr[0], OpCode::Num(1));
+ assert_eq!(tr[1], OpCode::Num(2));
+ assert_eq!(tr[2], OpCode::Add);
+ assert_eq!(tr[3], OpCode::Ret);
+ let fl = &comp.wordlist.0[3];
+ assert_eq!(fl.len(), 4);
+ assert_eq!(fl[0], OpCode::Num(1));
+ assert_eq!(fl[1], OpCode::Num(2));
+ assert_eq!(fl[2], OpCode::Sub);
+ assert_eq!(fl[3], OpCode::Ret);
+ }
+
+ #[test]
+ fn recursion() {
+ let comp = compiler_for(": foo foo ; foo\n");
+ eprintwordlist(&comp.wordlist);
+ assert_eq!(comp.wordlist.0.len(), 2);
+ let main = &comp.wordlist.0[0];
+ assert_eq!(main.len(), 1);
+ assert_eq!(main[0], OpCode::Call(1));
+ let foo = &comp.wordlist.0[1];
+ assert_eq!(foo.len(), 1);
+ assert_eq!(foo[0], OpCode::TCall(1));
+ }
+}