From 2eecdb1a4bae0fbced05f4c375cd6126929fb1fb Mon Sep 17 00:00:00 2001 From: Brian Cully Date: Thu, 7 Aug 2025 14:16:43 -0400 Subject: parser now emits list of words and hashmap of string→index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wordlist is just a vector of bytecode entries. to associate a word to an entry in the wordlist, use the word catalog to look up by name. --- .dir-locals.el | 7 ++ .envrc | 1 + .gitignore | 2 + Cargo.lock | 154 +++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 14 ++++ Makefile | 7 ++ index.html | 13 ++++ main.css | 3 + main.mjs | 22 +++++++ shell.nix | 21 ++++++ src/forth/interp.rs | 175 +++++++++++++++++++++++++++++++++++++++++++++++++ src/forth/mod.rs | 2 + src/forth/parser.rs | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 24 +++++++ 14 files changed, 628 insertions(+) create mode 100644 .dir-locals.el create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 Makefile create mode 100644 index.html create mode 100644 main.css create mode 100644 main.mjs create mode 100644 shell.nix create mode 100644 src/forth/interp.rs create mode 100644 src/forth/mod.rs create mode 100644 src/forth/parser.rs create mode 100644 src/lib.rs diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..55b82b2 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,7 @@ +;;; Directory Local Variables -*- no-byte-compile: t -*- +;;; For more information see (info "(emacs) Directory Variables") + +((nil . ((compile-command . "make serve"))) + (js-base-mode . ((js-indent-level . 4) + (js-chain-indent . t) + (js-indent-first-init . dynamic)))) diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..54954ce --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/pkg diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..88c32de --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,154 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "automathon" +version = "0.1.0" +dependencies = [ + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6dbff64 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "automathon" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +wasm-bindgen = "0.2" + +[dependencies.web-sys] +features = ["Document", "Element", "HtmlElement", "Node", "Window"] +version = "0.3" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f37f0ac --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +# the --target is currently necessary or firefox will return +# "disallowed mime type" -bjc 7-aug-2025 +build: + wasm-pack build --target web + +serve: build + python -m http.server 8118 diff --git a/index.html b/index.html new file mode 100644 index 0000000..c985df4 --- /dev/null +++ b/index.html @@ -0,0 +1,13 @@ + + + + automathon + + + + +

automathon

+ no canvas! + + + diff --git a/main.css b/main.css new file mode 100644 index 0000000..a07613e --- /dev/null +++ b/main.css @@ -0,0 +1,3 @@ +canvas { + border: 1px solid greenyellow; +} diff --git a/main.mjs b/main.mjs new file mode 100644 index 0000000..63f277e --- /dev/null +++ b/main.mjs @@ -0,0 +1,22 @@ +import init from './pkg/automathon.js'; + +async function loaded() { + console.debug('run'); + const mod = await init(); + console.debug('init done', mod); + + window.calculate = _ => { + console.debug('calc'); + const inp1 = document.getElementById('number-input1').value; + const inp2 = document.getElementById('number-input2').value; + const res = mod.add(parseInt(inp1), parseInt(inp2)); + document.getElementById('result').textContent = res; + } + + window.testalert = _ => { + console.debug('testalert'); + mod.run(); + } +} + +document.addEventListener('DOMContentLoaded', loaded); diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..ab0fd32 --- /dev/null +++ b/shell.nix @@ -0,0 +1,21 @@ +{ pkgs ? import {} }: + +pkgs.mkShellNoCC { + packages = with pkgs; [ + rustc + clang # yes, it's necessary or ‘cc’ can't be found. -bjc 2025-aug-7 + lld # ibid. + cargo + rust-analyzer + wasm-pack + # the only thing better than needing cargo's infinite dependencies + # is needing npm's as well, just so we can use a bundler built for + # another, wildly different, registry. + nodePackages.npm + + # for http.server + python3 + ]; + + CARGO_HOME = "/data/bjc/cargo"; +} diff --git a/src/forth/interp.rs b/src/forth/interp.rs new file mode 100644 index 0000000..2ade22d --- /dev/null +++ b/src/forth/interp.rs @@ -0,0 +1,175 @@ +use std::collections::HashMap; +use std::ops::Index; + +#[derive(Clone, Debug, PartialEq)] +pub enum OpCode { + Num(i32), + Str(usize, usize), + WordI(usize), + Word(&'static str), + Add, + Sub, + Ret, +} + +#[derive(Debug)] +pub(super) struct ByteCode(pub(super) Vec); + +impl ByteCode { + pub fn len(&self) -> usize { + self.0.len() + } +} + +impl Index for ByteCode { + type Output = OpCode; + + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub(super) struct InstructionPointer(pub(super) usize); + +#[derive(Debug)] +pub(super) struct DataStack(pub(super) Vec); + +#[derive(Debug)] +pub(super) struct CallStack(pub(super) Vec); + +#[derive(Debug)] +pub(super) struct WordList(pub(super) HashMap<&'static str, InstructionPointer>); + +#[derive(Debug)] +pub(super) struct WordCatalog<'a>(pub(super) HashMap<&'a str, usize>); + +#[derive(Debug)] +pub struct Interp { + stack: DataStack, + callstack: CallStack, + wordlist: WordList, + bytecode: ByteCode, + ip: InstructionPointer, +} + +#[derive(Debug)] +pub enum RuntimeError { + StackUnderflow, + UndefinedWord, +} + +impl Interp { + pub fn new() -> Self { + Self { + stack: DataStack(Vec::new()), + callstack: CallStack(Vec::new()), + wordlist: WordList(HashMap::new()), + bytecode: ByteCode(Vec::new()), + ip: InstructionPointer(0), + } + } + + pub fn tick(&mut self) -> Result<(), RuntimeError> { + match self.bytecode[self.ip.0] { + OpCode::Num(n) => self.stack.0.push(n), + OpCode::Str(start, end) => eprintln!("got str: {} to {}", start, end), + OpCode::Add => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 + n1); + }, + OpCode::Sub => { + let n1 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + let n2 = self.stack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.stack.0.push(n2 - n1); + }, + OpCode::Ret => { + let ip = self.callstack.0.pop().ok_or(RuntimeError::StackUnderflow)?; + self.ip = ip; + }, + OpCode::Word(w) => { + let ip = self.wordlist.0.get(w).ok_or(RuntimeError::UndefinedWord)?; + self.callstack.0.push(self.ip); + self.ip.0 = ip.0 - 1; // we auto-increment at the end + }, + OpCode::WordI(i) => { + eprintln!("should jump to word based on index {}", i); + } + } + self.ip.0 += 1; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::OpCode; + + #[test] + fn simple_ticks() -> Result<(), RuntimeError> { + + let mut interp = Interp::new(); + interp.bytecode = ByteCode(vec![OpCode::Num(2), OpCode::Num(3), OpCode::Add]); + interp.tick()?; + assert_eq!(interp.ip.0, 1); + assert_eq!(interp.stack.0.len(), 1); + assert_eq!(interp.stack.0[0], 2, "first argument"); + interp.tick()?; + assert_eq!(interp.ip.0, 2); + assert_eq!(interp.stack.0.len(), 2); + assert_eq!(interp.stack.0[1], 3, "second argument"); + interp.tick()?; + assert_eq!(interp.ip.0, 3); + assert_eq!(interp.stack.0.len(), 1); + assert_eq!(interp.stack.0[0], 5, "result of addition"); + + Ok(()) + } + + #[test] + fn custom_word() -> Result<(), RuntimeError> { + let mut interp = Interp::new(); + interp.bytecode = ByteCode(vec![ + OpCode::Num(2), OpCode::Num(3), OpCode::Word("sub"), OpCode::Num(-2), OpCode::Add, + // "sub" definition + OpCode::Sub, OpCode::Ret, + ]); + // 5 is offset of w + interp.wordlist.0.insert("sub", InstructionPointer(5)); + interp.tick()?; + assert_eq!(interp.ip.0, 1); + assert_eq!(interp.stack.0.len(), 1); + assert_eq!(interp.stack.0[0], 2, "first argument"); + interp.tick()?; + assert_eq!(interp.ip.0, 2); + assert_eq!(interp.stack.0.len(), 2); + assert_eq!(interp.stack.0[1], 3, "second argument"); + + interp.tick()?; // call sub + assert_eq!(interp.ip.0, 5); + assert_eq!(interp.stack.0.len(), 2); + + interp.tick()?; // - + assert_eq!(interp.ip.0, 6); + assert_eq!(interp.stack.0.len(), 1); + + interp.tick()?; // ret + assert_eq!(interp.ip.0, 3); + assert_eq!(interp.stack.0.len(), 1); + assert_eq!(interp.stack.0[0], -1, "result of sub word"); + + interp.tick()?; // 2 + assert_eq!(interp.ip.0, 4); + assert_eq!(interp.stack.0.len(), 2); + assert_eq!(interp.stack.0[1], -2, "post sub arg"); + + interp.tick()?; // - + assert_eq!(interp.ip.0, 5); + assert_eq!(interp.stack.0.len(), 1); + assert_eq!(interp.stack.0[0], -3, "add opcode result"); + + Ok(()) + } +} diff --git a/src/forth/mod.rs b/src/forth/mod.rs new file mode 100644 index 0000000..fdda066 --- /dev/null +++ b/src/forth/mod.rs @@ -0,0 +1,2 @@ +mod interp; +mod parser; diff --git a/src/forth/parser.rs b/src/forth/parser.rs new file mode 100644 index 0000000..26a5686 --- /dev/null +++ b/src/forth/parser.rs @@ -0,0 +1,183 @@ +use super::interp::{ByteCode, OpCode, WordCatalog}; + +use std::collections::HashMap; +use std::iter::{Enumerate, Iterator}; +use std::str::Chars; + +#[derive(Debug)] +pub enum ParseError { + EOF, + NameStackEmpty, + MissingQuote, + UnknownWord(String), +} +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::EOF => write!(f, "premature end-of-file"), + Self::NameStackEmpty => write!(f, "name stack empty"), + Self::MissingQuote => write!(f, "missing ending quote"), + Self::UnknownWord(word) => write!(f, "unknown word: {}", word), + } + } +} +impl std::error::Error for ParseError {} + +type ParseResult = Result; + +// a list of all bytecode defs, with the main routine at ‘0’. +type ParserWordList = Vec; + +#[derive(Debug)] +pub struct Parser<'a> { + text: &'a str, + enumerator: Enumerate>, + wordlist: ParserWordList, + wordalog: WordCatalog<'a>, + namestack: Vec<&'a str>, +} + +impl<'a> Parser<'a> { + pub fn new(text: &'a str) -> Self { + let enumerator = text.chars().enumerate(); + let mut wordlist = vec![]; + // main routine is always the first entry. + wordlist.push(ByteCode(vec![])); + Self { + text, + enumerator, + wordlist, + wordalog: WordCatalog(HashMap::new()), + namestack: vec![], + } + } + + // pull the next, whitespace-delimited word off the input stream. + fn next_word(&mut self) -> Option<(&'a str, usize, usize)> { + let mut start = 0; + let chars = + self.enumerator.by_ref() + .skip_while(|(i, c)| { + start = *i; + return c.is_whitespace() + }); + for (i, c) in chars { + if c.is_whitespace() { + let end = i; + let word = self.text.get(start..end).unwrap(); + return Some((word, start, end)) + } + } + None + } + + // push `op` onto the currently building bytecode, as determined + // by the top of the `namestack`. + fn bc_push(&mut self, op: OpCode) -> ParseResult<()> { + let word_index = match self.namestack.last() { + None => &0, + Some(name) => self.wordalog.0.get(name).ok_or(ParseError::NameStackEmpty)?, + }; + self.wordlist[*word_index].0.push(op); + Ok(()) + } + + fn parse(&mut self) -> ParseResult<()> { + while let Some((word, _start, end)) = self.next_word() { + if let Ok(i) = word.parse::() { + self.bc_push(OpCode::Num(i))?; + } else if let Some(i) = self.wordalog.0.get(word) { + self.bc_push(OpCode::WordI(*i))?; + } else { + match word { + r#"s""# => { + let (s_end, _) = + self.enumerator + .find(|(_i, c)| return *c == '"') + .ok_or(ParseError::MissingQuote)?; + self.bc_push(OpCode::Str(end+1, s_end))?; + }, + ":" => { + let (name, _, _) = self.next_word().ok_or(ParseError::EOF)?; + self.namestack.push(name); + self.wordalog.0.insert(name, self.wordlist.len()); + self.wordlist.push(ByteCode(vec![])); + }, + ";" => { + self.bc_push(OpCode::Ret)?; + self.namestack.pop(); + }, + "+" => self.bc_push(OpCode::Add)?, + "-" => self.bc_push(OpCode::Sub)?, + other => return Err(ParseError::UnknownWord(String::from(other))), + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::interp::OpCode; + + fn parser_for(text: &str) -> Parser { + let mut p = Parser::new(text); + p.parse().expect("badparse"); + p + } + + #[test] + fn literal_num() { + let p = parser_for("1\n"); + let main = &p.wordlist[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Num(1)); + } + + #[test] + fn literal_string() { + let p = parser_for(r#"s" hello there""#); + let main = &p.wordlist[0]; + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Str(3, 14)); + } + + #[test] + fn add_opcode() { + let p = parser_for("+\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Add); + } + + #[test] + fn sub_opcode() { + let p = parser_for("-\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + assert_eq!(main.len(), 1); + assert_eq!(main[0], OpCode::Sub); + } + + #[test] + fn def_word() { + let p = parser_for(": add2 2 + ; 3 add2\n"); + let main = &p.wordlist[0]; + eprintln!("main {:?}", main); + + let add2_index = p.wordalog.0.get("add2").expect("add2 has entry in wordlist"); + let add2 = &p.wordlist[*add2_index]; + eprintln!("add2 {:?}", add2); + + assert_eq!(main.len(), 2); + assert_eq!(main[0], OpCode::Num(3)); + assert_eq!(main[1], OpCode::WordI(*add2_index)); + assert_eq!(add2.len(), 3); + assert_eq!(add2[0], OpCode::Num(2)); + assert_eq!(add2[1], OpCode::Add); + assert_eq!(add2[2], OpCode::Ret); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..da1469b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,24 @@ +use wasm_bindgen::prelude::*; + +mod forth; + +#[wasm_bindgen] +extern { + #[wasm_bindgen(js_namespace = console)] + fn debug(s: &str); +} + +#[wasm_bindgen(start)] +pub fn run() -> Result<(), JsValue> { + let window = web_sys::window().expect("no global `window` exists"); + let document = window.document().expect("should have `document` on window"); + let body = document.body().expect("document should have `body`"); + + let val = document.create_element("p")?; + val.set_text_content(Some("hi there")); + body.append_child(&val)?; + + debug("done in rust's run"); + + Ok(()) +} -- cgit v1.3