From 4994785233d63a60242d9dd7968e524894089a3d Mon Sep 17 00:00:00 2001 From: minneelyyyy Date: Mon, 14 Oct 2024 16:13:22 -0400 Subject: [PATCH] initial commit --- .gitignore | 1 + Cargo.lock | 54 ++++++++ Cargo.toml | 7 ++ examples/repl.rs | 16 +++ src/executor.rs | 314 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 75 +++++++++++ src/parser.rs | 283 ++++++++++++++++++++++++++++++++++++++++++ src/tokenizer.rs | 201 ++++++++++++++++++++++++++++++ 8 files changed, 951 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 examples/repl.rs create mode 100644 src/executor.rs create mode 100644 src/lib.rs create mode 100644 src/parser.rs create mode 100644 src/tokenizer.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..39a8d2a --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,54 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "lamm"
+version = "0.1.0"
+dependencies = [
+ "regex",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "regex"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..b4aa823
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "lamm"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.11"
\ No newline at end of file
diff --git a/examples/repl.rs b/examples/repl.rs
new file mode 100644
index 0000000..834c6c9
--- /dev/null
+++ b/examples/repl.rs
@@ -0,0 +1,16 @@
+
+use lamm::{Tokenizer, Parser, Executor};
+use std::io::{self, BufReader};
+/// Chains tokenizer -> parser -> executor over stdin; values go to stdout, errors to stderr.
+fn main() {
+    let tokenizer = Tokenizer::new(BufReader::new(io::stdin()));
+    let parser = Parser::new(tokenizer);
+    let values = Executor::new(parser);
+
+    for value in values {
+        match value {
+            Ok(v) => println!("{v}"),
+            Err(e) => eprintln!("{e}"),
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/executor.rs b/src/executor.rs
new file mode 100644
index 0000000..77f1bb2
--- /dev/null
+++ b/src/executor.rs
@@ -0,0 +1,314 @@
+use super::{Value, Type, FunctionDeclaration};
+use super::parser::{ParseTree, ParseError};
+use super::tokenizer::Op;
+
+use std::collections::HashMap;
+use std::borrow::Cow;
+use std::fmt::Display;
+use std::error::Error;
+/// Errors produced while evaluating a [`ParseTree`].
+#[derive(Debug)]
+pub enum RuntimeError {
+    ParseError(ParseError),
+    NoOverloadForTypes(String, Vec<Value>),
+    ImmutableError(String),
+    VariableUndefined(String),
+    FunctionUndeclared(String),
+    FunctionUndefined(String),
+    NotAVariable(String),
+    ParseFail(String, Type),
+}
+
+impl Display for RuntimeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::ParseError(e) => write!(f, "{e}"),
+            Self::NoOverloadForTypes(op, values)
+                => write!(f, "No overload of `{op}` exists for the operands `[{}]`",
+                    values.iter().map(|x| format!("{}({x})", x.get_type())).collect::<Vec<_>>().join(", ")),
+            Self::ImmutableError(ident) => write!(f, "`{ident}` already exists and cannot be redefined"),
+            Self::VariableUndefined(ident) => write!(f, "variable `{ident}` was not defined"),
+            Self::FunctionUndeclared(ident) => write!(f, "function `{ident}` was not declared"),
+            Self::FunctionUndefined(ident) => write!(f, "function `{ident}` was not defined"),
+            Self::NotAVariable(ident) => write!(f, "`{ident}` is a function but was attempted to be used like a variable"),
+            Self::ParseFail(s, t) => write!(f, "`\"{s}\"` couldn't be parsed into {}", t),
+        }
+    }
+}
+
+impl Error for RuntimeError {}
+/// A variable binding: either an already evaluated value or a deferred expression.
+#[derive(Clone, Debug)]
+enum Evaluation {
+    // at this point, its type is set in stone
+    Computed(Value),
+
+    // at this point, its type is unknown, and may contradict a variable's type
+    // or not match the expected value of the expression, this is a runtime error
+    Uncomputed(Box<ParseTree>),
+}
+/// A function's declaration together with its body, if one has been provided.
+#[derive(Clone, Debug)]
+struct Function {
+    decl: FunctionDeclaration,
+    body: Option<Box<ParseTree>>,
+}
+/// Anything an identifier can be bound to in a scope.
+#[derive(Clone, Debug)]
+enum Object {
+    Variable(Evaluation),
+    Function(Function),
+}
+/// Iterator adapter that evaluates each parse tree pulled from `exprs`.
+pub struct Executor<I: Iterator<Item = Result<ParseTree, ParseError>>> {
+    exprs: I,
+    globals: HashMap<String, Object>,
+}
+
+impl<I: Iterator<Item = Result<ParseTree, ParseError>>> Executor<I> {
+    pub fn new(exprs: I) -> Self {
+        Self {
+            exprs,
+            globals: HashMap::new(),
+        }
+    }
+    /// Recursively evaluates `tree`, resolving identifiers in `locals` first and then in `self.globals`.
+    fn exec(
+        &mut self,
+        tree: ParseTree,
+        locals: &mut Cow<HashMap<String, Object>>) -> Result<Value, RuntimeError>
+    {
+        match tree {
+            ParseTree::Add(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x + y as f64)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 + y)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)),
+                (Value::String(x), Value::String(y)) => Ok(Value::String(format!("{x}{y}"))),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("+".into(), vec![x, y]))
+            },
+            ParseTree::Sub(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x - y as f64)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 - y)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("-".into(), vec![x, y]))
+            },
+            ParseTree::Mul(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x * y as f64)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 * y)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x * y)),
+                (Value::String(x), Value::Int(y)) => Ok(Value::String(x.repeat(usize::try_from(y).unwrap_or(0)))), // negative count repeats zero times
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("*".into(), vec![x, y]))
+            },
+            ParseTree::Div(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => x.checked_div(y).map(Value::Int).ok_or_else(|| RuntimeError::NoOverloadForTypes("/".into(), vec![Value::Int(x), Value::Int(y)])), // zero divisor / overflow is an error, not a panic
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x / y as f64)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 / y)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("/".into(), vec![x, y]))
+            },
+            ParseTree::Exp(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => u32::try_from(y).ok().and_then(|e| x.checked_pow(e)).map(Value::Int).ok_or_else(|| RuntimeError::NoOverloadForTypes("**".into(), vec![Value::Int(x), Value::Int(y)])), // negative exponent / overflow is an error
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float((x as f64).powf(y))),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x.powf(y as f64))),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x.powf(y))),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("**".into(), vec![x, y])),
+            },
+            ParseTree::Mod(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => x.checked_rem(y).map(Value::Int).ok_or_else(|| RuntimeError::NoOverloadForTypes("%".into(), vec![Value::Int(x), Value::Int(y)])), // zero divisor / overflow is an error, not a panic
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x % y as f64)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 % y)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x % y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("%".into(), vec![x, y])),
+            },
+            ParseTree::EqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x == y)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 == y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x == y as f64)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x == y)),
+                (Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(x == y)),
+                (Value::String(x), Value::String(y)) => Ok(Value::Bool(x == y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("==".into(), vec![x, y])),
+            },
+            ParseTree::GreaterThan(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 > y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x > y as f64)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes(">".into(), vec![x, y])),
+            },
+            ParseTree::GreaterThanOrEqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 >= y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x >= y as f64)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes(">=".into(), vec![x, y])),
+            },
+            ParseTree::LessThan(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Bool((x as f64) < y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x < y as f64)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("<".into(), vec![x, y])),
+            },
+            ParseTree::LessThanOrEqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) {
+                (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)),
+                (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 <= y)),
+                (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x <= y as f64)),
+                (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)),
+                (x, y) => Err(RuntimeError::NoOverloadForTypes("<=".into(), vec![x, y])),
+            },
+            ParseTree::Not(x) => match self.exec(*x, locals)? {
+                Value::Bool(x) => Ok(Value::Bool(!x)),
+                x => Err(RuntimeError::NoOverloadForTypes("not".into(), vec![x]))
+            },
+            ParseTree::Equ(ident, body, scope) => {
+                if self.globals.contains_key(&ident) || locals.contains_key(&ident) {
+                    Err(RuntimeError::ImmutableError(ident.clone()))
+                } else {
+                    let locals = locals.to_mut();
+                    let value = self.exec(*body, &mut Cow::Borrowed(&locals))?;
+                    locals.insert(ident.clone(), Object::Variable(Evaluation::Computed(value)));
+
+                    self.exec(*scope, &mut Cow::Borrowed(&locals))
+                }
+            },
+            ParseTree::LazyEqu(ident, body, scope) => {
+                if self.globals.contains_key(&ident) || locals.contains_key(&ident) {
+                    Err(RuntimeError::ImmutableError(ident.clone()))
+                } else {
+                    let locals = locals.to_mut();
+                    locals.insert(ident.clone(), Object::Variable(Evaluation::Uncomputed(body)));
+
+                    self.exec(*scope, &mut Cow::Borrowed(&locals))
+                }
+            },
+            ParseTree::GlobalEqu(ident, body) => todo!(),
+            ParseTree::LazyGlobalEqu(ident, body) => todo!(),
+            ParseTree::FunctionDefinition(ident, args, r, body, scope) => {
+                let existing = locals.get(&ident).or(self.globals.get(&ident)).cloned();
+
+                match existing {
+                    Some(_) => Err(RuntimeError::ImmutableError(ident.clone())),
+                    None => {
+                        let locals = locals.to_mut();
+
+                        locals.insert(ident.clone(), Object::Function(Function {
+                            decl: FunctionDeclaration { name: ident.clone(), r, args },
+                            body: Some(body)
+                        }));
+
+                        self.exec(*scope, &mut Cow::Borrowed(&locals))
+                    }
+                }
+            },
+            ParseTree::Compose(x, y) => {
+                self.exec(*x, locals)?;
+                self.exec(*y, locals)
+            },
+            ParseTree::Id(x) => self.exec(*x, locals),
+            ParseTree::If(cond, body) => if match self.exec(*cond, locals)? {
+                    Value::Float(f) => f != 0.0,
+                    Value::Int(i) => i != 0,
+                    Value::Bool(b) => b,
+                    Value::String(s) => !s.is_empty(),
+                    Value::Nil => false,
+                } {
+                    self.exec(*body, locals)
+                } else {
+                    Ok(Value::Nil)
+                },
+            ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(*cond, locals)? {
+                    Value::Float(f) => f != 0.0,
+                    Value::Int(i) => i != 0,
+                    Value::Bool(b) => b,
+                    Value::String(s) => !s.is_empty(),
+                    Value::Nil => false,
+                } {
+                    self.exec(*istrue, locals)
+                } else {
+                    self.exec(*isfalse, locals)
+                },
+            ParseTree::FunctionCall(ident, args) => {
+                let obj = locals.get(&ident).or(self.globals.get(&ident)).cloned();
+
+                if let Some(Object::Function(f)) = obj {
+                    let locals = locals.to_mut();
+                    let body = f.body.ok_or(RuntimeError::FunctionUndefined(ident.clone()))?;
+
+                    for ((name, _), tree) in std::iter::zip(f.decl.args, args) {
+                        locals.insert(name.clone(), Object::Variable(Evaluation::Computed(self.exec(tree, &mut Cow::Borrowed(locals))?)));
+                    }
+
+                    self.exec(*body, &mut Cow::Borrowed(&locals))
+                } else {
+                    Err(RuntimeError::FunctionUndeclared(ident.clone()))
+                }
+            },
+            ParseTree::Variable(ident) => {
+                let locals = locals.to_mut();
+
+                let obj = locals.get(&ident).or(self.globals.get(&ident)).cloned();
+
+                if let Some(Object::Variable(eval)) = obj {
+                    match eval {
+                        Evaluation::Computed(v) => Ok(v),
+                        Evaluation::Uncomputed(tree) => {
+                            let v = self.exec(*tree, &mut Cow::Borrowed(&locals))?;
+                            locals.insert(ident, Object::Variable(Evaluation::Computed(v.clone())));
+
+                            Ok(v)
+                        }
+                    }
+                } else {
+                    Err(RuntimeError::VariableUndefined(ident.clone()))
+                }
+            },
+            ParseTree::Constant(value) => Ok(value),
+            ParseTree::ToInt(x) => match self.exec(*x, locals)? {
+                Value::Int(x) => Ok(Value::Int(x)),
+                Value::Float(x) => Ok(Value::Int(x as i64)),
+                Value::Bool(x) => Ok(Value::Int(if x { 1 } else { 0 })),
+                Value::String(x) => {
+                    let r: i64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Int))?;
+                    Ok(Value::Int(r))
+                }
+                x => Err(RuntimeError::NoOverloadForTypes("int".into(), vec![x])),
+            },
+            ParseTree::ToFloat(x) => match self.exec(*x, locals)? {
+                Value::Int(x) => Ok(Value::Float(x as f64)),
+                Value::Float(x) => Ok(Value::Float(x)),
+                Value::Bool(x) => Ok(Value::Float(if x { 1.0 } else { 0.0 })),
+                Value::String(x) => {
+                    let r: f64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Float))?;
+                    Ok(Value::Float(r))
+                }
+                x => Err(RuntimeError::NoOverloadForTypes("float".into(), vec![x])),
+            },
+            ParseTree::ToBool(x) => match self.exec(*x, locals)? {
+                Value::Int(x) => Ok(Value::Bool(x != 0)),
+                Value::Float(x) => Ok(Value::Bool(x != 0.0)),
+                Value::Bool(x) => Ok(Value::Bool(x)),
+                Value::String(x) => Ok(Value::Bool(!x.is_empty())),
+                x => Err(RuntimeError::NoOverloadForTypes("bool".into(), vec![x])),
+            },
+            ParseTree::ToString(x) => Ok(Value::String(format!("{}", self.exec(*x, locals)?))),
+        }
+    }
+}
+/// Each `next` evaluates the next parse tree against a fresh, empty local scope.
+impl<I: Iterator<Item = Result<ParseTree, ParseError>>> Iterator for Executor<I> {
+    type Item = Result<Value, RuntimeError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let expr = self.exprs.next();
+
+        match expr {
+            Some(Ok(expr)) => Some(self.exec(expr, &mut Cow::Borrowed(&HashMap::new()))),
+            Some(Err(e)) => Some(Err(RuntimeError::ParseError(e))),
+            None => None,
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..446b5ce
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,75 @@
+//! Tokenizer, parser and executor for a small prefix-notation expression language.
+mod tokenizer;
+mod parser;
+mod executor;
+
+pub use tokenizer::{Tokenizer, TokenizeError};
+pub use parser::{Parser, ParseError};
+pub use executor::{Executor, RuntimeError};
+
+use std::fmt::Display;
+/// Type tags used in function declarations and in error messages.
+#[derive(Clone, Debug)]
+pub(crate) enum Type {
+    Float,
+    Int,
+    Bool,
+    String,
+    Nil,
+    Any,
+    Function(Box<Type>, Vec<Type>),
+}
+
+impl Display for Type {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", match self {
+            Self::Float => "Float".into(),
+            Self::Int => "Int".into(),
+            Self::Bool => "Bool".into(),
+            Self::String => "String".into(),
+            Self::Nil => "Nil".into(),
+            Self::Any => "Any".into(),
+            Self::Function(r, _) => format!("Function -> {}", *r)
+        })
+    }
+}
+/// A runtime value produced by the executor or written as a literal.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Value {
+    Float(f64),
+    Int(i64),
+    Bool(bool),
+    String(String),
+    Nil,
+}
+
+impl Value {
+    pub(crate) fn get_type(&self) -> Type {
+        match self {
+            Self::Float(_) => Type::Float,
+            Self::Int(_) => Type::Int,
+            Self::Bool(_) => Type::Bool,
+            Self::String(_) => Type::String,
+            Self::Nil => Type::Nil,
+        }
+    }
+}
+
+impl Display for Value {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Float(x) => write!(f, "{x}"),
+            Self::Int(x) => write!(f, "{x}"),
+            Self::Bool(x) => write!(f, "{}", if *x { "true" } else { "false" }),
+            Self::String(x) => write!(f, "{x}"),
+            Self::Nil => write!(f, "nil"),
+        }
+    }
+}
+/// A function's name, return type and `(parameter name, parameter type)` list.
+#[derive(Clone, Debug)]
+pub(crate) struct FunctionDeclaration {
+    name: String,
+    r: Type,
+    args: Vec<(String, Type)>,
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..6c35c53
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,283 @@
+use super::{Type, Value, FunctionDeclaration};
+use super::tokenizer::{Token, TokenizeError, Op};
+
+use std::error;
+use std::collections::HashMap;
+use std::fmt::Display;
+use std::borrow::Cow;
+/// Errors produced while building a [`ParseTree`] from tokens.
+#[derive(Debug)]
+pub enum ParseError {
+    NoInput,
+    UnexpectedEndInput,
+    IdentifierUndefined(String),
+    InvalidIdentifier,
+    FunctionUndefined(String),
+    VariableUndefined(String),
+    TokenizeError(TokenizeError),
+}
+
+impl Display for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"),
+            ParseError::IdentifierUndefined(name) => write!(f, "Undefined variable `{name}`"),
+            ParseError::InvalidIdentifier => write!(f, "Invalid identifier"),
+            ParseError::FunctionUndefined(name) => write!(f, "Undefined function `{name}`"),
+            ParseError::VariableUndefined(name) => write!(f, "Undefined variable `{name}`"),
+            ParseError::NoInput => write!(f, "No input given"),
+            ParseError::TokenizeError(e) => write!(f, "{e}"),
+        }
+    }
+}
+
+impl error::Error for ParseError {}
+/// Expression tree built by the parser; every operator precedes its operands.
+#[derive(Clone, Debug)]
+pub enum ParseTree {
+    // Mathematical Operators
+    Add(Box<ParseTree>, Box<ParseTree>),
+    Sub(Box<ParseTree>, Box<ParseTree>),
+    Mul(Box<ParseTree>, Box<ParseTree>),
+    Div(Box<ParseTree>, Box<ParseTree>),
+    Exp(Box<ParseTree>, Box<ParseTree>),
+    Mod(Box<ParseTree>, Box<ParseTree>),
+
+    // Boolean Operations
+    EqualTo(Box<ParseTree>, Box<ParseTree>),
+    GreaterThan(Box<ParseTree>, Box<ParseTree>),
+    GreaterThanOrEqualTo(Box<ParseTree>, Box<ParseTree>),
+    LessThan(Box<ParseTree>, Box<ParseTree>),
+    LessThanOrEqualTo(Box<ParseTree>, Box<ParseTree>),
+    Not(Box<ParseTree>),
+
+    // Defining Objects
+    Equ(String, Box<ParseTree>, Box<ParseTree>),
+    LazyEqu(String, Box<ParseTree>, Box<ParseTree>),
+    GlobalEqu(String, Box<ParseTree>),
+    LazyGlobalEqu(String, Box<ParseTree>),
+    FunctionDefinition(String, Vec<(String, Type)>, Type, Box<ParseTree>, Box<ParseTree>),
+
+    // Functional Operations
+    Compose(Box<ParseTree>, Box<ParseTree>),
+    Id(Box<ParseTree>),
+
+    // Branching
+    If(Box<ParseTree>, Box<ParseTree>),
+    IfElse(Box<ParseTree>, Box<ParseTree>, Box<ParseTree>),
+
+    // Evaluations
+    FunctionCall(String, Vec<ParseTree>),
+    Variable(String),
+    Constant(Value),
+
+    // Type Casts
+    ToInt(Box<ParseTree>),
+    ToFloat(Box<ParseTree>),
+    ToBool(Box<ParseTree>),
+    ToString(Box<ParseTree>),
+}
+
+impl ParseTree {
+    fn parse<I>(
+        tokens: &mut I,
+        globals: &HashMap<String, FunctionDeclaration>,
+        locals: &mut Cow<HashMap<String, FunctionDeclaration>>) -> Result<Self, ParseError>
+    where
+        I: Iterator<Item = Result<Token, TokenizeError>>,
+    {
+        match tokens.next() {
+            Some(Ok(token)) => {
+                match token {
+                    Token::Constant(c) => Ok(Self::Constant(c)),
+                    Token::Identifier(ident) => {
+                        // If it is found to be a function, get its argument count.
+                        // During parsing, we only keep track of function definitions
+                        // so that we know how many arguments it takes
+                        if let Some(nargs) = locals.get(&ident).or_else(|| globals.get(&ident)).map(|decl| decl.args.len()) {
+                            let args = (0..nargs) // only the arity is copied out, so `locals` is free to be borrowed mutably below
+                                .map(|_| ParseTree::parse(tokens, globals, locals)).collect::<Result<Vec<_>, ParseError>>()?;
+
+                            Ok(ParseTree::FunctionCall(ident.clone(), args))
+                        } else {
+                            Ok(ParseTree::Variable(ident.clone()))
+                        }
+                    }
+                    Token::Operator(op) => {
+                        match op {
+                            Op::Add => Ok(ParseTree::Add(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Sub => Ok(ParseTree::Sub(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Mul => Ok(ParseTree::Mul(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Div => Ok(ParseTree::Div(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Exp => Ok(ParseTree::Exp(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Mod => Ok(ParseTree::Mod(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Equ | Op::LazyEqu | Op::GlobalEqu | Op::LazyGlobalEqu => {
+                                let token = tokens.next()
+                                    .ok_or(ParseError::UnexpectedEndInput)?
+                                    .map_err(ParseError::TokenizeError)?;
+
+                                if let Token::Identifier(ident) = token {
+                                    match op {
+                                        Op::Equ => Ok(ParseTree::Equ(ident.clone(),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?)
+                                        )),
+                                        Op::LazyEqu => Ok(ParseTree::LazyEqu(ident.clone(),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?)
+                                        )),
+                                        Op::GlobalEqu => Ok(ParseTree::GlobalEqu(ident.clone(),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?)
+                                        )),
+                                        Op::LazyGlobalEqu => Ok(ParseTree::LazyGlobalEqu(ident.clone(),
+                                            Box::new(ParseTree::parse(tokens, globals, locals)?)
+                                        )),
+                                        _ => unreachable!("Operator literally changed under your nose"),
+                                    }
+                                } else {
+                                    Err(ParseError::InvalidIdentifier)
+                                }
+                            }
+                            Op::FunctionDeclare(nargs) => {
+                                let token = tokens.next()
+                                    .ok_or(ParseError::UnexpectedEndInput)?
+                                    .map_err(ParseError::TokenizeError)?;
+
+                                if let Token::Identifier(ident) = token {
+                                    let args: Vec<(String, Type)> = tokens.take(nargs)
+                                        .map(|token| match token {
+                                            Ok(Token::Identifier(ident)) => Ok((ident, Type::Any)),
+                                            Ok(_) => Err(ParseError::InvalidIdentifier),
+                                            Err(e) => Err(ParseError::TokenizeError(e)),
+                                        })
+                                        .collect::<Result<Vec<_>, ParseError>>()?;
+
+                                    let locals = locals.to_mut();
+
+                                    locals.insert(ident.clone(), FunctionDeclaration {
+                                        name: ident.clone(),
+                                        r: Type::Any,
+                                        args: args.clone(),
+                                    });
+
+                                    Ok(ParseTree::FunctionDefinition(
+                                        ident,
+                                        args,
+                                        Type::Any,
+                                        Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?),
+                                        Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?)))
+                                } else {
+                                    Err(ParseError::InvalidIdentifier)
+                                }
+                            }
+                            Op::Compose => Ok(ParseTree::Compose(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Id => Ok(ParseTree::Id(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::If => Ok(ParseTree::If(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::IfElse => Ok(ParseTree::IfElse(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::EqualTo => Ok(ParseTree::EqualTo(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::GreaterThan => Ok(ParseTree::GreaterThan(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::LessThan => Ok(ParseTree::LessThan(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::GreaterThanOrEqualTo => Ok(ParseTree::GreaterThanOrEqualTo(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::LessThanOrEqualTo => Ok(ParseTree::LessThanOrEqualTo(
+                                Box::new(ParseTree::parse(tokens, globals, locals)?),
+                                Box::new(ParseTree::parse(tokens, globals, locals)?)
+                            )),
+                            Op::Not => Ok(ParseTree::Not(Box::new(ParseTree::parse(tokens, globals, locals)?))),
+                            Op::IntCast => Ok(ParseTree::ToInt(Box::new(ParseTree::parse(tokens, globals, locals)?))),
+                            Op::FloatCast => Ok(ParseTree::ToFloat(Box::new(ParseTree::parse(tokens, globals, locals)?))),
+                            Op::BoolCast => Ok(ParseTree::ToBool(Box::new(ParseTree::parse(tokens, globals, locals)?))),
+                            Op::StringCast => Ok(ParseTree::ToString(Box::new(ParseTree::parse(tokens, globals, locals)?))),
+                        }
+                    }
+                }
+            },
+            Some(Err(e)) => Err(ParseError::TokenizeError(e)),
+            None => Err(ParseError::NoInput),
+        }
+    }
+}
+/// Iterator adapter that turns a token stream into a stream of parse trees.
+pub struct Parser<I: Iterator<Item = Result<Token, TokenizeError>>> {
+    tokens: I,
+
+    // These are used to keep track of functions in the current context
+    // by the parser. Otherwise the parser would have no way to tell
+    // if the program `* a b 12` is supposed to be ((* a b) (12)) or (* (a b) 12)
+    globals: HashMap<String, FunctionDeclaration>,
+    locals: HashMap<String, FunctionDeclaration>,
+}
+
+impl<I: Iterator<Item = Result<Token, TokenizeError>>> Parser<I> {
+    pub fn new(tokens: I) -> Self {
+        Self {
+            tokens,
+            globals: HashMap::new(),
+            locals: HashMap::new()
+        }
+    }
+
+    pub fn globals(mut self, g: HashMap<String, FunctionDeclaration>) -> Self {
+        self.globals = g;
+        self
+    }
+}
+/// Yields one parse tree per top-level expression and ends when no tokens remain.
+impl<I: Iterator<Item = Result<Token, TokenizeError>>> Iterator for Parser<I> {
+    type Item = Result<ParseTree, ParseError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let tree = ParseTree::parse(&mut self.tokens, &self.globals, &mut Cow::Borrowed(&self.locals));
+
+        match tree {
+            Ok(tree) => Some(Ok(tree)),
+            Err(e) => {
+                match e {
+                    ParseError::NoInput => None,
+                    _ => Some(Err(e)),
+                }
+            }
+        }
+    }
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
new file mode 100644
index 0000000..9e59f5e
--- /dev/null
+++ b/src/tokenizer.rs
@@ -0,0 +1,201 @@
+use std::{error, io};
+use std::collections::VecDeque;
+
+use super::Value;
+use std::fmt::{Display, Formatter};
+use std::io::{BufRead, Cursor};
+/// Errors produced while splitting input text into tokens.
+#[derive(Debug)]
+pub enum TokenizeError {
+    InvalidDynamicOperator(String),
+    InvalidNumericConstant(String),
+    InvalidIdentifier(String),
+    UnableToMatchToken(String),
+    IO(io::Error),
+}
+
+impl Display for TokenizeError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenizeError::InvalidDynamicOperator(op)
+                => write!(f, "invalid dynamic operator `{op}`"),
+            TokenizeError::InvalidNumericConstant(t)
+                => write!(f, "invalid numeric constant `{t}`"),
+            TokenizeError::InvalidIdentifier(ident)
+                => write!(f, "invalid identifier `{ident}`"),
+            TokenizeError::UnableToMatchToken(token)
+                => write!(f, "the token `{token}` was unable to be parsed"),
+            TokenizeError::IO(io) => write!(f, "{io}")
+        }
+    }
+}
+
+impl error::Error for TokenizeError {}
+/// Operators and keywords recognized by [`Token::parse`].
+#[derive(Debug, Clone)]
+pub enum Op {
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Exp,
+    Equ,
+    Mod,
+    LazyEqu,
+    GlobalEqu,
+    LazyGlobalEqu,
+    FunctionDeclare(usize),
+    Compose,
+    Id,
+    If,
+    IfElse,
+    GreaterThan,
+    LessThan,
+    EqualTo,
+    GreaterThanOrEqualTo,
+    LessThanOrEqualTo,
+    Not,
+    IntCast,
+    FloatCast,
+    BoolCast,
+    StringCast,
+}
+/// A single lexical unit: an identifier, an operator or a literal value.
+#[derive(Debug, Clone)]
+pub enum Token {
+    Identifier(String),
+    Operator(Op),
+    Constant(Value),
+}
+/// Sums the weights of `s`: `:` counts 2, `.` counts 1; any other character makes the result `None`.
+fn get_dot_count(s: &str) -> Option<usize> {
+    s.chars().fold(Some(0), |acc, c|
+        match c {
+            ':' => acc.map(|acc| acc + 2),
+            '.' => acc.map(|acc| acc + 1),
+            _ => None,
+        }
+    )
+}
+/// Returns true for characters allowed in identifiers: alphanumerics, `'` and `_`.
+fn valid_identifier(c: char) -> bool {
+    c.is_alphanumeric() || c == '\'' || c == '_'
+}
+
+impl Token {
+    fn parse(s: &str) -> Result<Self, TokenizeError> {
+        let is_string = s.len() >= 2 && s.starts_with('"') && s.ends_with('"'); // plain check instead of compiling a regex per token
+
+        if is_string {
+            return Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string())));
+        }
+
+        match s {
+            // First check if s is an operator
+            "+" => Ok(Token::Operator(Op::Add)),
+            "-" => Ok(Token::Operator(Op::Sub)),
+            "*" => Ok(Token::Operator(Op::Mul)),
+            "/" => Ok(Token::Operator(Op::Div)),
+            "**" => Ok(Token::Operator(Op::Exp)),
+            "%" => Ok(Token::Operator(Op::Mod)),
+            "=" => Ok(Token::Operator(Op::Equ)),
+            "." => Ok(Token::Operator(Op::LazyEqu)),
+            "=>" => Ok(Token::Operator(Op::GlobalEqu)),
+            ".>" => Ok(Token::Operator(Op::LazyGlobalEqu)),
+            "~" => Ok(Token::Operator(Op::Compose)),
+            "," => Ok(Token::Operator(Op::Id)),
+            "?" => Ok(Token::Operator(Op::If)),
+            "??" => Ok(Token::Operator(Op::IfElse)),
+            ">" => Ok(Token::Operator(Op::GreaterThan)),
+            "<" => Ok(Token::Operator(Op::LessThan)),
+            ">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)),
+            "<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
+            "==" => Ok(Token::Operator(Op::EqualTo)),
+
+            // then some keywords
+            "true" => Ok(Token::Constant(Value::Bool(true))),
+            "false" => Ok(Token::Constant(Value::Bool(false))),
+            "not" => Ok(Token::Operator(Op::Not)),
+
+            // Type casting
+            "int" => Ok(Token::Operator(Op::IntCast)),
+            "float" => Ok(Token::Operator(Op::FloatCast)),
+            "bool" => Ok(Token::Operator(Op::BoolCast)),
+            "string" => Ok(Token::Operator(Op::StringCast)),
+
+            // then variable length keywords, constants, and identifiers
+            _ => {
+                if s.starts_with(':') {
+                    Ok(Token::Operator(Op::FunctionDeclare(
+                        get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
+                    )))
+                } else if s.starts_with(|c| char::is_digit(c, 10) || c == '-') {
+                    if let Ok(int) = s.parse::<i64>() {
+                        Ok(Token::Constant(Value::Int(int)))
+                    } else if let Ok(float) = s.parse::<f64>() {
+                        Ok(Token::Constant(Value::Float(float)))
+                    } else {
+                        Err(TokenizeError::InvalidNumericConstant(s.to_string()))
+                    }
+                } else if s.starts_with(valid_identifier) {
+                    let valid = s.chars().skip(1).all(valid_identifier);
+                    valid.then(|| Token::Identifier(s.to_string())).ok_or(TokenizeError::InvalidIdentifier(s.to_string()))
+                } else {
+                    Err(TokenizeError::UnableToMatchToken(s.to_string()))
+                }
+            }
+        }
+    }
+}
+/// Reads all of `reader` when its queue is empty and hands out the tokens one at a time.
+pub struct Tokenizer<R: BufRead> {
+    reader: R,
+    tokens: VecDeque<Token>,
+}
+
+impl<R: BufRead> Tokenizer<R> {
+    pub fn new(reader: R) -> Self {
+        Self {
+            reader,
+            tokens: VecDeque::new(),
+        }
+    }
+}
+
+impl std::str::FromStr for Tokenizer<Cursor<String>> {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let cursor = Cursor::new(s.to_string());
+        Ok(Tokenizer::new(cursor))
+    }
+}
+/// Yields queued tokens first; once the queue is empty it reads and tokenizes the rest of the input.
+impl<R: BufRead> std::iter::Iterator for Tokenizer<R> {
+    type Item = Result<Token, TokenizeError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(token) = self.tokens.pop_front() {
+            return Some(Ok(token));
+        }
+
+        let mut input = String::new();
+
+        match self.reader.read_to_string(&mut input) {
+            Ok(0) => None,
+            Err(e) => Some(Err(TokenizeError::IO(e))),
+            _ => {
+                let re = regex::Regex::new(r#"(\-?[a-zA-Z0-9\.'_]+)|[`~!@#\$%\^&\*\(\)\+\-=\[\]\{\}\\|;:,<\.>/\?]+|("[^"]+")"#).expect("This wont fail promise :3"); // `-` is escaped: unescaped, `+-=` is a range that also matches digits
+
+                for token in re.find_iter(input.as_str()).map(|mat| mat.as_str()).map(Token::parse) {
+                    match token {
+                        Ok(token) => self.tokens.push_back(token),
+                        Err(e) => return Some(Err(e)),
+                    }
+                }
+
+                self.tokens.pop_front().map(Ok)
+            }
+        }
+    }
+}
\ No newline at end of file