diff --git a/Cargo.toml b/Cargo.toml index cb41074..517aea6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "lamm" -version = "0.1.0" +version = "0.2.0" edition = "2021" license = "MIT" description = "a simple, functional paradigm programming language which uses Polish notation" repository = "https://github.com/minneelyyyy/lamm" [dependencies] -regex = "1.11" \ No newline at end of file +regex = "1.11" diff --git a/README.md b/README.md new file mode 100644 index 0000000..d6be399 --- /dev/null +++ b/README.md @@ -0,0 +1,206 @@ +# Lamm + +A small, functional programming language. + +# Syntax + +Lamm uses [Polish Notation](https://en.wikipedia.org/wiki/Polish_notation). +That means that instead of writing `5 + 6`, you would instead write `+ 5 6`. + +Since we're here, we might as well cover some operators. + +## Math Operators + +``` ++ 5 6 # => 11 +- 5 6 # => -1 +* 5 6 # => 30 +/ 5 6 # => 0 (integer division) +** 5 6 # => 15625 +% 6 5 # => 1 +``` + +There is no order of operations to worry about, you essentially write your code in the order it should be evaluated in. + +## Variables + +Variables are **constant** in Lamm, there is no mutation. Here are some examples of defining variables. + +``` += pi 3.1415926 # immediately evaluated +. sqrt2 ** 2 0.5 # lazy evaluated +``` + +Variables are **scoped** in Lamm, meaning they only exist in the single expression that they are defined for. That means that the following code is an **error**. + +``` += pi 3.1415926 += r 16 +* pi ** r 2 # OK += deg 60 +* deg / pi 360.0 # ERROR: `pi` was undefined +``` + +## Scope + +Scope in Lamm consists of a single expression, such as `sqrt + ** a 2 ** b 2`. So then, what do I do when I need a variable for more than a single expression? There are multiple solutions depending on your needs. + +### Multi-Statement Expression + +You can create a multi-statement expression using either `()` syntax or the `~` operator, which `()` is simple syntactic sugar for. 
In these, only the value of the last expression is returned, the rest get ignored. This is the perfect place to put stateful function calls. + +``` +. x 12 ( + print + "My favorite number is " string x + print + "Auf Wiedersehen! Ich werde aber meine Lieblingsnummer " + string x " vermissen." +) +``` + +### Global Scope + +You can introduce a variable to global scope using the `export` builtin function. + +``` +# A very useful constant += pi 3.1415926 +export ["pi"] + +# Some more useful constants += e 2.71828 += phi 1.6180339887 +export ["e" "phi"] +``` + +## Functions + +All functions in Lamm are **scoped** similarly to variables. Functions are declared using the `:` operator, which can be extended with more `:` and `.` characters to let Lamm know how many arguments the function takes. + +``` +: inc x + x 1 + inc 24 # => 25 + +:. pythag a b sqrt + ** a 2.0 ** b 2.0 + pythag 3 4 # => 5 + +:::::. ten'args a b c d e f g h i j + [a b c d e f g h i j] +``` + +The parameter types and return type of functions can be declared using a special syntax unique to function and lambda definitions. + +``` +# Takes an x of `Any` type +: inc x + x 1 + inc 12 # => 13 + +# Takes an x of `Int` and returns an `Int` +: inc ?. x Int -> Int + x 1 + inc 9 # => 10 +``` + +The `?.` operator is unique to function declarations and is used to specify the type of an argument. There are also first-class functions; here is the syntax for them. + +``` +# Applies a function to any value +:. apply : f x f x + apply 'sqrt 9 # => 3 + +# Applies a function f which maps an Int to an Int to x +:. apply'int ?: f Int -> Int ?. x Int -> Int f x + apply'int 'sqrt 36 # => 6 +``` + +The `:` operator inside of a function prototype tells Lamm that this argument must be a function where every argument and its return type are all `Any`. This means that `: f` is essentially syntactic sugar for `?: f Any -> Any`. 
Also, in order to pass a function to a function, you must use the `'` operator, which tells Lamm not to call the function. + +And of course, `:` and `?:` in function prototypes can also be extended depending on the number of arguments the function must take. + +## Branching + +Lamm has the following boolean expressions: + +``` +== 1 2 # => false +!= 1 2 # => true +> 1 2 # => false +< 1 2 # => true +>= 1 2 # => false +<= 1 2 # => true +!true # => false +true && false # => false +true || false # => true +``` + +These can be used inside of `?` (if) and `??` (if-else) statements. + +``` +. n 12 + ?? < n 10 + print "n is less than 10" + print "n is greater than 10" +``` + +A `?` (if) statement whose condition is false simply returns `nil`, as do `print` and other functions without a return value. `?` is mostly useful inside of blocks. + +``` +: times'twelve ?. n Int -> Int ( + ? == n 0 + print "n is 0" + + * n 12 +) +``` + +## Arrays + +Lamm offers a few fundamental array operations. + +``` ++ 1 [2 3 4] # => [1 2 3 4] ++ [1 2 3] 4 # => [1 2 3 4] ++ [1 2] [3 4] # => [1 2 3 4] +head [1 2 3 4] # => 1 +tail [1 2 3 4] # => [2 3 4] +init [1 2 3 4] # => [1 2 3] +fini [1 2 3 4] # => 4 +bool [1 2 3 4] # => true +bool empty # => false +``` + +Using these, you can build a lot of fundamental functional paradigm functions. + +``` +:. map ?: f Any -> Any ?. x [Any] -> [Any] + ?? bool x + + f head x map 'f tail x + empty +map ;y ** y 2 [1 2 3 4 5 6 7 8 9 10] # => [1 4 9 16 25 36 49 64 81 100] + +:: iterate : f i count -> [Any] + ?? > count 0 + + i iterate 'f f i - count 1 + empty +iterate ;x + 1 x 0 10 # => [0 1 2 3 4 5 6 7 8 9] + +:. take ?. n Int ?. x [Any] -> [Any] + ?? > n 0 + + head x take - n 1 tail x + empty +take 3 [1 2 3 4 5] # => [1 2 3] + +:. take'while : pred Any -> Bool ?. x [Any] -> [Any] + ?? 
&& bool x pred head x + + head x take'while 'pred tail x + empty +take'while ;y < y 10 [1 3 5 7 9 11 13 15 16] # => [1 3 5 7 9] +``` + +## Lambdas + +Lambdas are created using the `;` operator, and they are always passed as a value, so no `'` is necessary. + + +``` +map ;x * x 12 [1 2 3] # => [12 24 36] +``` + +They follow the same prototype syntax as regular functions, with the notable lack of an identifier. \ No newline at end of file diff --git a/src/executor.rs b/src/executor.rs index 8b5b429..ef6fc50 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,10 +1,12 @@ -use super::{Value, Type, FunctionDeclaration}; +use super::{Value, Type, Object}; use super::parser::{ParseTree, ParseError}; use std::collections::HashMap; -use std::borrow::Cow; use std::fmt::Display; use std::error::Error; +use std::io; +use std::sync::{Arc, Mutex}; +use std::cell::RefCell; #[derive(Debug)] pub enum RuntimeError { @@ -16,14 +18,17 @@ pub enum RuntimeError { FunctionUndefined(String), NotAVariable(String), ParseFail(String, Type), + TypeError(Type, Type), + EmptyArray, + IO(io::Error), } impl Display for RuntimeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::ParseError(e) => write!(f, "{e}"), + Self::ParseError(e) => write!(f, "Parser Error: {e}"), Self::NoOverloadForTypes(op, values) - => write!(f, "No overload of `{op}` exists for the operands `[{}]`", + => write!(f, "No overload of `{op}` exists for the operands `{}`", values.iter().map(|x| format!("{}({x})", x.get_type())).collect::>().join(", ")), Self::ImmutableError(ident) => write!(f, "`{ident}` already exists and cannot be redefined"), Self::VariableUndefined(ident) => write!(f, "variable `{ident}` was not defined"), @@ -31,70 +36,137 @@ impl Display for RuntimeError { Self::FunctionUndefined(ident) => write!(f, "function `{ident}` was not defined"), Self::NotAVariable(ident) => write!(f, "`{ident}` is a function but was attempted to be used like a variable"), 
Self::ParseFail(s, t) => write!(f, "`\"{s}\"` couldn't be parsed into {}", t), + Self::IO(e) => write!(f, "{e}"), + Self::TypeError(left, right) => write!(f, "expected type `{left}` but got type `{right}`"), + Self::EmptyArray => write!(f, "attempt to access element from an empty array"), } } } impl Error for RuntimeError {} -#[derive(Clone, Debug)] -enum Evaluation { - // at this point, it's type is set in stone - Computed(Value), - - // at this point, it's type is unknown, and may contradict a variable's type - // or not match the expected value of the expression, this is a runtime error - Uncomputed(Box), -} - -#[derive(Clone, Debug)] -struct Function { - decl: FunctionDeclaration, - body: Option>, -} - -#[derive(Clone, Debug)] -enum Object { - Variable(Evaluation), - Function(Function), -} - /// Executes an input of ParseTrees -pub struct Executor>> { - exprs: I, - globals: HashMap, +pub struct Executor<'a, I> +where + I: Iterator> +{ + exprs: &'a mut I, + globals: &'a mut HashMap>>, + locals: HashMap>>, } -impl>> Executor { - pub fn new(exprs: I) -> Self { +impl<'a, I> Executor<'a, I> +where + I: Iterator>, +{ + pub fn new(exprs: &'a mut I, globals: &'a mut HashMap>>) -> Self { Self { exprs, - globals: HashMap::new(), + globals, + locals: HashMap::new(), } } - fn exec( - &mut self, - tree: ParseTree, - locals: &mut Cow>) -> Result - { - match tree { - ParseTree::Add(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) 
{ + pub fn _add_global(self, k: String, v: Arc>) -> Self { + self.globals.insert(k, v); + self + } + + pub fn locals(mut self, locals: HashMap>>) -> Self { + self.locals = locals; + self + } + + pub fn add_local(mut self, k: String, v: Arc>) -> Self { + self.locals.insert(k, v); + self + } + + fn _get_object(&self, ident: &String) -> Result<&Arc>, RuntimeError> { + self.locals.get(ident).or(self.globals.get(ident)) + .ok_or(RuntimeError::VariableUndefined(ident.clone())) + } + + fn get_object_mut(&mut self, ident: &String) -> Result<&mut Arc>, RuntimeError> { + self.locals.get_mut(ident).or(self.globals.get_mut(ident)) + .ok_or(RuntimeError::VariableUndefined(ident.clone())) + } + + fn variable_exists(&self, ident: &String) -> bool { + self.locals.contains_key(ident) || self.globals.contains_key(ident) + } + + fn eval(obj: &mut Arc>) -> Result { + let mut guard = obj.lock().unwrap(); + + let v = guard.eval()?; + + Ok(v) + } + + fn obj_locals(obj: &Arc>) -> HashMap>> { + let guard = obj.lock().unwrap(); + + let locals = guard.locals(); + + locals + } + + fn obj_globals(obj: &Arc>) -> HashMap>> { + let guard = obj.lock().unwrap(); + + let locals = guard.globals(); + + locals + } + + pub fn exec(&mut self, tree: Box) -> Result { + match *tree { + ParseTree::Add(x, y) => match (self.exec(x)?, self.exec(y)?) 
{ (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x + y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x + y as f64)), (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 + y)), (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x + y)), (Value::String(x), Value::String(y)) => Ok(Value::String(format!("{x}{y}"))), + (Value::Array(xtype, x), Value::Array(ytype, y)) => { + if xtype != ytype { + return Err(RuntimeError::TypeError(xtype, ytype)); + } + + Ok(Value::Array(xtype, [x, y].concat())) + }, + (Value::Array(t, x), y) => { + let ytype = y.get_type(); + + if t != ytype { + return Err(RuntimeError::TypeError(t, ytype)); + } + + // NOTE: use y's type instead of the arrays type. + // an `empty` array has Any type, but any value will have a fixed type. + // this converts the empty array into a typed array. + Ok(Value::Array(ytype, [x, vec![y]].concat())) + }, + (x, Value::Array(t, y)) => { + let xtype = x.get_type(); + + if t != xtype { + return Err(RuntimeError::TypeError(t, xtype)); + } + + // NOTE: read above + Ok(Value::Array(xtype, [vec![x], y].concat())) + }, (x, y) => Err(RuntimeError::NoOverloadForTypes("+".into(), vec![x, y])) }, - ParseTree::Sub(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::Sub(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x - y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x - y as f64)), (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 - y)), (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x - y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("-".into(), vec![x, y])) }, - ParseTree::Mul(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::Mul(x, y) => match (self.exec(x)?, self.exec(y)?) 
{ (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x * y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x * y as f64)), (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 * y)), @@ -102,28 +174,28 @@ impl>> Executor { (Value::String(x), Value::Int(y)) => Ok(Value::String(x.repeat(y as usize))), (x, y) => Err(RuntimeError::NoOverloadForTypes("*".into(), vec![x, y])) }, - ParseTree::Div(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::Div(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x / y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x / y as f64)), (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 / y)), (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x / y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("*".into(), vec![x, y])) }, - ParseTree::Exp(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::Exp(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x.pow(y as u32))), (Value::Int(x), Value::Float(y)) => Ok(Value::Float((x as f64).powf(y))), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x.powf(y as f64))), (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x.powf(y))), (x, y) => Err(RuntimeError::NoOverloadForTypes("**".into(), vec![x, y])), }, - ParseTree::Mod(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::Mod(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Int(x % y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Float(x % y as f64)), (Value::Int(x), Value::Float(y)) => Ok(Value::Float(x as f64 % y)), (Value::Float(x), Value::Float(y)) => Ok(Value::Float(x % y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("%".into(), vec![x, y])), }, - ParseTree::EqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) 
{ + ParseTree::EqualTo(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x == y)), (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 == y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x == y as f64)), @@ -132,140 +204,143 @@ impl>> Executor { (Value::String(x), Value::String(y)) => Ok(Value::Bool(x == y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("==".into(), vec![x, y])), }, - ParseTree::GreaterThan(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::NotEqualTo(x, y) => match (self.exec(x)?, self.exec(y)?) { + (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x != y)), + (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 != y)), + (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x != y as f64)), + (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x != y)), + (Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(x != y)), + (Value::String(x), Value::String(y)) => Ok(Value::Bool(x != y)), + (x, y) => Err(RuntimeError::NoOverloadForTypes("!=".into(), vec![x, y])), + }, + ParseTree::GreaterThan(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x > y)), (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 > y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x > y as f64)), (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x > y)), (x, y) => Err(RuntimeError::NoOverloadForTypes(">".into(), vec![x, y])), }, - ParseTree::GreaterThanOrEqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::GreaterThanOrEqualTo(x, y) => match (self.exec(x)?, self.exec(y)?) 
{ (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x >= y)), (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 >= y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x >= y as f64)), (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x >= y)), (x, y) => Err(RuntimeError::NoOverloadForTypes(">=".into(), vec![x, y])), }, - ParseTree::LessThan(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::LessThan(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x < y)), (Value::Int(x), Value::Float(y)) => Ok(Value::Bool((x as f64) < y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x < y as f64)), (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x < y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("<".into(), vec![x, y])), }, - ParseTree::LessThanOrEqualTo(x, y) => match (self.exec(*x, locals)?, self.exec(*y, locals)?) { + ParseTree::LessThanOrEqualTo(x, y) => match (self.exec(x)?, self.exec(y)?) { (Value::Int(x), Value::Int(y)) => Ok(Value::Bool(x <= y)), (Value::Int(x), Value::Float(y)) => Ok(Value::Bool(x as f64 <= y)), (Value::Float(x), Value::Int(y)) => Ok(Value::Bool(x <= y as f64)), (Value::Float(x), Value::Float(y)) => Ok(Value::Bool(x <= y)), (x, y) => Err(RuntimeError::NoOverloadForTypes("<=".into(), vec![x, y])), }, - ParseTree::Not(x) => match self.exec(*x, locals)? { + ParseTree::Not(x) => match self.exec(x)? { Value::Bool(x) => Ok(Value::Bool(!x)), x => Err(RuntimeError::NoOverloadForTypes("not".into(), vec![x])) }, + ParseTree::And(x, y) => match (self.exec(x)?, self.exec(y)?) { + (Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(x && y)), + (x, y) => Err(RuntimeError::NoOverloadForTypes("&&".into(), vec![x, y])) + }, + ParseTree::Or(x, y) => match (self.exec(x)?, self.exec(y)?) 
{ + (Value::Bool(x), Value::Bool(y)) => Ok(Value::Bool(x || y)), + (x, y) => Err(RuntimeError::NoOverloadForTypes("||".into(), vec![x, y])) + }, ParseTree::Equ(ident, body, scope) => { - if self.globals.contains_key(&ident) || locals.contains_key(&ident) { + if self.variable_exists(&ident) { Err(RuntimeError::ImmutableError(ident.clone())) } else { - let locals = locals.to_mut(); - let value = self.exec(*body, &mut Cow::Borrowed(&locals))?; - locals.insert(ident.clone(), Object::Variable(Evaluation::Computed(value))); + let value = self.exec(body)?; + let g = self.globals.clone(); - self.exec(*scope, &mut Cow::Borrowed(&locals)) + Executor::new(self.exprs, &mut self.globals) + .locals(self.locals.clone()) + .add_local(ident, Arc::new(Mutex::new(Object::value(value, g, self.locals.to_owned())))) + .exec(scope) } }, ParseTree::LazyEqu(ident, body, scope) => { - if self.globals.contains_key(&ident) || locals.contains_key(&ident) { + if self.variable_exists(&ident) { Err(RuntimeError::ImmutableError(ident.clone())) } else { - let locals = locals.to_mut(); - locals.insert(ident.clone(), Object::Variable(Evaluation::Uncomputed(body))); - - self.exec(*scope, &mut Cow::Borrowed(&locals)) + let g = self.globals.clone(); + Executor::new(self.exprs, &mut self.globals) + .locals(self.locals.clone()) + .add_local(ident, Arc::new(Mutex::new(Object::variable(*body, g, self.locals.to_owned())))) + .exec(scope) } }, - ParseTree::FunctionDefinition(ident, args, r, body, scope) => { - let existing = locals.get(&ident).or(self.globals.get(&ident)).cloned(); - - match existing { - Some(_) => Err(RuntimeError::ImmutableError(ident.clone())), - None => { - let locals = locals.to_mut(); - - locals.insert(ident.clone(), Object::Function(Function { - decl: FunctionDeclaration { _name: ident.clone(), _r: r, args }, - body: Some(body) - })); - - self.exec(*scope, &mut Cow::Borrowed(&locals)) - } - } + ParseTree::FunctionDefinition(func, scope) => { + let g = self.globals.clone(); + 
Executor::new(self.exprs, &mut self.globals) + .locals(self.locals.clone()) + .add_local(func.name().unwrap().to_string(), Arc::new(Mutex::new(Object::function(func, g, self.locals.clone())))) + .exec(scope) }, ParseTree::Compose(x, y) => { - self.exec(*x, locals)?; - self.exec(*y, locals) + self.exec(x)?; + self.exec(y) }, - ParseTree::Id(x) => self.exec(*x, locals), - ParseTree::If(cond, body) => if match self.exec(*cond, locals)? { + ParseTree::Id(x) => self.exec(x), + ParseTree::If(cond, body) => if match self.exec(cond)? { Value::Float(f) => f != 0.0, Value::Int(i) => i != 0, Value::Bool(b) => b, Value::String(s) => !s.is_empty(), + Value::Array(_, vec) => !vec.is_empty(), Value::Nil => false, + x => return Err(RuntimeError::NoOverloadForTypes("?".into(), vec![x])), } { - self.exec(*body, locals) + self.exec(body) } else { Ok(Value::Nil) }, - ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(*cond, locals)? { + ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(cond)? 
{ Value::Float(f) => f != 0.0, Value::Int(i) => i != 0, Value::Bool(b) => b, Value::String(s) => !s.is_empty(), + Value::Array(_, vec) => !vec.is_empty(), Value::Nil => false, + x => return Err(RuntimeError::NoOverloadForTypes("??".into(), vec![x])), } { - self.exec(*istrue, locals) + self.exec(istrue) } else { - self.exec(*isfalse, locals) + self.exec(isfalse) }, ParseTree::FunctionCall(ident, args) => { - let obj = locals.get(&ident).or(self.globals.get(&ident)).cloned(); + let obj = self.get_object_mut(&ident)?; + let globals = Self::obj_globals(obj); + let locals = Self::obj_locals(obj); + let v = Self::eval(obj)?; - if let Some(Object::Function(f)) = obj { - let locals = locals.to_mut(); - let body = f.body.ok_or(RuntimeError::FunctionUndefined(ident.clone()))?; + match v { + Value::Function(mut f) => { + let args = args.into_iter() + .map(|x| Object::variable(x, self.globals.clone(), self.locals.clone())) + .collect(); - for ((name, _), tree) in std::iter::zip(f.decl.args, args) { - locals.insert(name.clone(), Object::Variable(Evaluation::Computed(self.exec(tree, &mut Cow::Borrowed(locals))?))); - } - - self.exec(*body, &mut Cow::Borrowed(&locals)) - } else { - Err(RuntimeError::FunctionUndeclared(ident.clone())) + f.call(globals, locals, args) + }, + _ => Err(RuntimeError::FunctionUndefined(ident.clone())) } }, ParseTree::Variable(ident) => { - let locals = locals.to_mut(); + let obj = self.get_object_mut(&ident)?; - let obj = locals.get(&ident).or(self.globals.get(&ident)).cloned(); + let v = obj.lock().unwrap().eval()?; - if let Some(Object::Variable(eval)) = obj { - match eval { - Evaluation::Computed(v) => Ok(v), - Evaluation::Uncomputed(tree) => { - let v = self.exec(*tree, &mut Cow::Borrowed(&locals))?; - locals.insert(ident, Object::Variable(Evaluation::Computed(v.clone()))); - - Ok(v) - } - } - } else { - Err(RuntimeError::VariableUndefined(ident.clone())) - } + Ok(v) }, ParseTree::Constant(value) => Ok(value), - ParseTree::ToInt(x) => match 
self.exec(*x, locals)? { + ParseTree::IntCast(x) => match self.exec(x)? { Value::Int(x) => Ok(Value::Int(x)), Value::Float(x) => Ok(Value::Int(x as i64)), Value::Bool(x) => Ok(Value::Int(if x { 1 } else { 0 })), @@ -275,7 +350,7 @@ impl>> Executor { } x => Err(RuntimeError::NoOverloadForTypes("int".into(), vec![x])), }, - ParseTree::ToFloat(x) => match self.exec(*x, locals)? { + ParseTree::FloatCast(x) => match self.exec(x)? { Value::Int(x) => Ok(Value::Float(x as f64)), Value::Float(x) => Ok(Value::Float(x)), Value::Bool(x) => Ok(Value::Float(if x { 1.0 } else { 0.0 })), @@ -285,26 +360,70 @@ impl>> Executor { } x => Err(RuntimeError::NoOverloadForTypes("float".into(), vec![x])), }, - ParseTree::ToBool(x) => match self.exec(*x, locals)? { + ParseTree::BoolCast(x) => match self.exec(x)? { Value::Int(x) => Ok(Value::Bool(x != 0)), Value::Float(x) => Ok(Value::Bool(x != 0.0)), Value::Bool(x) => Ok(Value::Bool(x)), Value::String(x) => Ok(Value::Bool(!x.is_empty())), + Value::Array(_, vec) => Ok(Value::Bool(!vec.is_empty())), x => Err(RuntimeError::NoOverloadForTypes("bool".into(), vec![x])), }, - ParseTree::ToString(x) => Ok(Value::String(format!("{}", self.exec(*x, locals)?))), + ParseTree::StringCast(x) => Ok(Value::String(format!("{}", self.exec(x)?))), + ParseTree::Print(x) => match self.exec(x)? { + Value::String(s) => { + println!("{s}"); + Ok(Value::Nil) + } + x => { + println!("{x}"); + Ok(Value::Nil) + } + } + ParseTree::LambdaDefinition(func) => Ok(Value::Function(func)), + ParseTree::NonCall(name) => { + let obj = self.get_object_mut(&name)?; + + let v = obj.lock().unwrap().eval()?; + + Ok(v) + } + ParseTree::Head(x) => match self.exec(x)? { + Value::Array(_, x) => Ok(x.first().ok_or(RuntimeError::EmptyArray)?.clone()), + t => Err(RuntimeError::NoOverloadForTypes("head".into(), vec![t])) + }, + ParseTree::Tail(x) => match self.exec(x)? 
{ + Value::Array(t, x) => Ok(Value::Array(t, if x.len() > 0 { x[1..].to_vec() } else { vec![] })), + t => Err(RuntimeError::NoOverloadForTypes("tail".into(), vec![t])) + }, + ParseTree::Init(x) => match self.exec(x)? { + Value::Array(t, x) => Ok(Value::Array(t, if x.len() > 0 { x[..x.len() - 1].to_vec() } else { vec![] })), + t => Err(RuntimeError::NoOverloadForTypes("init".into(), vec![t])) + }, + ParseTree::Fini(x) => match self.exec(x)? { + Value::Array(_, x) => Ok(x.last().ok_or(RuntimeError::EmptyArray)?.clone()), + t => Err(RuntimeError::NoOverloadForTypes("fini".into(), vec![t])) + }, + ParseTree::Nop => Ok(Value::Nil), + ParseTree::Export(names) => { + for name in names { + let obj = self.locals.remove(&name).ok_or(RuntimeError::VariableUndefined(name.clone()))?; + self.globals.insert(name, obj); + } + + Ok(Value::Nil) + } } } } -impl>> Iterator for Executor { +impl<'a, I: Iterator>> Iterator for Executor<'a, I> { type Item = Result; fn next(&mut self) -> Option { let expr = self.exprs.next(); match expr { - Some(Ok(expr)) => Some(self.exec(expr, &mut Cow::Borrowed(&HashMap::new()))), + Some(Ok(expr)) => Some(self.exec(Box::new(expr))), Some(Err(e)) => Some(Err(RuntimeError::ParseError(e))), None => None, } diff --git a/src/function.rs b/src/function.rs new file mode 100644 index 0000000..49db30a --- /dev/null +++ b/src/function.rs @@ -0,0 +1,81 @@ +use std::cell::RefCell; +use crate::parser::ParseTree; +use crate::executor::{Executor, RuntimeError}; +use crate::{Type, Object, Value}; + +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::sync::{Arc, Mutex}; + +#[derive(Clone, Debug, PartialEq)] +pub struct FunctionType(pub Box, pub Vec); + +impl Display for FunctionType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Function({}, {})", self.0, self.1.iter().map(|x| format!("{x}")).collect::>().join(", ")) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Function { + pub(crate) name: Option, + t: 
FunctionType, + arg_names: Vec, + body: Box, +} + +impl Function { + pub(crate) fn lambda(t: FunctionType, arg_names: Vec, body: Box) -> Self { + Self { + name: None, + t, + arg_names, + body + } + } + + pub(crate) fn named(name: &str, t: FunctionType, arg_names: Vec, body: Box) -> Self { + Self { + name: Some(name.to_string()), + t, + arg_names, + body + } + } + + pub(crate) fn name(&self) -> Option<&str> { + self.name.as_ref().map(|x| x.as_str()) + } + + pub(crate) fn get_type(&self) -> FunctionType { + self.t.clone() + } + + pub(crate) fn call(&mut self, + mut globals: HashMap>>, + locals: HashMap>>, + args: Vec) -> Result + { + let mut tree = vec![Ok(*self.body.clone())].into_iter(); + let g = globals.clone(); + + let mut exec = Executor::new(&mut tree, &mut globals) + .locals(locals.clone()); + + for (obj, name) in std::iter::zip(args.into_iter(), self.arg_names.clone().into_iter()) { + exec = exec.add_local(name.clone(), Arc::new(Mutex::new(obj))); + } + + if let Some(name) = self.name().map(|x| x.to_string()) { + exec = exec.add_local(name, Arc::new(Mutex::new(Object::function(self.clone(), g, locals)))); + } + + exec.next().unwrap() + } +} + +impl Display for Function { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.t) + } +} diff --git a/src/lib.rs b/src/lib.rs index 2173d49..de35b96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,20 @@ - mod tokenizer; mod parser; mod executor; +mod function; +use executor::{Executor, RuntimeError}; +use parser::{ParseTree, Parser}; +use tokenizer::Tokenizer; +use function::{FunctionType, Function}; + +use std::collections::HashMap; use std::fmt::Display; use std::io::BufRead; +use std::fmt; +use std::iter::Peekable; +use std::sync::{Arc, Mutex}; +use std::cell::RefCell; #[derive(Clone, Debug)] pub enum Type { @@ -12,21 +22,35 @@ pub enum Type { Int, Bool, String, + Array(Box), + Function(FunctionType), Nil, Any, - _Function(Box, Vec), +} + +impl PartialEq for Type { + fn 
eq(&self, other: &Type) -> bool { + match (self, other) { + (Self::Any, _) => true, + (_, Self::Any) => true, + (Self::Array(l0), Self::Array(r0)) => l0 == r0, + (Self::Function(l0), Self::Function(r0)) => l0 == r0, + _ => core::mem::discriminant(self) == core::mem::discriminant(other), + } + } } impl Display for Type { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", match self { Self::Float => "Float".into(), Self::Int => "Int".into(), Self::Bool => "Bool".into(), Self::String => "String".into(), + Self::Array(t) => format!("[{t}]"), + Self::Function(r) => format!("{r}"), Self::Nil => "Nil".into(), Self::Any => "Any".into(), - Self::_Function(r, _) => format!("Function -> {}", *r) }) } } @@ -38,6 +62,8 @@ pub enum Value { Int(i64), Bool(bool), String(String), + Array(Type, Vec), + Function(Function), Nil, } @@ -48,7 +74,9 @@ impl Value { Self::Int(_) => Type::Int, Self::Bool(_) => Type::Bool, Self::String(_) => Type::String, + Self::Array(t, _) => Type::Array(Box::new(t.clone())), Self::Nil => Type::Nil, + Self::Function(f) => Type::Function(f.get_type()), } } } @@ -59,19 +87,105 @@ impl Display for Value { Self::Float(x) => write!(f, "{x}"), Self::Int(x) => write!(f, "{x}"), Self::Bool(x) => write!(f, "{}", if *x { "true" } else { "false" }), - Self::String(x) => write!(f, "{x}"), + Self::String(x) => write!(f, "\"{x}\""), + Self::Array(_t, v) => write!(f, "[{}]", v.iter().map(|x| format!("{x}")).collect::>().join(" ")), + Self::Function(func) => write!(f, "{func}"), Self::Nil => write!(f, "nil"), } } } -#[derive(Clone, Debug)] -pub(crate) struct FunctionDeclaration { - _name: String, - _r: Type, - args: Vec<(String, Type)>, +#[derive(Clone, Debug, PartialEq)] +enum Cache { + Cached(Value), + Uncached(ParseTree), } -pub fn evaluate(r: R) -> impl Iterator> { - executor::Executor::new(parser::Parser::new(tokenizer::Tokenizer::new(r))) +#[derive(Clone, Debug)] +struct 
Object { + locals: HashMap>>, + globals: HashMap>>, + value: Cache, +} + +impl PartialEq for Object { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +impl Object { + pub fn variable(tree: ParseTree, globals: HashMap>>, locals: HashMap>>) -> Self { + Self { + locals, + globals, + value: Cache::Uncached(tree), + } + } + + pub fn value(v: Value, globals: HashMap>>, locals: HashMap>>) -> Self { + Self { + locals, + globals, + value: Cache::Cached(v), + } + } + + pub fn function(func: Function, globals: HashMap>>, locals: HashMap>>) -> Self { + Self { + locals, + globals, + value: Cache::Cached(Value::Function(func)), + } + } + + /// evaluate the tree inside of an object if it isn't evaluated yet, returns the value + pub fn eval(&mut self) -> Result { + match self.value.clone() { + Cache::Cached(v) => Ok(v), + Cache::Uncached(tree) => { + let mut tree = vec![Ok(tree)].into_iter(); + + let mut exec = Executor::new(&mut tree, &mut self.globals) + .locals(self.locals.clone()); + + let v = exec.next().unwrap()?; + + self.value = Cache::Cached(v.clone()); + + Ok(v) + } + } + } + + pub fn locals(&self) -> HashMap>> { + self.locals.clone() + } + + pub fn globals(&self) -> HashMap>> { + self.globals.clone() + } +} + +pub struct Runtime<'a, R: BufRead> { + tokenizer: Peekable>, + global_types: HashMap, + globals: HashMap>>, + parser: Option>>, +} + +impl<'a, R: BufRead> Runtime<'a, R> { + pub fn new(reader: R) -> Self { + Self { + tokenizer: Tokenizer::new(reader).peekable(), + global_types: HashMap::new(), + globals: HashMap::new(), + parser: None, + } + } + + pub fn values(&'a mut self) -> impl Iterator> + 'a { + self.parser = Some(Parser::new(&mut self.tokenizer, &mut self.global_types)); + Executor::new(self.parser.as_mut().unwrap(), &mut self.globals) + } } diff --git a/examples/repl.rs b/src/main.rs similarity index 56% rename from examples/repl.rs rename to src/main.rs index e978730..571aa6c 100644 --- a/examples/repl.rs +++ b/src/main.rs @@ 
-1,7 +1,9 @@ use std::io::{self, BufReader}; fn main() { - for value in lamm::evaluate(BufReader::new(io::stdin())) { + let mut runtime = lamm::Runtime::new(BufReader::new(io::stdin())); + + for value in runtime.values() { match value { Ok(v) => println!("{v}"), Err(e) => eprintln!("{e}"), diff --git a/src/parser.rs b/src/parser.rs index 64285f9..a577859 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,39 +1,45 @@ -use super::{Type, Value, FunctionDeclaration}; + +use crate::executor::Executor; + +use super::{Value, Type, Function, FunctionType}; use super::tokenizer::{Token, TokenizeError, Op}; +use std::borrow::BorrowMut; use std::error; use std::collections::HashMap; use std::fmt::Display; -use std::borrow::Cow; +use std::iter::Peekable; #[derive(Debug)] pub enum ParseError { NoInput, UnexpectedEndInput, IdentifierUndefined(String), - InvalidIdentifier, - FunctionUndefined(String), - VariableUndefined(String), + InvalidIdentifier(Token), + UnmatchedArrayClose, + UnwantedToken(Token), TokenizeError(TokenizeError), + ImmutableError(String), } impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"), - ParseError::IdentifierUndefined(name) => write!(f, "Undefined variable `{name}`"), - ParseError::InvalidIdentifier => write!(f, "Invalid identifier"), - ParseError::FunctionUndefined(name) => write!(f, "Undefined function `{name}`"), - ParseError::VariableUndefined(name) => write!(f, "Undefined variable `{name}`"), + ParseError::IdentifierUndefined(name) => write!(f, "Undefined identifier `{name}`"), + ParseError::InvalidIdentifier(t) => write!(f, "Invalid identifier `{t:?}`"), ParseError::NoInput => write!(f, "No input given"), - ParseError::TokenizeError(e) => write!(f, "{e}"), + ParseError::UnmatchedArrayClose => write!(f, "there was an unmatched array closing operator `]`"), + ParseError::TokenizeError(e) => write!(f, "Tokenizer 
Error: {e}"), + ParseError::ImmutableError(i) => write!(f, "attempt to redeclare {i} met with force"), + ParseError::UnwantedToken(t) => write!(f, "unexpected token {t:?}"), } } } impl error::Error for ParseError {} -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub(crate) enum ParseTree { // Mathematical Operators Add(Box, Box), @@ -45,20 +51,28 @@ pub(crate) enum ParseTree { // Boolean Operations EqualTo(Box, Box), + NotEqualTo(Box, Box), GreaterThan(Box, Box), GreaterThanOrEqualTo(Box, Box), LessThan(Box, Box), LessThanOrEqualTo(Box, Box), Not(Box), + And(Box, Box), + Or(Box, Box), // Defining Objects Equ(String, Box, Box), LazyEqu(String, Box, Box), - FunctionDefinition(String, Vec<(String, Type)>, Type, Box, Box), + FunctionDefinition(Function, Box), + LambdaDefinition(Function), // Functional Operations Compose(Box, Box), Id(Box), + Head(Box), + Tail(Box), + Init(Box), + Fini(Box), // Branching If(Box, Box), @@ -68,195 +82,430 @@ pub(crate) enum ParseTree { FunctionCall(String, Vec), Variable(String), Constant(Value), + NonCall(String), // Type Casts - ToInt(Box), - ToFloat(Box), - ToBool(Box), - ToString(Box), -} + IntCast(Box), + FloatCast(Box), + BoolCast(Box), + StringCast(Box), -impl ParseTree { - fn parse( - tokens: &mut I, - globals: &HashMap, - locals: &mut Cow>) -> Result - where - I: Iterator>, - { - match tokens.next() { - Some(Ok(token)) => { - match token { - Token::Constant(c) => Ok(Self::Constant(c)), - Token::Identifier(ident) => { - // If it is found to be a function, get its argument count. 
- // During parsing, we only keep track of function definitions - // so that we know how many arguments it takes - if let Some(decl) = locals.clone().get(&ident).or(globals.clone().get(&ident)) { - let args = decl.args.iter() - .map(|_| ParseTree::parse(tokens, globals, locals)).collect::, ParseError>>()?; - - Ok(ParseTree::FunctionCall(ident.clone(), args)) - } else { - Ok(ParseTree::Variable(ident.clone())) - } - } - Token::Operator(op) => { - match op { - Op::Add => Ok(ParseTree::Add( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Sub => Ok(ParseTree::Sub( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Mul => Ok(ParseTree::Mul( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Div => Ok(ParseTree::Div( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Exp => Ok(ParseTree::Exp( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Mod => Ok(ParseTree::Mod( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Equ | Op::LazyEqu => { - let token = tokens.next() - .ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))?; - - if let Token::Identifier(ident) = token { - match op { - Op::Equ => Ok(ParseTree::Equ(ident.clone(), - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::LazyEqu => Ok(ParseTree::LazyEqu(ident.clone(), - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) 
- )), - _ => panic!("Operator literally changed under your nose"), - } - } else { - Err(ParseError::InvalidIdentifier) - } - } - Op::FunctionDeclare(nargs) => { - let token = tokens.next() - .ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))?; - - if let Token::Identifier(ident) = token { - let args: Vec<(String, Type)> = tokens.take(nargs) - .map(|token| match token { - Ok(Token::Identifier(ident)) => Ok((ident, Type::Any)), - Ok(_) => Err(ParseError::InvalidIdentifier), - Err(e) => Err(ParseError::TokenizeError(e)), - }) - .collect::, ParseError>>()?; - - let locals = locals.to_mut(); - - locals.insert(ident.clone(), FunctionDeclaration { - _name: ident.clone(), - _r: Type::Any, - args: args.clone(), - }); - - Ok(ParseTree::FunctionDefinition( - ident, - args, - Type::Any, - Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?), - Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?))) - } else { - Err(ParseError::InvalidIdentifier) - } - } - Op::Compose => Ok(ParseTree::Compose( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Id => Ok(ParseTree::Id( - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::If => Ok(ParseTree::If( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::IfElse => Ok(ParseTree::IfElse( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::EqualTo => Ok(ParseTree::EqualTo( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::GreaterThan => Ok(ParseTree::GreaterThan( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) 
- )), - Op::LessThan => Ok(ParseTree::LessThan( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::GreaterThanOrEqualTo => Ok(ParseTree::GreaterThanOrEqualTo( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::LessThanOrEqualTo => Ok(ParseTree::LessThanOrEqualTo( - Box::new(ParseTree::parse(tokens, globals, locals)?), - Box::new(ParseTree::parse(tokens, globals, locals)?) - )), - Op::Not => Ok(ParseTree::Not(Box::new(ParseTree::parse(tokens, globals, locals)?))), - Op::IntCast => Ok(ParseTree::ToInt(Box::new(ParseTree::parse(tokens, globals, locals)?))), - Op::FloatCast => Ok(ParseTree::ToFloat(Box::new(ParseTree::parse(tokens, globals, locals)?))), - Op::BoolCast => Ok(ParseTree::ToBool(Box::new(ParseTree::parse(tokens, globals, locals)?))), - Op::StringCast => Ok(ParseTree::ToString(Box::new(ParseTree::parse(tokens, globals, locals)?))), - } - } - } - }, - Some(Err(e)) => Err(ParseError::TokenizeError(e)), - None => Err(ParseError::NoInput), - } - } + // Misc + Print(Box), + Nop, + Export(Vec), } /// Parses input tokens and produces ParseTrees for an Executor -pub(crate) struct Parser>> { - tokens: I, - - // These are used to keep track of functions in the current context - // by the parser. 
otherwise the parser would have no way to tell - // if the program `* a b 12` is supposed to be ((* a b) (12)) or (* (a b) 12) - globals: HashMap, - locals: HashMap, +pub(crate) struct Parser<'a, I: Iterator>> { + tokens: &'a mut Peekable, + globals: &'a mut HashMap, + locals: HashMap, } -impl>> Parser { - pub fn new(tokens: I) -> Self { +impl<'a, I: Iterator>> Parser<'a, I> { + pub fn new(tokens: &'a mut Peekable, globals: &'a mut HashMap) -> Self { Self { tokens, - globals: HashMap::new(), + globals, locals: HashMap::new() } } + + + pub fn add_global(self, k: String, v: Type) -> Self { + self.globals.insert(k, v); + self + } + + pub fn add_globals>(self, items: Items) -> Self { + items.for_each(|(name, t)| { + self.globals.insert(name, t); + }); + self + } + + pub fn locals(mut self, locals: HashMap) -> Self { + self.locals = locals; + self + } + + pub fn add_local(mut self, k: String, v: Type) -> Self { + self.locals.insert(k, v); + self + } + + pub fn add_locals>(mut self, items: Items) -> Self { + items.for_each(|(name, t)| { + self.locals.insert(name, t); + }); + self + } + + fn get_object_type(&self, ident: &String) -> Result<&Type, ParseError> { + self.locals.get(ident).or(self.globals.get(ident)) + .ok_or(ParseError::IdentifierUndefined(ident.clone())) + } + + fn get_object_types>(&self, items: Names) -> impl Iterator> { + items.map(|x| self.get_object_type(&x)) + } + + fn parse(&mut self) -> Result { + match self.tokens.next().ok_or(ParseError::NoInput)?.map_err(|e| ParseError::TokenizeError(e))? { + Token::Constant(c) => Ok(ParseTree::Constant(c)), + Token::Identifier(ident) => { + match self.get_object_type(&ident)? 
{ + Type::Function(f) => { + let args = f.1.clone().iter() + .map(|_| self.parse()).collect::, ParseError>>()?; + + Ok(ParseTree::FunctionCall(ident, args)) + } + _ => Ok(ParseTree::Variable(ident)), + } + } + Token::Operator(op) => { + match op { + Op::Add => Ok(ParseTree::Add(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Sub => Ok(ParseTree::Sub(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Mul => Ok(ParseTree::Mul(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Div => Ok(ParseTree::Div(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Exp => Ok(ParseTree::Exp(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Mod => Ok(ParseTree::Mod(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Equ | Op::LazyEqu => { + let token = self.tokens.next() + .ok_or(ParseError::UnexpectedEndInput)? + .map_err(|e| ParseError::TokenizeError(e))?; + + let body = Box::new(self.parse()?); + + if let Token::Identifier(ident) = token { + match op { + Op::Equ => Ok(ParseTree::Equ(ident.clone(), + body, + Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut()) + .locals(self.locals.clone()) + .add_local(ident, Type::Any) + .parse()?)) + ), + Op::LazyEqu => Ok(ParseTree::LazyEqu(ident.clone(), + body, + Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut()) + .locals(self.locals.clone()) + .add_local(ident, Type::Any) + .parse()?)) + ), + _ => unreachable!(), + } + } else { + Err(ParseError::InvalidIdentifier(token)) + } + } + Op::FunctionDefine(arg_count) => { + let f = self.parse_function(arg_count)?; + + Ok(ParseTree::FunctionDefinition(f.clone(), + Box::new( + Parser::new(self.tokens, self.globals.borrow_mut()) + .locals(self.locals.clone()) + .add_local(f.name().unwrap().to_string(), Type::Function(f.get_type())) + .parse()? 
+ ))) + }, + Op::Compose => Ok(ParseTree::Compose(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Id => Ok(ParseTree::Id(Box::new(self.parse()?))), + Op::IfElse => Ok(ParseTree::IfElse(Box::new(self.parse()?), Box::new(self.parse()?), Box::new(self.parse()?))), + Op::If => Ok(ParseTree::If(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::EqualTo => Ok(ParseTree::EqualTo(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::GreaterThan => Ok(ParseTree::GreaterThan(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::LessThan => Ok(ParseTree::LessThan(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::GreaterThanOrEqualTo => Ok(ParseTree::GreaterThanOrEqualTo(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::LessThanOrEqualTo => Ok(ParseTree::LessThanOrEqualTo(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Not => Ok(ParseTree::Not(Box::new(self.parse()?))), + Op::IntCast => Ok(ParseTree::IntCast(Box::new(self.parse()?))), + Op::FloatCast => Ok(ParseTree::FloatCast(Box::new(self.parse()?))), + Op::BoolCast => Ok(ParseTree::BoolCast(Box::new(self.parse()?))), + Op::StringCast => Ok(ParseTree::StringCast(Box::new(self.parse()?))), + Op::Print => Ok(ParseTree::Print(Box::new(self.parse()?))), + Op::OpenArray => { + let mut depth = 1; + + // take tokens until we reach the end of this array + // if we don't collect them here it causes rust to overflow computing the types + let array_tokens = self.tokens.by_ref().take_while(|t| match t { + Ok(Token::Operator(Op::OpenArray)) => { + depth += 1; + true + }, + Ok(Token::Operator(Op::CloseArray)) => { + depth -= 1; + depth > 0 + } + _ => true, + }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + + let mut array_tokens = array_tokens + .into_iter() + .map(|t| Ok(t)) + .collect::>>() + .into_iter() + .peekable(); + + let trees: Vec = Parser::new(&mut array_tokens, self.globals.borrow_mut()) + .locals(self.locals.to_owned()) + .collect::>()?; + + let 
tree = trees.into_iter().fold( + ParseTree::Constant(Value::Array(Type::Any, vec![])), + |acc, x| ParseTree::Add(Box::new(acc), Box::new(x.clone())), + ); + + Ok(tree) + } + Op::OpenStatement => { + let mut depth = 1; + + // take tokens until we reach the end of this array + // if we don't collect them here it causes rust to overflow computing the types + let tokens = self.tokens.by_ref().take_while(|t| match t { + Ok(Token::Operator(Op::OpenStatement)) => { + depth += 1; + true + }, + Ok(Token::Operator(Op::CloseStatement)) => { + depth -= 1; + depth > 0 + } + _ => true, + }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + + let mut tokens = tokens + .into_iter() + .map(|t| Ok(t)) + .collect::>>() + .into_iter() + .peekable(); + + let trees: Vec = Parser::new(&mut tokens, self.globals.borrow_mut()) + .locals(self.locals.to_owned()) + .collect::>()?; + + let tree = trees.into_iter().fold( + ParseTree::Nop, + |acc, x| ParseTree::Compose(Box::new(acc), Box::new(x.clone())), + ); + + Ok(tree) + } + Op::Empty => Ok(ParseTree::Constant(Value::Array(Type::Any, vec![]))), + Op::CloseArray => Err(ParseError::UnmatchedArrayClose), + Op::NotEqualTo => Ok(ParseTree::NotEqualTo(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::And => Ok(ParseTree::And(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::Or => Ok(ParseTree::Or(Box::new(self.parse()?), Box::new(self.parse()?))), + Op::LambdaDefine(arg_count) => { + let f = self.parse_lambda(arg_count)?; + Ok(ParseTree::LambdaDefinition(f)) + } + Op::NonCall => { + let name = Self::get_identifier(self.tokens.next())?; + Ok(ParseTree::NonCall(name)) + }, + Op::Head => Ok(ParseTree::Head(Box::new(self.parse()?))), + Op::Tail => Ok(ParseTree::Tail(Box::new(self.parse()?))), + Op::Init => Ok(ParseTree::Init(Box::new(self.parse()?))), + Op::Fini => Ok(ParseTree::Fini(Box::new(self.parse()?))), + Op::Export => { + let list = self.parse()?; + let mut g = HashMap::new(); + let list = 
Executor::new(&mut vec![Ok(list)].into_iter(), &mut g).next().unwrap().map_err(|_| ParseError::NoInput)?; + + if let Value::Array(Type::String, items) = list { + let names = items.into_iter().map(|x| match x { + Value::String(s) => s, + _ => unreachable!(), + }); + + for name in names.clone() { + let t = self.locals.remove(&name).ok_or(ParseError::IdentifierUndefined(name.clone()))?; + self.globals.insert(name, t); + } + + Ok(ParseTree::Export(names.collect())) + } else { + Err(ParseError::NoInput) + } + } + op => Err(ParseError::UnwantedToken(Token::Operator(op))), + } + } + t => Err(ParseError::UnwantedToken(t)), + } + } + + fn parse_lambda(&mut self, arg_count: usize) -> Result { + let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?; + + let mut locals = self.locals.clone(); + + for (name, t) in std::iter::zip(args.iter(), t.1.iter()) { + locals.insert(name.clone(), t.clone()); + } + + Ok(Function::lambda(t, args, Box::new( + Parser::new(self.tokens, &mut self.globals) + .locals(locals).parse()?))) + } + + fn parse_function(&mut self, arg_count: usize) -> Result { + let name = Self::get_identifier(self.tokens.next())?; + let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?; + + let mut locals = self.locals.clone(); + + for (name, t) in std::iter::zip(args.iter(), t.1.iter()) { + locals.insert(name.clone(), t.clone()); + } + + locals.insert(name.clone(), Type::Function(t.clone())); + + Ok(Function::named(&name, t, args, Box::new( + Parser::new(self.tokens, &mut self.globals) + .locals(locals).parse()?))) + } + + fn parse_function_declaration( + tokens: &mut Peekable, + arg_count: usize) -> Result<(FunctionType, Vec), ParseError> + { + let args: Vec<(Type, String)> = (0..arg_count) + .map(|_| Self::parse_function_declaration_parameter(tokens)) + .collect::>()?; + + let (types, names): (Vec<_>, Vec<_>) = args.into_iter().unzip(); + let ret = if tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow)))).is_some() { + 
Self::parse_type(tokens)? + } else { + Type::Any + }; + + Ok((FunctionType(Box::new(ret), types), names)) + } + + fn parse_function_declaration_parameter( + mut tokens: &mut Peekable) -> Result<(Type, String), ParseError> + { + match tokens.next() { + // untyped variable + Some(Ok(Token::Identifier(x))) => Ok((Type::Any, x)), + + // typed variable + Some(Ok(Token::Operator(Op::TypeDeclaration))) => { + let name = Self::get_identifier(tokens.next())?; + let t = Self::parse_type(&mut tokens)?; + + Ok((t, name)) + } + + // untyped function (all args Any, return type Any) + Some(Ok(Token::Operator(Op::FunctionDefine(n)))) => { + let name = Self::get_identifier(tokens.next())?; + let args = (0..n).map(|_| Type::Any).collect(); + + Ok((Type::Function(FunctionType(Box::new(Type::Any), args)), name)) + } + + // typed function + Some(Ok(Token::Operator(Op::FunctionDeclare(n)))) => { + let name = Self::get_identifier(tokens.next())?; + let args = (0..n).map(|_| Self::parse_type(&mut tokens)).collect::>()?; + let mut ret = Type::Any; + + // this is annoying + // inside the next_if closure, we already can know that its an error + // and return it, but we cannot return out of a closure + if let Some(t) = tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow)))) + { + // so we just check for an error here. this is the only reason t exists. + if let Err(e) = t { + return Err(ParseError::TokenizeError(e)); + } + + ret = Self::parse_type(&mut tokens)?; + } + + Ok((Type::Function(FunctionType(Box::new(ret), args)), name)) + } + + Some(Ok(t)) => Err(ParseError::UnwantedToken(t)), + Some(Err(e)) => Err(ParseError::TokenizeError(e)), + None => Err(ParseError::UnexpectedEndInput), + } + } + + // for some dumbass reason, + // this is the only code that breaks if it doesn't take an impl Iterator instead of simply I ... 
+ fn parse_type(tokens: &mut Peekable>>) -> Result { + match tokens.next() { + Some(Ok(Token::Type(t))) => Ok(t), + Some(Ok(Token::Operator(Op::OpenArray))) => { + let mut depth = 1; + + // take tokens until we reach the end of this array + // if we don't collect them here it causes rust to overflow computing the types + let array_tokens = tokens.by_ref().take_while(|t| match t { + Ok(Token::Operator(Op::OpenArray)) => { + depth += 1; + true + }, + Ok(Token::Operator(Op::CloseArray)) => { + depth -= 1; + depth > 0 + } + _ => true, + }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + + // ... thanks to this conversion here. The compiler complains that the types don't + // match. there is code elsewhere in this codebase that looks exactly like this and + // still simply uses &mut Peekable as the type. I don't understand why this code + // is special, but we have to do horribleness for it to work. + let mut array_tokens = array_tokens + .into_iter() + .map(|t| Ok(t)) + .collect::>>() + .into_iter() + .peekable(); + + let t = match Self::parse_type(&mut array_tokens) { + Ok(t) => t, + Err(ParseError::UnexpectedEndInput) => Type::Any, + Err(e) => return Err(e), + }; + + Ok(Type::Array(Box::new(t))) + }, + Some(Ok(t)) => Err(ParseError::UnwantedToken(t.clone())), + Some(Err(e)) => Err(ParseError::TokenizeError(e)), + None => Err(ParseError::UnexpectedEndInput), + } + } + + fn get_identifier(t: Option>) -> Result { + match t.ok_or(ParseError::UnexpectedEndInput)? 
+ .map_err(|e| ParseError::TokenizeError(e)) + { + Ok(Token::Identifier(ident)) => Ok(ident), + Ok(t) => Err(ParseError::InvalidIdentifier(t)), + Err(e) => Err(e), + } + } } -impl>> Iterator for Parser { +impl<'a, I: Iterator>> Iterator for Parser<'a, I> { type Item = Result; fn next(&mut self) -> Option { - let tree = ParseTree::parse(&mut self.tokens, &self.globals, &mut Cow::Borrowed(&self.locals)); + let tree = self.parse(); match tree { Ok(tree) => Some(Ok(tree)), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1bd01f7..34df5da 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,6 +1,8 @@ use std::iter::Peekable; use std::{error, io}; -use std::collections::VecDeque; +use std::collections::{VecDeque, HashMap}; + +use crate::Type; use super::Value; use std::fmt::{Display, Formatter}; @@ -39,8 +41,8 @@ impl Display for TokenizeError { impl error::Error for TokenizeError {} -#[derive(Debug, Clone)] -pub(crate) enum Op { +#[derive(Debug, Clone, PartialEq)] +pub enum Op { Add, Sub, Mul, @@ -49,7 +51,11 @@ pub(crate) enum Op { Equ, Mod, LazyEqu, + TypeDeclaration, + FunctionDefine(usize), FunctionDeclare(usize), + LambdaDefine(usize), + Arrow, Compose, Id, If, @@ -57,6 +63,7 @@ pub(crate) enum Op { GreaterThan, LessThan, EqualTo, + NotEqualTo, GreaterThanOrEqualTo, LessThanOrEqualTo, Not, @@ -64,85 +71,87 @@ pub(crate) enum Op { FloatCast, BoolCast, StringCast, + Print, + OpenArray, + CloseArray, + OpenStatement, + CloseStatement, + Empty, + And, + Or, + NonCall, + Head, + Tail, + Init, + Fini, + Export, } -#[derive(Debug, Clone)] -pub(crate) enum Token { +#[derive(Debug, Clone, PartialEq)] +pub enum Token { Identifier(String), Operator(Op), Constant(Value), + Type(Type), } -fn get_dot_count(s: &str) -> Option { - s.chars().fold(Some(0), |acc, c| - match c { - ':' => acc.map(|acc| acc + 2), - '.' 
=> acc.map(|acc| acc + 1), - _ => None, - } - ) +fn get_dot_count>(s: &mut Peekable) -> Option { + let mut total = 0; + + while let Some(n) = s.next_if(|&c| c == ':' || c == '.').map(|c| match c { + ':' => 2, + '.' => 1, + _ => 0, + }) { + total += n; + } + + Some(total) } impl Token { /// Parse a single token fn parse(s: &str) -> Result { - let string = regex::Regex::new(r#"".+""#).map_err(|e| TokenizeError::Regex(e))?; let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?; let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?; - if string.is_match(s) { - Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string()))) - } else if identifier.is_match(s) { - Ok(Token::Identifier(s.to_string())) - } else if number.is_match(s) { - if let Ok(int) = s.parse::() { - Ok(Token::Constant(Value::Int(int))) - } else if let Ok(float) = s.parse::() { - Ok(Token::Constant(Value::Float(float))) - } else { - Err(TokenizeError::InvalidNumericConstant(s.to_string())) - } - } else { - match s { - // First check if s is an operator - "+" => Ok(Token::Operator(Op::Add)), - "-" => Ok(Token::Operator(Op::Sub)), - "*" => Ok(Token::Operator(Op::Mul)), - "/" => Ok(Token::Operator(Op::Div)), - "**" => Ok(Token::Operator(Op::Exp)), - "%" => Ok(Token::Operator(Op::Mod)), - "=" => Ok(Token::Operator(Op::Equ)), - "." => Ok(Token::Operator(Op::LazyEqu)), - "~" => Ok(Token::Operator(Op::Compose)), - "," => Ok(Token::Operator(Op::Id)), - "?" => Ok(Token::Operator(Op::If)), - "??" 
=> Ok(Token::Operator(Op::IfElse)), - ">" => Ok(Token::Operator(Op::GreaterThan)), - "<" => Ok(Token::Operator(Op::LessThan)), - ">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)), - "<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)), - "==" => Ok(Token::Operator(Op::EqualTo)), - - // then some keywords - "true" => Ok(Token::Constant(Value::Bool(true))), - "false" => Ok(Token::Constant(Value::Bool(false))), - "not" => Ok(Token::Operator(Op::Not)), - - // Type casting - "int" => Ok(Token::Operator(Op::IntCast)), - "float" => Ok(Token::Operator(Op::FloatCast)), - "bool" => Ok(Token::Operator(Op::BoolCast)), - "string" => Ok(Token::Operator(Op::StringCast)), - - // then variable length keywords - _ => { - if s.starts_with(":") { - Ok(Token::Operator(Op::FunctionDeclare( - get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))? - ))) + match s { + // Match keywords first + "true" => Ok(Token::Constant(Value::Bool(true))), + "false" => Ok(Token::Constant(Value::Bool(false))), + "int" => Ok(Token::Operator(Op::IntCast)), + "float" => Ok(Token::Operator(Op::FloatCast)), + "bool" => Ok(Token::Operator(Op::BoolCast)), + "string" => Ok(Token::Operator(Op::StringCast)), + "print" => Ok(Token::Operator(Op::Print)), + "empty" => Ok(Token::Operator(Op::Empty)), + "head" => Ok(Token::Operator(Op::Head)), + "tail" => Ok(Token::Operator(Op::Tail)), + "init" => Ok(Token::Operator(Op::Init)), + "fini" => Ok(Token::Operator(Op::Fini)), + "export" => Ok(Token::Operator(Op::Export)), + + // Types + "Any" => Ok(Token::Type(Type::Any)), + "Int" => Ok(Token::Type(Type::Int)), + "Float" => Ok(Token::Type(Type::Float)), + "Bool" => Ok(Token::Type(Type::Bool)), + "String" => Ok(Token::Type(Type::String)), + + // then identifiers and numbers + _ => { + if identifier.is_match(s) { + Ok(Token::Identifier(s.to_string())) + } else if number.is_match(s) { + if let Ok(int) = s.parse::() { + Ok(Token::Constant(Value::Int(int))) + } else if let Ok(float) = 
s.parse::() { + Ok(Token::Constant(Value::Float(float))) } else { - Err(TokenizeError::UnableToMatchToken(s.to_string())) + Err(TokenizeError::InvalidNumericConstant(s.to_string())) } + } else { + Err(TokenizeError::UnableToMatchToken(s.to_string())) } } } @@ -165,7 +174,39 @@ impl Tokenizer { /// Tokenizes more input and adds them to the internal queue fn tokenize>(&mut self, mut iter: Peekable) { - const OPERATOR_CHARS: &'static str = "!@$%^&*()-=+[]{}|;:,<.>/?"; + let operators: HashMap<&'static str, Op> = HashMap::from([ + ("+", Op::Add), + ("-", Op::Sub), + ("*", Op::Mul), + ("/", Op::Div), + ("**", Op::Exp), + ("%", Op::Mod), + ("=", Op::Equ), + (".", Op::LazyEqu), + ("?.", Op::TypeDeclaration), + (":", Op::FunctionDefine(1)), + ("?:", Op::FunctionDeclare(1)), + (";", Op::LambdaDefine(1)), + ("->", Op::Arrow), + ("~", Op::Compose), + (",", Op::Id), + ("?", Op::If), + ("??", Op::IfElse), + (">", Op::GreaterThan), + ("<", Op::LessThan), + (">=", Op::GreaterThanOrEqualTo), + ("<=", Op::LessThanOrEqualTo), + ("==", Op::EqualTo), + ("!=", Op::NotEqualTo), + ("[", Op::OpenArray), + ("]", Op::CloseArray), + ("(", Op::OpenStatement), + (")", Op::CloseStatement), + ("!", Op::Not), + ("&&", Op::And), + ("||", Op::Or), + ("'", Op::NonCall), + ]); let c = if let Some(c) = iter.next() { c @@ -173,26 +214,16 @@ impl Tokenizer { return; }; - if c.is_alphanumeric() || c == '.' { + if c.is_alphanumeric() { let mut token = String::from(c); while let Some(c) = iter.next_if(|&c| c.is_alphanumeric() || c == '.' 
|| c == '\'') { token.push(c); } - self.tokens.push_back(Token::parse(&token)); - self.tokenize(iter) - } else if OPERATOR_CHARS.contains(c) { - let mut token = String::from(c); - - while let Some(c) = iter.next_if(|&c| OPERATOR_CHARS.contains(c)) { - token.push(c); - } - self.tokens.push_back(Token::parse(&token)); self.tokenize(iter) } else if c == '#' { - // consume comments let _: String = iter.by_ref().take_while(|&c| c != '\n').collect(); } else if c == '\"' { let mut token = String::new(); @@ -221,11 +252,136 @@ impl Tokenizer { } self.tokens.push_back(Ok(Token::Constant(Value::String(token)))); + self.tokenize(iter) + } else if operators.keys().any(|x| x.starts_with(c)) { + let mut token = String::from(c); + + loop { + // get a list of all tokens this current token could possibly be + let possible: HashMap<&'static str, Op> = operators + .clone().into_iter() + .filter(|(key, _)| key.starts_with(&token)) + .collect(); + + // checks if a character is "expected", aka based on how many chars + // we have eaten so far, which characters out of the current nominees + // are expected in the next position + let is_expected = |c: &char| + possible.iter().any(|(op, _)| match op.chars().nth(token.len()) { + Some(i) => *c == i, + None => false, + }); + + match possible.len() { + 1 => { + // if the current operator exists in possible, we push it + // if not, we need to make sure that the next characters + // we grab *actually* match the last operator + if let Some(op) = possible.get(token.as_str()) { + self.tokens.push_back(Ok(Token::Operator(match op { + // special handling for "dynamic" operators + Op::FunctionDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDefine(n + count) + } + Op::FunctionDeclare(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + 
self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDeclare(n + count) + } + Op::LambdaDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::LambdaDefine(n + count) + } + op => op.clone(), + }))); + + break; + } else { + let next = match iter.next_if(is_expected) { + Some(c) => c, + None => { + self.tokens.push_back(Err(TokenizeError::UnableToMatchToken(format!("{token}")))); + return; + } + }; + + token.push(next); + } + } + 0 => unreachable!(), + _ => { + let next = match iter.next_if(is_expected) { + Some(c) => c, + None => { + // at this point, token must be in the hashmap possible, otherwise it wouldnt have any matches + self.tokens.push_back(Ok(Token::Operator(match possible.get(token.as_str()).unwrap() { + // special handling for "dynamic" operators + Op::FunctionDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + + Op::FunctionDefine(n + count) + } + Op::FunctionDeclare(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDeclare(n + count) + } + Op::LambdaDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::LambdaDefine(n + count) + } + op => op.clone(), + }))); + break; + } + }; + + token.push(next); + } + } + } + self.tokenize(iter) } else if c.is_whitespace() { self.tokenize(iter) } else { self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c))); + return; } } } @@ -262,16 +418,29 @@ impl std::iter::Iterator for Tokenizer { 
#[cfg(test)] mod tests { - use super::*; use std::str::FromStr; + use crate::parser::Parser; + use super::*; #[test] fn tokenizer() { - let program = ": function x ** x 2 function 1200"; + let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]"; - let tok = Tokenizer::from_str(program).unwrap(); - let tokens: Vec = tok.collect::>().expect("tokenizer error"); + let tokens: Vec = Tokenizer::from_str(program).unwrap().collect::>().unwrap(); - println!("{tokens:?}"); + println!("{tokens:#?}"); } -} + + #[test] + fn a() { + let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]"; + + let mut tokenizer = Tokenizer::from_str(program).unwrap().peekable(); + + let mut globals = HashMap::new(); + let mut parser = Parser::new(&mut tokenizer, &mut globals); + + let tree = parser.next(); + println!("{tree:#?}"); + } +} \ No newline at end of file