diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..22ef09a --- /dev/null +++ b/src/error.rs @@ -0,0 +1,81 @@ + +use std::error; +use std::fmt; +use std::ops::Range; + +#[derive(Debug, Clone)] +pub struct Error { + message: String, + note: Option, + + file: Option, + code: Option, + location: Option<(usize, Range)>, +} + +impl Error { + pub(crate) fn new(message: String) -> Self { + Self { + message, + note: None, + file: None, + code: None, + location: None + } + } + + pub(crate) fn note(mut self, note: String) -> Self { + self.note = Some(note); + self + } + + pub(crate) fn file(mut self, file: String) -> Self { + self.file = Some(file); + self + } + + pub(crate) fn location(mut self, line: usize, r: Range) -> Self { + self.location = Some((line, r)); + self + } + + pub(crate) fn code(mut self, code: String) -> Self { + self.code = Some(code); + self + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message)?; + + if let Some((line, loc)) = &self.location { + let filename = self.file.clone().unwrap_or("".into()); + + if let Some(code) = &self.code { + let mut lines = code.lines(); + let linect = match lines.nth(*line) { + Some(l) => l, + None => return Ok(()), // there should probably be an error if the line number is somehow out of range + }; + + write!(f, "\n| --> {filename}:{line}:{}\n| {linect}\n", loc.start)?; + + let spaces = " ".repeat(loc.start); + let pointers: String = loc.clone().map(|_| '^').collect(); + + write!(f, "|{spaces}{pointers}")?; + + if let Some(note) = &self.note { + write!(f, " {note}")?; + } + } else { + write!(f, " @ {filename}:{line}:{}", loc.start)?; + } + } + + Ok(()) + } +} + +impl error::Error for Error {} \ No newline at end of file diff --git a/src/executor.rs b/src/executor.rs index 7422d55..63a48e3 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,102 +1,66 @@ use super::{Value, Type, Object}; -use super::parser::{ParseTree, ParseError}; +use super::parser::ParseTree; use super::tokenizer::Op; +use super::error::Error; use std::collections::HashMap; -use std::fmt::Display; -use std::error::Error; -use std::io; use std::sync::{Arc, Mutex}; -#[derive(Debug)] -pub enum RuntimeError { - ParseError(ParseError), - NoOverloadForTypes(String, Vec), - ImmutableError(String), - VariableUndefined(String), - FunctionUndeclared(String), - FunctionUndefined(String), - NotAVariable(String), - ParseFail(String, Type), - TypeError(Type, Type), - EmptyArray, - IO(io::Error), -} - -impl Display for RuntimeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::ParseError(e) => write!(f, "Parser Error: {e}"), - Self::NoOverloadForTypes(op, values) - => write!(f, "No overload of `{op}` exists for the operands `{}`", - values.iter().map(|x| format!("{}({x})", x.get_type())).collect::>().join(", ")), - Self::ImmutableError(ident) => write!(f, "`{ident}` already exists and cannot be redefined"), - Self::VariableUndefined(ident) => write!(f, "variable `{ident}` was not defined"), - Self::FunctionUndeclared(ident) => write!(f, "function `{ident}` was not declared"), - Self::FunctionUndefined(ident) => write!(f, "function `{ident}` was not defined"), - Self::NotAVariable(ident) => write!(f, "`{ident}` is a function but was attempted to be used like a variable"), - Self::ParseFail(s, t) => write!(f, "`\"{s}\"` couldn't be parsed into {}", t), - Self::IO(e) => write!(f, "{e}"), - Self::TypeError(left, right) => write!(f, "expected type `{left}` but got type `{right}`"), - Self::EmptyArray => write!(f, "attempt to access element from an empty array"), - } - } -} - -impl Error for RuntimeError {} - /// Executes an input of ParseTrees -pub struct Executor<'a, I> -where - I: Iterator> -{ - exprs: &'a mut I, - globals: &'a mut HashMap>>, +pub(crate) struct Executor { + globals: HashMap>>, locals: HashMap>>, } -impl<'a, I> Executor<'a, I> -where - I: Iterator>, -{ - pub fn new(exprs: &'a mut I, globals: &'a mut HashMap>>) -> Self { +impl Executor { + pub(crate) fn new() -> Self { Self { - exprs, - globals, + globals: HashMap::new(), locals: HashMap::new(), } } - pub fn _add_global(self, k: String, v: Arc>) -> Self { + pub(crate) fn values(mut self, iter: I) -> impl Iterator> + where + I: Iterator> + { + iter.map(move |x| self.exec(x?)) + } + + pub(crate) fn add_global(mut self, k: String, v: Arc>) -> Self { self.globals.insert(k, v); self } - pub fn locals(mut self, locals: HashMap>>) -> Self { + pub(crate) fn add_globals>)>>(self, globals: Globals) -> Self { + globals.into_iter().fold(self, |acc, (k, v)| acc.add_global(k, v)) + } + + pub(crate) fn locals(mut self, locals: HashMap>>) -> Self { self.locals = locals; self } - pub fn add_local(mut self, k: String, v: Arc>) -> Self { + pub(crate) fn add_local(mut self, k: String, v: Arc>) -> Self { self.locals.insert(k, v); self } - fn _get_object(&self, ident: &String) -> Result<&Arc>, RuntimeError> { + fn _get_object(&self, ident: &String) -> Result<&Arc>, Error> { self.locals.get(ident).or(self.globals.get(ident)) - .ok_or(RuntimeError::VariableUndefined(ident.clone())) + .ok_or(Error::new(format!("undefined identifier {}", ident.clone()))) } - fn get_object_mut(&mut self, ident: &String) -> Result<&mut Arc>, RuntimeError> { + fn get_object_mut(&mut self, ident: &String) -> Result<&mut Arc>, Error> { self.locals.get_mut(ident).or(self.globals.get_mut(ident)) - .ok_or(RuntimeError::VariableUndefined(ident.clone())) + .ok_or(Error::new(format!("undefined identifier {}", ident.clone()))) } fn variable_exists(&self, ident: &String) -> bool { self.locals.contains_key(ident) || self.globals.contains_key(ident) } - fn eval(obj: &mut Arc>) -> Result { + fn eval(obj: &mut Arc>) -> Result { let mut guard = obj.lock().unwrap(); let v = guard.eval()?; @@ -120,11 +84,11 @@ where locals } - pub fn exec(&mut self, tree: Box) -> Result { - match *tree { + pub(crate) fn exec(&mut self, tree: ParseTree) -> Result { + match tree { ParseTree::Operator(op, args) => { let args: Vec = args.into_iter() - .map(|x| self.exec(Box::new(x))).collect::>()?; + .map(|x| self.exec(x)).collect::>()?; match op { Op::Add => match &args[..] { @@ -135,7 +99,7 @@ where [Value::String(x), Value::String(y)] => Ok(Value::String(format!("{x}{y}"))), [Value::Array(xtype, x), Value::Array(ytype, y)] => { if xtype != ytype { - return Err(RuntimeError::TypeError(xtype.clone(), ytype.clone())); + return Err(Error::new(format!("expected type {} but found {}", xtype, ytype))); } Ok(Value::Array(xtype.clone(), [x.clone(), y.clone()].concat())) @@ -146,7 +110,7 @@ where let ytype = y.get_type(); if *t != ytype { - return Err(RuntimeError::TypeError(t.clone(), ytype)); + return Err(Error::new(format!("expected type {} but found {}", t, ytype))); } // NOTE: use y's type instead of the arrays type. @@ -158,13 +122,13 @@ where let xtype = x.get_type(); if *t != xtype { - return Err(RuntimeError::TypeError(t.clone(), xtype)); + return Err(Error::new(format!("expected type {} but found {}", t, xtype))); } // NOTE: read above Ok(Value::Array(xtype, [vec![x.clone()], y.clone()].concat())) }, - _ => Err(RuntimeError::NoOverloadForTypes("+".into(), args)), + _ => Err(Error::new("todo: add".into())), } Op::Sub => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Int(x - y)), @@ -173,7 +137,7 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Float(x - y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("-".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Mul => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Int(x * y)), @@ -182,7 +146,7 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Float(x * y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("*".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Div => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Float(*x as f64 / *y as f64)), @@ -191,7 +155,7 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Float(x / y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("/".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::FloorDiv => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Int(x / y)), @@ -200,7 +164,7 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Int(*x as i64 / *y as i64)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("//".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Exp => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Float((*x as f64).powf(*y as f64))), @@ -209,7 +173,7 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Float(x.powf(*y))), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("**".into(), args)), + _ => Err(Error::new("todo: fsadfdsf".into())), } Op::Mod => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Int(x % y)), @@ -218,85 +182,85 @@ where [Value::Float(x), Value::Float(y)] => Ok(Value::Float(x % y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("%".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::GreaterThan => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x > y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x > *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 > *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x > y)), - _ => Err(RuntimeError::NoOverloadForTypes(">".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::GreaterThanOrEqualTo => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x >= y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x >= *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 >= *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x >= y)), - _ => Err(RuntimeError::NoOverloadForTypes(">=".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::LessThan => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x < y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x < *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool((*x as f64) < *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x < y)), - _ => Err(RuntimeError::NoOverloadForTypes("<".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::LessThanOrEqualTo => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x <= y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x <= *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 <= *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x <= y)), - _ => Err(RuntimeError::NoOverloadForTypes("<=".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::EqualTo => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x == y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x == *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 == *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x == y)), - _ => Err(RuntimeError::NoOverloadForTypes("==".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::NotEqualTo => match &args[..] { [Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x != y)), [Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x != *y as f64)), [Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 != *y)), [Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x != y)), - _ => Err(RuntimeError::NoOverloadForTypes("!=".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Not => match &args[0] { Value::Bool(b) => Ok(Value::Bool(!b)), - _ => Err(RuntimeError::NoOverloadForTypes("!".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Or => match &args[..] { [Value::Bool(x), Value::Bool(y)] => Ok(Value::Bool(*x || *y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("||".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::And => match &args[..] { [Value::Bool(x), Value::Bool(y)] => Ok(Value::Bool(*x && *y)), [Value::Nil, x] => Ok(x.clone()), [x, Value::Nil] => Ok(x.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("&&".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Compose => match &args[..] { [_, v] => Ok(v.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("~".into(), args)), + _ => Err(Error::new("todo: actual error output".into())), } Op::Head => match &args[0] { - Value::Array(_, x) => Ok(x.first().ok_or(RuntimeError::EmptyArray)?.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)), + Value::Array(_, x) => Ok(x.first().ok_or(Error::new(format!("passed an empty array to head")))?.clone()), + _ => Err(Error::new("head".into())), } Op::Tail => match &args[0] { Value::Array(t, x) => Ok(Value::Array(t.clone(), if x.len() > 0 { x[1..].to_vec() } else { vec![] })), - _ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)), + _ => Err(Error::new("tail".into())), } Op::Init => match &args[0] { Value::Array(t, x) => Ok(Value::Array(t.clone(), if x.len() > 0 { x[..x.len() - 1].to_vec() } else { vec![] })), - _ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)), + _ => Err(Error::new("init".into())), } Op::Fini => match &args[0] { - Value::Array(_, x) => Ok(x.last().ok_or(RuntimeError::EmptyArray)?.clone()), - _ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)), + Value::Array(_, x) => Ok(x.last().ok_or(Error::new(format!("passed an empty array to fini")))?.clone()), + _ => Err(Error::new("fini".into())), } Op::Id => match &args[0] { x => Ok(x.clone()), @@ -306,20 +270,20 @@ where Value::Float(x) => Ok(Value::Int(*x as i64)), Value::Bool(x) => Ok(Value::Int(if *x { 1 } else { 0 })), Value::String(x) => { - let r: i64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Int))?; + let r: i64 = x.parse().map_err(|_| Error::new(format!("failed to parse {} into {}", x, Type::Int)))?; Ok(Value::Int(r)) } - x => Err(RuntimeError::NoOverloadForTypes("int".into(), vec![x.clone()])), + x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Int))), } Op::FloatCast => match &args[0] { Value::Int(x) => Ok(Value::Float(*x as f64)), Value::Float(x) => Ok(Value::Float(*x)), Value::Bool(x) => Ok(Value::Float(if *x { 1.0 } else { 0.0 })), Value::String(x) => { - let r: f64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Int))?; + let r: f64 = x.parse().map_err(|_| Error::new(format!("failed to parse {} into {}", x, Type::Float)))?; Ok(Value::Float(r)) } - x => Err(RuntimeError::NoOverloadForTypes("float".into(), vec![x.clone()])), + x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Float))), } Op::BoolCast => match &args[0] { Value::Int(x) => Ok(Value::Bool(*x != 0)), @@ -327,7 +291,7 @@ where Value::Bool(x) => Ok(Value::Bool(*x)), Value::String(x) => Ok(Value::Bool(!x.is_empty())), Value::Array(_, vec) => Ok(Value::Bool(!vec.is_empty())), - x => Err(RuntimeError::NoOverloadForTypes("bool".into(), vec![x.clone()])), + x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Bool))), } Op::StringCast => Ok(Value::String(format!("{}", &args[0]))), Op::Print => match &args[0] { @@ -345,38 +309,38 @@ where } ParseTree::Equ(ident, body, scope) => { if self.variable_exists(&ident) { - Err(RuntimeError::ImmutableError(ident.clone())) + Err(Error::new(format!("attempt to override value of variable {ident}"))) } else { - let value = self.exec(body)?; + let value = self.exec(*body)?; let g = self.globals.clone(); - Executor::new(self.exprs, &mut self.globals) + Executor::new() .locals(self.locals.clone()) .add_local(ident, Arc::new(Mutex::new(Object::value(value, g, self.locals.to_owned())))) - .exec(scope) + .exec(*scope) } }, ParseTree::LazyEqu(ident, body, scope) => { if self.variable_exists(&ident) { - Err(RuntimeError::ImmutableError(ident.clone())) + Err(Error::new(format!("attempt to override value of variable {ident}"))) } else { let g = self.globals.clone(); - Executor::new(self.exprs, &mut self.globals) + Executor::new() .locals(self.locals.clone()) .add_local(ident, Arc::new(Mutex::new(Object::variable(*body, g, self.locals.to_owned())))) - .exec(scope) + .exec(*scope) } }, ParseTree::FunctionDefinition(func, scope) => { let g = self.globals.clone(); - Executor::new(self.exprs, &mut self.globals) + Executor::new() .locals(self.locals.clone()) .add_local(func.name().unwrap().to_string(), Arc::new(Mutex::new(Object::function( func .globals(g) .locals(self.locals.clone()), HashMap::new(), HashMap::new())))) - .exec(scope) + .exec(*scope) }, ParseTree::FunctionCall(ident, args) => { let obj = self.get_object_mut(&ident)?; @@ -394,35 +358,35 @@ where f.call(args) }, - _ => Err(RuntimeError::FunctionUndefined(ident.clone())) + _ => Err(Error::new(format!("the function {ident} is not defined"))) } }, ParseTree::_FunctionCallLocal(_idx, _args) => todo!(), - ParseTree::If(cond, body) => if match self.exec(cond)? { + ParseTree::If(cond, body) => if match self.exec(*cond)? { Value::Float(f) => f != 0.0, Value::Int(i) => i != 0, Value::Bool(b) => b, Value::String(s) => !s.is_empty(), Value::Array(_, vec) => !vec.is_empty(), Value::Nil => false, - x => return Err(RuntimeError::NoOverloadForTypes("?".into(), vec![x])), + x => return Err(Error::new(format!("could not convert {x} into a bool for truthiness check"))), } { - self.exec(body) + self.exec(*body) } else { Ok(Value::Nil) }, - ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(cond)? { + ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(*cond)? { Value::Float(f) => f != 0.0, Value::Int(i) => i != 0, Value::Bool(b) => b, Value::String(s) => !s.is_empty(), Value::Array(_, vec) => !vec.is_empty(), Value::Nil => false, - x => return Err(RuntimeError::NoOverloadForTypes("??".into(), vec![x])), + x => return Err(Error::new(format!("could not convert {x} into a bool for truthiness check"))), } { - self.exec(istrue) + self.exec(*istrue) } else { - self.exec(isfalse) + self.exec(*isfalse) }, ParseTree::Variable(ident) => { let obj = self.get_object_mut(&ident)?; @@ -436,7 +400,7 @@ where ParseTree::Nop => Ok(Value::Nil), ParseTree::Export(names) => { for name in names { - let obj = self.locals.remove(&name).ok_or(RuntimeError::VariableUndefined(name.clone()))?; + let obj = self.locals.remove(&name).ok_or(Error::new(format!("attempt to export an object that was not defined")))?; self.globals.insert(name, obj); } @@ -454,17 +418,3 @@ where } } } - -impl<'a, I: Iterator>> Iterator for Executor<'a, I> { - type Item = Result; - - fn next(&mut self) -> Option { - let expr = self.exprs.next(); - - match expr { - Some(Ok(expr)) => Some(self.exec(Box::new(expr))), - Some(Err(e)) => Some(Err(RuntimeError::ParseError(e))), - None => None, - } - } -} \ No newline at end of file diff --git a/src/function.rs b/src/function.rs index 5253091..60792df 100644 --- a/src/function.rs +++ b/src/function.rs @@ -1,6 +1,7 @@ use crate::parser::ParseTree; -use crate::executor::{Executor, RuntimeError}; +use crate::executor::Executor; use crate::{Type, Object, Value}; +use super::error::Error; use std::collections::HashMap; use std::fmt::{self, Display}; @@ -140,13 +141,11 @@ impl Function { self.t.clone() } - pub(crate) fn call(&mut self, args: Vec>>) -> Result { - let mut tree = vec![Ok(*self.body.clone())].into_iter(); - let mut globals = self.globals.clone().unwrap(); + pub(crate) fn call(&mut self, args: Vec>>) -> Result { + let globals = self.globals.clone().unwrap(); let locals = self.locals.clone().unwrap(); - let mut exec = Executor::new(&mut tree, &mut globals) - .locals(locals.clone()); + let mut exec = Executor::new().add_globals(globals).locals(locals.clone()); if let Some(names) = self.arg_names.clone() { for (obj, name) in std::iter::zip(args.clone().into_iter(), names.into_iter()) { @@ -158,7 +157,7 @@ impl Function { exec = exec.add_local(name, Arc::new(Mutex::new(Object::function(self.clone(), self.globals.clone().unwrap(), locals)))); } - exec.exec(self.body.clone()) + exec.exec(*self.body.clone()) } } diff --git a/src/lib.rs b/src/lib.rs index efeb75f..c9f91cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,17 +2,19 @@ mod tokenizer; mod parser; mod executor; mod function; +mod error; -use executor::{Executor, RuntimeError}; +use executor::Executor; use parser::{ParseTree, Parser}; use tokenizer::Tokenizer; use function::{FunctionType, Function}; +use error::Error; +use core::str; use std::collections::HashMap; use std::fmt::Display; use std::io::BufRead; use std::fmt; -use std::iter::Peekable; use std::sync::{Arc, Mutex}; #[derive(Clone, Debug)] @@ -147,16 +149,15 @@ impl Object { } /// evaluate the tree inside an object if it isn't evaluated yet, returns the value - pub fn eval(&mut self) -> Result { + pub fn eval(&mut self) -> Result { match self.value.clone() { Cache::Cached(v) => Ok(v), Cache::Uncached(tree) => { - let mut t = vec![Ok(tree.clone())].into_iter(); - - let mut exec = Executor::new(&mut t, &mut self.globals) + let mut exec = Executor::new() + .add_globals(self.globals.clone()) .locals(self.locals.clone()); - let v = exec.exec(Box::new(tree))?; + let v = exec.exec(tree)?; self.value = Cache::Cached(v.clone()); @@ -174,20 +175,111 @@ impl Object { } } -pub struct Runtime<'a, R: BufRead> { - tokenizer: Peekable>, - global_types: HashMap, - globals: HashMap>>, - parser: Option>>, +/// A custom type used in the tokenizer to automatically keep track of which character we are on +pub(crate) struct CodeIter { + reader: R, + code: String, + + // position in code + pos: usize, + + // the current line number + line: usize, + + // column in the current line + column: usize, } -impl<'a, R: BufRead> Runtime<'a, R> { - pub fn new(reader: R, name: &str) -> Self { +impl CodeIter { + fn new(reader: R) -> Self { Self { - tokenizer: Tokenizer::new(reader, name).peekable(), + reader, + code: String::new(), + pos: 0, + line: 0, + column: 0, + } + } + + pub(crate) fn getpos(&self) -> (usize, usize) { + (self.line, self.column) + } + + fn code(&self) -> String { + self.code.clone() + } + + // Peekable is useless here because I cannot access the inner object otherwise + pub(crate) fn peek(&mut self) -> Option { + if let Some(c) = self.code.chars().nth(self.pos) { + Some(c) + } else { + match self.reader.read_line(&mut self.code) { + Ok(0) => return None, + Ok(_) => (), + Err(_e) => panic!("aaaa"), + }; + + self.peek() + } + } + + pub(crate) fn next_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option { + let c = self.peek()?; + + if (func)(&c) { + self.next() + } else { + None + } + } +} + +impl Iterator for CodeIter { + type Item = char; + + fn next(&mut self) -> Option { + if let Some(c) = self.code.chars().nth(self.pos) { + match c { + '\n' => { + self.line += 1; + self.column = 0; + self.pos += 1; + + None + }, + c => { + self.column += 1; + self.pos += 1; + Some(c) + } + } + } else { + match self.reader.read_line(&mut self.code) { + Ok(0) => return None, + Ok(_) => (), + Err(_e) => panic!("aaaa"), + }; + + self.next() + } + } +} + +pub struct Runtime { + reader: Arc>>, + filename: String, + global_types: HashMap, + globals: HashMap>>, +} + +impl Runtime { + pub fn new(reader: R, filename: &str) -> Self { + Self { + reader: Arc::new(Mutex::new(CodeIter::new(reader))), + filename: filename.to_string(), global_types: HashMap::new(), globals: HashMap::new(), - parser: None, }.add_global("version'", Value::String( format!("{} ({}/{})", env!("CARGO_PKG_VERSION"), @@ -197,6 +289,12 @@ impl<'a, R: BufRead> Runtime<'a, R> { )) } + pub fn code(&self) -> String { + let reader = self.reader.lock().unwrap(); + let code = reader.code(); + code + } + pub fn add_global(mut self, name: &str, value: Value) -> Self { self.global_types.insert(name.to_string(), value.get_type()); self.globals.insert(name.to_string(), @@ -207,9 +305,27 @@ impl<'a, R: BufRead> Runtime<'a, R> { self } +} - pub fn values(&'a mut self) -> impl Iterator> + 'a { - self.parser = Some(Parser::new(&mut self.tokenizer, &mut self.global_types)); - Executor::new(self.parser.as_mut().unwrap(), &mut self.globals) +impl Iterator for Runtime { + type Item = Result; + + fn next(&mut self) -> Option { + let tokenizer = Tokenizer::new(self.reader.clone()); + + let tree = Parser::new() + .add_globals(self.global_types.clone()) + .parse(&mut tokenizer.peekable()); + + let tree = match tree.map_err(|e| e + .code(self.code()) + .file(self.filename.clone())) + { + Ok(Some(tree)) => tree, + Ok(None) => return None, + Err(e) => return Some(Err(e)) + }; + + Some(Executor::new().add_globals(self.globals.clone()).exec(tree)) } } diff --git a/src/main.rs b/src/main.rs index 5e96a83..097cf71 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,12 @@ use std::io::{self, BufReader}; fn main() { - let mut runtime = lamm::Runtime::new(BufReader::new(io::stdin()), ""); + let runtime = lamm::Runtime::new(BufReader::new(io::stdin()), ""); - for value in runtime.values() { + for value in runtime { match value { Ok(v) => println!("=> {v}"), - Err(e) => eprintln!("{e}"), + Err(e) => eprintln!("error: {e}"), } } } diff --git a/src/parser.rs b/src/parser.rs index 9ae07c1..7ebcfa3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,46 +1,11 @@ -use crate::executor::Executor; - use super::{Value, Type, Function, FunctionType}; -use super::tokenizer::{Token, TokenType, TokenizeError, Op}; +use super::tokenizer::{Token, TokenType, Op}; +use super::error::Error; -use std::borrow::BorrowMut; -use std::error; use std::collections::HashMap; -use std::fmt::Display; use std::iter::Peekable; -#[derive(Debug)] -pub enum ParseError { - NoInput, - UnexpectedEndInput, - IdentifierUndefined(Token), - InvalidIdentifier(Token), - UnmatchedArrayClose, - UnwantedToken(Token), - TokenizeError(TokenizeError), - ImmutableError(String), - RuntimeError, -} - -impl Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"), - ParseError::IdentifierUndefined(name) => write!(f, "Undefined identifier `{}` {}:{}:{}", name.lexeme, name.file, name.line, name.location.start), - ParseError::InvalidIdentifier(t) => write!(f, "Invalid identifier `{t:?}`"), - ParseError::NoInput => write!(f, "No input given"), - ParseError::UnmatchedArrayClose => write!(f, "there was an unmatched array closing operator `]`"), - ParseError::TokenizeError(e) => write!(f, "Tokenizer Error: {e}"), - ParseError::RuntimeError => write!(f, "Runtime Error"), - ParseError::ImmutableError(i) => write!(f, "attempt to redeclare {i} met with force"), - ParseError::UnwantedToken(t) => write!(f, "unexpected token {t:?}"), - } - } -} - -impl error::Error for ParseError {} - #[derive(Clone, Debug)] pub(crate) enum ParseTree { Operator(Op, Vec), @@ -69,45 +34,50 @@ pub(crate) enum ParseTree { } /// Parses input tokens and produces ParseTrees for an Executor -pub(crate) struct Parser<'a, I: Iterator>> { - tokens: &'a mut Peekable, - globals: &'a mut HashMap, +#[derive(Clone)] +pub(crate) struct Parser { + globals: HashMap, locals: HashMap, } -impl<'a, I: Iterator>> Parser<'a, I> { - pub fn new(tokens: &'a mut Peekable, globals: &'a mut HashMap) -> Self { +impl Parser { + pub(crate) fn new() -> Self { Self { - tokens, - globals, + globals: HashMap::new(), locals: HashMap::new() } } + pub(crate) fn trees>>(mut self, mut tokens: Peekable) -> impl Iterator> { + std::iter::from_fn(move || { + match self.parse(&mut tokens) { + Ok(Some(tree)) => Some(Ok(tree)), + Ok(None) => None, + Err(e) => Some(Err(e)), + } + }) + } - pub fn _add_global(self, k: String, v: Type) -> Self { + pub(crate) fn add_global(mut self, k: String, v: Type) -> Self { self.globals.insert(k, v); self } - pub fn _add_globals>(self, items: Items) -> Self { - items.for_each(|(name, t)| { - self.globals.insert(name, t); - }); - self + pub(crate) fn add_globals>(self, items: Items) -> Self { + items.into_iter().fold(self, |acc, (k, v)| acc.add_global(k, v)) } - pub fn locals(mut self, locals: HashMap) -> Self { + pub(crate) fn locals(mut self, locals: HashMap) -> Self { self.locals = locals; self } - pub fn add_local(mut self, k: String, v: Type) -> Self { + pub(crate) fn add_local(mut self, k: String, v: Type) -> Self { self.locals.insert(k, v); self } - pub fn _add_locals>(mut self, items: Items) -> Self { + pub(crate) fn _add_locals>(mut self, items: Items) -> Self { items.for_each(|(name, t)| { self.locals.insert(name, t); }); @@ -123,15 +93,15 @@ impl<'a, I: Iterator>> Parser<'a, I> { } // get at most count arguments - fn get_args(&mut self, count: usize) -> Result, ParseError> { - (0..count).map_while(|_| match self.parse() { - Ok(r) => Some(Ok(r)), - Err(ParseError::NoInput) => None, + fn get_args>>(&mut self, tokens: &mut Peekable, count: usize) -> Result, Error> { + (0..count).map_while(|_| match self.parse(tokens) { + Ok(Some(tree)) => Some(Ok(tree)), + Ok(None) => None, Err(e) => Some(Err(e)), }).collect() } - fn parse_operator(&mut self, op: Op) -> Result { + fn parse_operator>>(&mut self, tokens: &mut Peekable, op: Op) -> Result { let operators: HashMap = HashMap::from([ (Op::Add, FunctionType(Box::new(Type::Any), vec![Type::Any, Type::Any])), (Op::Sub, FunctionType(Box::new(Type::Any), vec![Type::Any, Type::Any])), @@ -162,7 +132,7 @@ impl<'a, I: Iterator>> Parser<'a, I> { ]); let operator = operators.get(&op).expect("All operators should be accounted for"); - let args = self.get_args(operator.1.len())?; + let args = self.get_args(tokens, operator.1.len())?; if args.len() == operator.1.len() { Ok(ParseTree::Operator(op, args)) @@ -188,18 +158,22 @@ impl<'a, I: Iterator>> Parser<'a, I> { } } - fn parse(&mut self) -> Result { - let token = self.tokens.next() - .ok_or(ParseError::NoInput)? - .map_err(|e| ParseError::TokenizeError(e))?; + pub(crate) fn parse>>(&mut self, tokens: &mut Peekable) -> Result, Error> { + let token = match tokens.next() { + Some(Ok(t)) => t, + Some(Err(e)) => return Err(e), + None => return Ok(None), + }; match token.token() { - TokenType::Constant(c) => Ok(ParseTree::Value(c)), + TokenType::Constant(c) => Ok(Some(ParseTree::Value(c))), TokenType::Identifier(ident) => { - match self.get_object_type(&ident).ok_or(ParseError::IdentifierUndefined(token))? { + match self.get_object_type(&ident).ok_or( + Error::new(format!("undefined identifier {ident}")) + .location(token.line, token.location))? { Type::Function(f) => { let f = f.clone(); - let args = self.get_args(f.1.len())?; + let args = self.get_args(tokens, f.1.len())?; if args.len() < f.1.len() { let mut counter = 0; @@ -212,19 +186,19 @@ impl<'a, I: Iterator>> Parser<'a, I> { }).unzip(); let function_type = FunctionType(f.0.clone(), types); - Ok(ParseTree::Value(Value::Function(Function::lambda( + Ok(Some(ParseTree::Value(Value::Function(Function::lambda( function_type, names.clone(), Box::new(ParseTree::FunctionCall(ident, vec![ args, names.into_iter().map(|x| ParseTree::Variable(x)).collect() - ].concat())))))) + ].concat()))))))) } else { - Ok(ParseTree::FunctionCall(ident, args)) + Ok(Some(ParseTree::FunctionCall(ident, args))) } } - _ => Ok(ParseTree::Variable(ident)), + _ => Ok(Some(ParseTree::Variable(ident))), } }, TokenType::Operator(op) => match op { @@ -233,7 +207,7 @@ impl<'a, I: Iterator>> Parser<'a, I> { // take tokens until we reach the end of this array // if we don't collect them here it causes rust to overflow computing the types - let array_tokens = self.tokens.by_ref().take_while(|t| match t { + let array_tokens = tokens.by_ref().take_while(|t| match t { Ok(t) => match t.token() { TokenType::Operator(Op::OpenArray) => { depth += 1; @@ -246,32 +220,33 @@ impl<'a, I: Iterator>> Parser<'a, I> { _ => true, } _ => true, - }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + }).collect::, Error>>()?; - let mut array_tokens = array_tokens + let array_tokens = array_tokens .into_iter() .map(|t| Ok(t)) - .collect::>>() + .collect::>>() .into_iter() .peekable(); - let trees: Vec = Parser::new(&mut array_tokens, self.globals.borrow_mut()) + let trees: Vec = Parser::new() .locals(self.locals.to_owned()) - .collect::>()?; + .trees(array_tokens) + .collect::>()?; let tree = trees.into_iter().fold( ParseTree::Value(Value::Array(Type::Any, vec![])), |acc, x| ParseTree::Operator(Op::Add, vec![acc, x.clone()]), ); - Ok(tree) + Ok(Some(tree)) }, Op::OpenStatement => { let mut depth = 1; // take tokens until we reach the end of this array // if we don't collect them here it causes rust to overflow computing the types - let array_tokens = self.tokens.by_ref().take_while(|t| match t { + let array_tokens = tokens.by_ref().take_while(|t| match t { Ok(t) => match t.token() { TokenType::Operator(Op::OpenStatement) => { depth += 1; @@ -284,122 +259,135 @@ impl<'a, I: Iterator>> Parser<'a, I> { _ => true, } _ => true, - }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + }).collect::, Error>>()?; - let mut array_tokens = array_tokens + let array_tokens = array_tokens .into_iter() .map(|t| Ok(t)) - .collect::>>() + .collect::>>() .into_iter() .peekable(); - let trees: Vec = Parser::new(&mut array_tokens, self.globals.borrow_mut()) + let trees: Vec = Parser::new() .locals(self.locals.to_owned()) - .collect::>()?; + .trees(array_tokens) + .collect::>()?; let tree = trees.into_iter().fold( ParseTree::Nop, |acc, x| ParseTree::Operator(Op::Compose, vec![acc, x.clone()]), ); - Ok(tree) + Ok(Some(tree)) }, - Op::Equ | Op::LazyEqu => { - let token = self.tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?; - - let body = Box::new(self.parse()?); + Op::Equ => { + let token = tokens.next() + .ok_or(Error::new("no identifier given for = expression".into()) + .location(token.line, token.location) + .note("expected an identifier after this token".into()))??; if let TokenType::Identifier(ident) = token.token() { - match op { - Op::Equ => Ok(ParseTree::Equ( - ident.clone(), - body, - Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut()) - .locals(self.locals.clone()) - .add_local(ident, Type::Any) - .parse()?)) - ), - Op::LazyEqu => Ok(ParseTree::LazyEqu( - ident.clone(), - body, - Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut()) - .locals(self.locals.clone()) - .add_local(ident, Type::Any) - .parse()?)) - ), - _ => unreachable!(), - } + let body = Box::new(self.parse(tokens)?.ok_or(Error::new(format!("the variable `{ident}` has no value")) + .location(token.line, token.location.clone()) + .note("expected a value after this identifier".into()))?); + + let scope = Parser::new() + .locals(self.locals.clone()) + .add_local(ident.clone(), Type::Any) + .parse(tokens)? + .ok_or(Error::new("variable declaration requires a scope defined after it".into()) + .location(token.line, token.location) + .note(format!("this variable {ident} has no scope")))?; + + Ok(Some(ParseTree::Equ( + ident.clone(), + body, + Box::new(scope)) + )) } else { - Err(ParseError::InvalidIdentifier(token)) + Err(Error::new(format!("`{}` is not a valid identifier", token.lexeme)).location(token.line, token.location)) + } + }, + Op::LazyEqu => { + let token = tokens.next() + .ok_or(Error::new("no identifier given for = expression".into()) + .location(token.line, token.location) + .note("expected an identifier after this token".into()))??; + + if let TokenType::Identifier(ident) = token.token() { + let body = Box::new(self.parse(tokens)?.ok_or(Error::new(format!("the variable `{ident}` has no value")) + .location(token.line, token.location.clone()) + .note("expected a value after this identifier".into()))?); + + let scope = Parser::new() + .locals(self.locals.clone()) + .add_local(ident.clone(), Type::Any) + .parse(tokens)? + .ok_or(Error::new("variable declaration requires a scope defined after it".into()) + .location(token.line, token.location) + .note(format!("this variable {ident} has no scope")))?; + + Ok(Some(ParseTree::LazyEqu( + ident.clone(), + body, + Box::new(scope)) + )) + } else { + Err(Error::new(format!("`{}` is not a valid identifier", token.lexeme)).location(token.line, token.location)) } }, Op::FunctionDefine(arg_count) => { - let f = self.parse_function_definition(arg_count)?; + let f = self.parse_function_definition(tokens, arg_count)?; - Ok(ParseTree::FunctionDefinition( - f.clone(), - Box::new( - Parser::new(self.tokens, self.globals.borrow_mut()) - .locals(self.locals.clone()) - .add_local(f.name().unwrap().to_string(), Type::Function(f.get_type())) - .parse()? - ))) + let scope = Parser::new() + .locals(self.locals.clone()) + .add_local(f.name().unwrap().to_string(), Type::Function(f.get_type())) + .parse(tokens)? + .ok_or(Error::new("function declaration requires a scope defined after it".into()) + .location(token.line, token.location) + .note(format!("this function {} has no scope", f.name().unwrap())))?; + + Ok(Some(ParseTree::FunctionDefinition( f.clone(), Box::new(scope)))) }, - Op::LambdaDefine(arg_count) => { - let f = self.parse_lambda_definition(arg_count)?; - - Ok(ParseTree::LambdaDefinition(f)) - }, - Op::Export => { - let list = self.parse()?; - - let mut g = HashMap::new(); - let list = Executor::new(&mut vec![Ok(list)].into_iter(), &mut g) - .next().unwrap().map_err(|_| ParseError::RuntimeError)?; - - if let Value::Array(Type::String, items) = list { - let names = items.into_iter().map(|x| match x { - Value::String(s) => s, - _ => unreachable!(), - }); - - for name in names.clone() { - let t = self.locals.remove(&name).ok_or(ParseError::IdentifierUndefined(token.clone()))?; - self.globals.insert(name, t); - } - - Ok(ParseTree::Export(names.collect())) - } else { - Err(ParseError::NoInput) - } - } - Op::Empty => Ok(ParseTree::Value(Value::Array(Type::Any, vec![]))), + Op::LambdaDefine(arg_count) => Ok(Some(ParseTree::LambdaDefinition(self.parse_lambda_definition(tokens, arg_count)?))), + Op::Empty => Ok(Some(ParseTree::Value(Value::Array(Type::Any, vec![])))), Op::NonCall => { - let name = Self::get_identifier(self.tokens.next())?; - Ok(ParseTree::NonCall(name)) + let name = Self::get_identifier(tokens.next())?; + Ok(Some(ParseTree::NonCall(name))) }, Op::If => { - let cond = self.parse()?; - let truebranch = self.parse()?; + let cond = self.parse(tokens)? + .ok_or(Error::new("? statement requires a condition".into()) + .location(token.line, token.location.clone()))?; + let truebranch = self.parse(tokens)? + .ok_or(Error::new("? statement requires a branch".into()) + .location(token.line, token.location))?; - Ok(ParseTree::If(Box::new(cond), Box::new(truebranch))) + Ok(Some(ParseTree::If(Box::new(cond), Box::new(truebranch)))) }, Op::IfElse => { - let cond = self.parse()?; - let truebranch = self.parse()?; - let falsebranch = self.parse()?; + let cond = self.parse(tokens)? + .ok_or(Error::new("?? statement requires a condition".into()) + .location(token.line, token.location.clone()))?; + let truebranch = self.parse(tokens)? + .ok_or(Error::new("?? statement requires a branch".into()) + .location(token.line, token.location.clone()))?; + let falsebranch = self.parse(tokens)? + .ok_or(Error::new("?? statement requires a false branch".into()) + .location(token.line, token.location))?; - Ok(ParseTree::IfElse( - Box::new(cond), Box::new(truebranch), Box::new(falsebranch))) + Ok(Some(ParseTree::IfElse( + Box::new(cond), Box::new(truebranch), Box::new(falsebranch)))) }, - op => self.parse_operator(op), + Op::Export => todo!(), + op => self.parse_operator(tokens, op).map(|x| Some(x)), }, - _ => Err(ParseError::UnwantedToken(token)), + _ => Err(Error::new(format!("the token {} was unexpected", token.lexeme)).location(token.line, token.location)), } } - fn parse_lambda_definition(&mut self, arg_count: usize) -> Result { - let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?; + fn parse_lambda_definition>>(&mut self, tokens: &mut Peekable, arg_count: usize) -> Result { + let (t, args) = Self::parse_function_declaration(tokens, arg_count)?; let mut locals = self.locals.clone(); @@ -408,13 +396,13 @@ impl<'a, I: Iterator>> Parser<'a, I> { } Ok(Function::lambda(t, args, Box::new( - Parser::new(self.tokens, &mut self.globals) - .locals(locals).parse()?))) + Parser::new() + .locals(locals).parse(tokens)?.ok_or(Error::new("lambda requires a body".into()))?))) } - fn parse_function_definition(&mut self, arg_count: usize) -> Result { - let name = Self::get_identifier(self.tokens.next())?; - let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?; + fn parse_function_definition>>(&mut self, tokens: &mut Peekable, arg_count: usize) -> Result { + let name = Self::get_identifier(tokens.next())?; + let (t, args) = Self::parse_function_declaration(tokens, arg_count)?; let mut locals = self.locals.clone(); @@ -425,13 +413,13 @@ impl<'a, I: Iterator>> Parser<'a, I> { locals.insert(name.clone(), Type::Function(t.clone())); Ok(Function::named(&name, t, args, Box::new( - Parser::new(self.tokens, &mut self.globals) - .locals(locals).parse()?))) + Parser::new() + .locals(locals).parse(tokens)?.ok_or(Error::new("function requires a body".into()))?))) } - fn parse_function_declaration( + fn parse_function_declaration>>( tokens: &mut Peekable, - arg_count: usize) -> Result<(FunctionType, Vec), ParseError> + arg_count: usize) -> Result<(FunctionType, Vec), Error> { let args: Vec<(Type, String)> = (0..arg_count) .map(|_| Self::parse_function_declaration_parameter(tokens)) @@ -448,9 +436,8 @@ impl<'a, I: Iterator>> Parser<'a, I> { Ok((FunctionType(Box::new(ret), types), names)) } - fn parse_function_declaration_parameter(mut tokens: &mut Peekable) -> Result<(Type, String), ParseError> - { - let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?; + fn parse_function_declaration_parameter>>(tokens: &mut Peekable) -> Result<(Type, String), Error> { + let token = tokens.next().ok_or(Error::new("function definition is incomplete".into()))??; match token.token() { // untyped variable @@ -459,7 +446,7 @@ impl<'a, I: Iterator>> Parser<'a, I> { // typed variable TokenType::Operator(Op::TypeDeclaration) => { let name = Self::get_identifier(tokens.next())?; - let t = Self::parse_type(&mut tokens)?; + let t = Self::parse_type(tokens)?; Ok((t, name)) } @@ -475,7 +462,7 @@ impl<'a, I: Iterator>> Parser<'a, I> { // typed function TokenType::Operator(Op::FunctionDeclare(n)) => { let name = Self::get_identifier(tokens.next())?; - let args = (0..n).map(|_| Self::parse_type(&mut tokens)).collect::>()?; + let args = (0..n).map(|_| Self::parse_type(tokens)).collect::>()?; let mut ret = Type::Any; // this is annoying @@ -485,22 +472,20 @@ impl<'a, I: Iterator>> Parser<'a, I> { { // so we just check for an error here. this is the only reason t exists. if let Err(e) = t { - return Err(ParseError::TokenizeError(e)); + return Err(e); } - ret = Self::parse_type(&mut tokens)?; + ret = Self::parse_type(tokens)?; } Ok((Type::Function(FunctionType(Box::new(ret), args)), name)) } - _ => Err(ParseError::UnwantedToken(token)), + _ => Err(Error::new(format!("unexpected token {}", token.lexeme))), } } - // for some dumbass reason, - // this is the only code that breaks if it doesn't take an impl Iterator instead of simply I ... - fn parse_type(tokens: &mut Peekable>>) -> Result { - let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?; + fn parse_type>>(tokens: &mut Peekable) -> Result { + let token = tokens.next().ok_or(Error::new("type is incomplete".into()))??; match token.token() { TokenType::Type(t) => Ok(t), @@ -511,67 +496,43 @@ impl<'a, I: Iterator>> Parser<'a, I> { // if we don't collect them here it causes rust to overflow computing the types let array_tokens = tokens.by_ref().take_while(|t| match t { Ok(t) => match t.token() { - TokenType::Operator(Op::OpenStatement) => { + TokenType::Operator(Op::OpenArray) => { depth += 1; true }, - TokenType::Operator(Op::CloseStatement) => { + TokenType::Operator(Op::CloseArray) => { depth -= 1; depth > 0 } _ => true, } _ => true, - }).collect::, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?; + }).collect::, Error>>()?; - // ... thanks to this conversion here. The compiler complains that the types don't - // match. there is code elsewhere in this codebase that looks exactly like this and - // still simply uses &mut Peekable as the type. I don't understand why this code - // is special, but we have to do horribleness for it to work. let mut array_tokens = array_tokens .into_iter() .map(|t| Ok(t)) - .collect::>>() - .into_iter() - .peekable(); + .collect::>() + .into_iter(); - let t = match Self::parse_type(&mut array_tokens) { - Ok(t) => t, - Err(ParseError::UnexpectedEndInput) => Type::Any, - Err(e) => return Err(e), + let t = if array_tokens.len() == 0 { + Type::Any + } else { + Parser::parse_type(&mut array_tokens.by_ref().peekable())? }; Ok(Type::Array(Box::new(t))) }, - _ => Err(ParseError::UnwantedToken(token)), + _ => Err(Error::new(format!("unexpected token {}", token.lexeme))), } } - fn get_identifier(t: Option>) -> Result { - let token = t.ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))?; + fn get_identifier(t: Option>) -> Result { + let token = t.ok_or(Error::new(format!("expected an identifier, found nothing")))??; match token.token() { TokenType::Identifier(ident) => Ok(ident), - _ => Err(ParseError::InvalidIdentifier(token)), - } - } -} - -impl<'a, I: Iterator>> Iterator for Parser<'a, I> { - type Item = Result; - - fn next(&mut self) -> Option { - let tree = self.parse(); - - match tree { - Ok(tree) => Some(Ok(tree)), - Err(e) => { - match e { - ParseError::NoInput => None, - _ => Some(Err(e)), - } - } + _ => Err(Error::new(format!("the identifier {} is invalid", token.lexeme))), } } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index e4c9556..faa1f56 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,48 +1,13 @@ -use std::iter::Peekable; -use std::{error, io}; use std::collections::{VecDeque, HashMap}; +use std::sync::{Arc, Mutex}; -use crate::Type; +use crate::{CodeIter, Type}; +use crate::error::Error; use super::Value; -use std::fmt::{Display, Formatter}; use std::io::BufRead; -use std::sync::Arc; use std::ops::Range; -#[derive(Debug)] -pub enum TokenizeError { - InvalidDynamicOperator(String), - InvalidNumericConstant(String), - InvalidIdentifier(String), - UnableToMatchToken(String), - InvalidCharacter(char), - UnclosedString, - IO(io::Error), - Regex(regex::Error), -} - -impl Display for TokenizeError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - TokenizeError::InvalidDynamicOperator(op) - => write!(f, "invalid dynamic operator `{op}`"), - TokenizeError::InvalidNumericConstant(t) - => write!(f, "invalid numeric constant `{t}`"), - TokenizeError::InvalidIdentifier(ident) - => write!(f, "invalid identifier `{ident}`"), - TokenizeError::UnableToMatchToken(token) - => write!(f, "the token `{token}` was unable to be parsed"), - TokenizeError::InvalidCharacter(c) => write!(f, "`{c}` is not understood"), - TokenizeError::UnclosedString => write!(f, "newline was found before string was closed"), - TokenizeError::IO(io) => write!(f, "{io}"), - TokenizeError::Regex(re) => write!(f, "{re}"), - } - } -} - -impl error::Error for TokenizeError {} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Op { Add, @@ -100,10 +65,7 @@ pub enum TokenType { impl TokenType { /// Parse a single token - fn parse(s: &str) -> Result { - let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?; - let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?; - + fn parse(s: &str) -> Result { Ok(match s { // Match keywords first "true" => TokenType::Constant(Value::Bool(true)), @@ -130,18 +92,18 @@ impl TokenType { // then identifiers and numbers _ => { - if identifier.is_match(s) { + if s.starts_with(char::is_alphabetic) { TokenType::Identifier(s.to_string()) - } else if number.is_match(s) { + } else if s.starts_with(|c: char| c.is_digit(10)) { if let Ok(int) = s.parse::() { TokenType::Constant(Value::Int(int)) } else if let Ok(float) = s.parse::() { TokenType::Constant(Value::Float(float)) } else { - return Err(TokenizeError::InvalidNumericConstant(s.to_string())); + return Err(Error::new(format!("Invalid numeric constant `{s}`"))); } } else { - return Err(TokenizeError::UnableToMatchToken(s.to_string())); + return Err(Error::new(format!("Couldn't match token `{s}`"))); } } }) @@ -153,17 +115,15 @@ pub struct Token { t: TokenType, pub lexeme: String, pub line: usize, - pub file: Arc, pub location: Range, } impl Token { - pub fn new(t: TokenType, lexeme: String, file: Arc, line: usize, column: usize) -> Self { + pub fn new(t: TokenType, lexeme: String, line: usize, column: usize) -> Self { Self { t, line, - file, - location: column..column+lexeme.len(), + location: column..column + lexeme.len(), lexeme, } } @@ -174,81 +134,54 @@ impl Token { } /// Tokenize an input stream of source code for a Parser +#[derive(Clone)] pub(crate) struct Tokenizer { - reader: R, - line: usize, - column: usize, - code: String, - filename: Arc, + reader: Arc>>, tokens: VecDeque, } impl Tokenizer { - pub fn new(reader: R, filename: &str) -> Self { + pub fn new(reader: Arc>>) -> Self { Self { reader, - line: 0, - column: 0, - filename: Arc::new(filename.to_string()), - code: String::new(), tokens: VecDeque::new(), } } - fn get_dot_count>(&mut self, s: &mut Peekable) -> Option { + fn next_char(&mut self) -> Option { + let mut reader = self.reader.lock().unwrap(); + let c = reader.next(); + c + } + + fn next_char_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option { + let mut reader = self.reader.lock().unwrap(); + let c = reader.next_if(func); + c + } + + fn getpos(&self) -> (usize, usize) { + let reader = self.reader.lock().unwrap(); + let r = reader.getpos(); + r + } + + fn get_dot_count(&mut self) -> usize { let mut total = 0; - while let Some(n) = self.next_char_if(s, |&c| c == ':' || c == '.').map(|c| match c { + while let Some(n) = self.next_char_if(|&c| c == ':' || c == '.').map(|c| match c { ':' => 2, '.' => 1, - _ => 0, + _ => unreachable!(), }) { total += n; } - Some(total) - } - - fn next_char>(&mut self, iter: &mut Peekable) -> Option { - if let Some(c) = iter.next() { - self.column += 1; - Some(c) - } else { - None - } - } - - fn next_char_if>( - &mut self, - iter: &mut Peekable, - pred: impl FnOnce(&char) -> bool) -> Option - { - if let Some(c) = iter.next_if(pred) { - self.column += 1; - Some(c) - } else { - None - } - } - - fn next_char_while>( - &mut self, - iter: &mut Peekable, - mut pred: impl FnMut(&char) -> bool) -> Option - { - if let Some(c) = self.next_char(iter) { - if (pred)(&c) { - Some(c) - } else { - None - } - } else { - None - } + total } /// Tokenizes more input and adds them to the internal queue - fn tokenize>(&mut self, mut iter: Peekable) -> Result<(), TokenizeError> { + fn tokenize(&mut self) -> Result<(), Error> { let operators: HashMap<&'static str, Op> = HashMap::from([ ("+", Op::Add), ("-", Op::Sub), @@ -284,7 +217,7 @@ impl Tokenizer { ("\\", Op::NonCall), ]); - let c = if let Some(c) = self.next_char(&mut iter) { + let c = if let Some(c) = self.next_char() { c } else { return Ok(()); @@ -293,40 +226,52 @@ impl Tokenizer { if c.is_alphanumeric() { let mut token = String::from(c); - while let Some(c) = self.next_char_if(&mut iter, |&c| c.is_alphanumeric() || c == '.' || c == '\'') { + while let Some(c) = self.next_char_if(|&c| c.is_alphanumeric() || c == '.' || c == '\'') { token.push(c); } - self.tokens.push_back(Token::new(TokenType::parse(&token)?, token, self.filename.clone(), self.line, self.column)); - self.tokenize(iter) + let (line, column) = self.getpos(); + + self.tokens.push_back(Token::new(TokenType::parse(&token) + .map_err(|e| e.location(line, column - token.len() + 1..column + 1))?, token.clone(), line, column - token.len() + 1)); + self.tokenize() } else if c == '#' { - while self.next_char_while(&mut iter, |&c| c != '\n').is_some() {} - self.tokenize(iter) + while self.next_char_if(|&c| c != '\n').is_some() {} + self.tokenize() } else if c == '\"' { let mut token = String::new(); + let (line, col) = self.getpos(); - while let Some(c) = self.next_char(&mut iter) { + while let Some(c) = self.next_char() { match c { '"' => break, - '\n' => return Err(TokenizeError::UnclosedString), - '\\' => match iter.next() { + '\n' => return Err( + Error::new("Unclosed string literal".into()) + .location(line, col..self.getpos().1) + .note("newlines are not allowed in string literals (try \\n)".into())), + '\\' => match self.next_char() { Some('\\') => token.push('\\'), Some('n') => token.push('\n'), Some('t') => token.push('\t'), Some('r') => token.push('\r'), Some('\"') => token.push('"'), Some(c) => token.push(c), - None => return Err(TokenizeError::UnclosedString), + None => return Err( + Error::new("Unclosed string literal".into()) + .location(line, col..self.getpos().1) + .note("end of file found before \"".into())), } _ => token.push(c), } } + let (line, col) = self.getpos(); + self.tokens.push_back( Token::new(TokenType::Constant( - Value::String(token.clone())), token, self.filename.clone(), self.line, self.column)); + Value::String(token.clone())), token, line, col)); - self.tokenize(iter) + self.tokenize() } else if operators.keys().any(|x| x.starts_with(c)) { let mut token = String::from(c); @@ -352,39 +297,31 @@ impl Tokenizer { // if not, we need to make sure that the next characters // we grab *actually* match the last operator if let Some(op) = possible.get(token.as_str()) { - let token = Token::new(TokenType::Operator(match op { + let t = TokenType::Operator(match op { // special handling for "dynamic" operators - Op::FunctionDefine(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - Op::FunctionDefine(n + count) - } - Op::FunctionDeclare(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - Op::FunctionDeclare(n + count) - } - Op::LambdaDefine(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - Op::LambdaDefine(n + count) - } + Op::FunctionDefine(n) => Op::FunctionDefine(n + self.get_dot_count()), + Op::FunctionDeclare(n) => Op::FunctionDeclare(n + self.get_dot_count()), + Op::LambdaDefine(n) => Op::LambdaDefine(n + self.get_dot_count()), op => op.clone(), - }), token, self.filename.clone(), self.line, self.column); + }); + + let (line, col) = self.getpos(); + + let token = Token::new(t, token, line, col); self.tokens.push_back(token); break; } else { - let next = match self.next_char_if(&mut iter, is_expected) { + let next = match self.next_char_if(is_expected) { Some(c) => c, - None => return Err(TokenizeError::UnableToMatchToken(format!("{token}"))), + None => { + let (line, col) = self.getpos(); + + return Err( + Error::new(format!("the operator {token} is undefined")) + .location(line, col - token.len()..col)) + } }; token.push(next); @@ -392,37 +329,22 @@ impl Tokenizer { } 0 => unreachable!(), _ => { - let next = match self.next_char_if(&mut iter, is_expected) { + let c = self.next_char_if(is_expected); + let next = match c { Some(c) => c, None => { - let token = Token::new(TokenType::Operator(match possible.get(token.as_str()).unwrap() { + let t = TokenType::Operator(match possible.get(token.as_str()).unwrap() { // special handling for "dynamic" operators - Op::FunctionDefine(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - - Op::FunctionDefine(n + count) - } - Op::FunctionDeclare(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - Op::FunctionDeclare(n + count) - } - Op::LambdaDefine(n) => { - let count = match self.get_dot_count(&mut iter) { - Some(count) => count, - None => return Err(TokenizeError::InvalidDynamicOperator(token)), - }; - Op::LambdaDefine(n + count) - } + Op::FunctionDefine(n) => Op::FunctionDefine(n + self.get_dot_count()), + Op::FunctionDeclare(n) => Op::FunctionDeclare(n + self.get_dot_count()), + Op::LambdaDefine(n) => Op::LambdaDefine(n + self.get_dot_count()), op => op.clone(), - }), token, self.filename.clone(), self.line, self.column); - - // at this point, token must be in the hashmap possible, otherwise it wouldn't have any matches + }); + + let (line, col) = self.getpos(); + + let token = Token::new(t, token, line, col); + self.tokens.push_back(token); break; } @@ -433,70 +355,49 @@ impl Tokenizer { } } - self.tokenize(iter) + self.tokenize() } else if c.is_whitespace() { - self.tokenize(iter) + self.tokenize() } else { - return Err(TokenizeError::InvalidCharacter(c)); + let (line, col) = self.getpos(); + + return Err( + Error::new(format!("an unidentified character {c} was found")) + .location(line, col - 1..col)); } } } impl Iterator for Tokenizer { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if let Some(token) = self.tokens.pop_front() { return Some(Ok(token)); - } + } else { + match self.tokenize() { + Ok(_) => (), + Err(e) => return Some(Err(e)), + }; - let mut input = String::new(); - - match self.reader.read_line(&mut input) { - Ok(0) => None, - Ok(_n) => { - self.code.push_str(&input); - self.line += 1; - self.column = 0; - - match self.tokenize(input.chars().peekable()) { - Ok(()) => (), - Err(e) => return Some(Err(e)), - } - - self.next() - }, - Err(e) => Some(Err(TokenizeError::IO(e))), + self.next() } } } #[cfg(test)] mod tests { - use io::Cursor; - - use crate::parser::Parser; use super::*; - - #[test] - fn tokenizer() { - let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]"; - - let tokens: Vec = Tokenizer::new(Cursor::new(program), "").collect::>().unwrap(); - - println!("{tokens:#?}"); - } + use std::io::Cursor; #[test] fn a() { - let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]"; + let program = ": f a * 12 a f 12"; - let mut tokenizer = Tokenizer::new(Cursor::new(program), "").peekable(); + let tokenizer = Tokenizer::new(Arc::new(Mutex::new(CodeIter::new(Cursor::new(program))))); - let mut globals = HashMap::new(); - let mut parser = Parser::new(&mut tokenizer, &mut globals); + let t: Vec<_> = tokenizer.collect(); - let tree = parser.next(); - println!("{tree:#?}"); + println!("{t:#?}"); } } \ No newline at end of file