new error handling!

2024-10-27 22:01:20 -04:00
parent ca6b977220
commit c29f689252
7 changed files with 590 additions and 582 deletions

src/error.rs (new file, 81 lines added)

@@ -0,0 +1,81 @@
use std::error;
use std::fmt;
use std::ops::Range;
#[derive(Debug, Clone)]
pub struct Error {
message: String,
note: Option<String>,
file: Option<String>,
code: Option<String>,
location: Option<(usize, Range<usize>)>,
}
impl Error {
pub(crate) fn new(message: String) -> Self {
Self {
message,
note: None,
file: None,
code: None,
location: None
}
}
pub(crate) fn note(mut self, note: String) -> Self {
self.note = Some(note);
self
}
pub(crate) fn file(mut self, file: String) -> Self {
self.file = Some(file);
self
}
pub(crate) fn location(mut self, line: usize, r: Range<usize>) -> Self {
self.location = Some((line, r));
self
}
pub(crate) fn code(mut self, code: String) -> Self {
self.code = Some(code);
self
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.message)?;
if let Some((line, loc)) = &self.location {
let filename = self.file.clone().unwrap_or("<unknown>".into());
if let Some(code) = &self.code {
let mut lines = code.lines();
let linect = match lines.nth(*line) {
Some(l) => l,
None => return Ok(()), // TODO: report an error if the line number is out of range of the source
};
write!(f, "\n| --> {filename}:{line}:{}\n| {linect}\n", loc.start)?;
let spaces = " ".repeat(loc.start);
let pointers: String = loc.clone().map(|_| '^').collect();
write!(f, "|{spaces}{pointers}")?;
if let Some(note) = &self.note {
write!(f, " {note}")?;
}
} else {
write!(f, " @ {filename}:{line}:{}", loc.start)?;
}
}
Ok(())
}
}
impl error::Error for Error {}
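As a rough illustration of the new builder API (not part of the commit; the file name, source snippet, and messages below are made up, and the methods are crate-internal):
// Hypothetical, crate-internal usage of the Error builder above.
let err = Error::new("undefined identifier bar".to_string())
    .file("example.lamm".to_string())
    .code("foo bar".to_string())   // full source text, used to print the offending line
    .location(0, 4..7)             // zero-based line and the column range of `bar`
    .note("`bar` is never defined".to_string());
eprintln!("{err}");
// With the Display impl above this prints roughly:
// undefined identifier bar
// | --> example.lamm:0:4
// | foo bar
// |     ^^^ `bar` is never defined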

src/executor.rs

@@ -1,102 +1,66 @@
use super::{Value, Type, Object};
use super::parser::{ParseTree, ParseError};
use super::parser::ParseTree;
use super::tokenizer::Op;
use super::error::Error;
use std::collections::HashMap;
use std::fmt::Display;
use std::error::Error;
use std::io;
use std::sync::{Arc, Mutex};
#[derive(Debug)]
pub enum RuntimeError {
ParseError(ParseError),
NoOverloadForTypes(String, Vec<Value>),
ImmutableError(String),
VariableUndefined(String),
FunctionUndeclared(String),
FunctionUndefined(String),
NotAVariable(String),
ParseFail(String, Type),
TypeError(Type, Type),
EmptyArray,
IO(io::Error),
}
impl Display for RuntimeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::ParseError(e) => write!(f, "Parser Error: {e}"),
Self::NoOverloadForTypes(op, values)
=> write!(f, "No overload of `{op}` exists for the operands `{}`",
values.iter().map(|x| format!("{}({x})", x.get_type())).collect::<Vec<_>>().join(", ")),
Self::ImmutableError(ident) => write!(f, "`{ident}` already exists and cannot be redefined"),
Self::VariableUndefined(ident) => write!(f, "variable `{ident}` was not defined"),
Self::FunctionUndeclared(ident) => write!(f, "function `{ident}` was not declared"),
Self::FunctionUndefined(ident) => write!(f, "function `{ident}` was not defined"),
Self::NotAVariable(ident) => write!(f, "`{ident}` is a function but was attempted to be used like a variable"),
Self::ParseFail(s, t) => write!(f, "`\"{s}\"` couldn't be parsed into {}", t),
Self::IO(e) => write!(f, "{e}"),
Self::TypeError(left, right) => write!(f, "expected type `{left}` but got type `{right}`"),
Self::EmptyArray => write!(f, "attempt to access element from an empty array"),
}
}
}
impl Error for RuntimeError {}
/// Executes an input of ParseTrees
pub struct Executor<'a, I>
where
I: Iterator<Item = Result<ParseTree, ParseError>>
{
exprs: &'a mut I,
globals: &'a mut HashMap<String, Arc<Mutex<Object>>>,
pub(crate) struct Executor {
globals: HashMap<String, Arc<Mutex<Object>>>,
locals: HashMap<String, Arc<Mutex<Object>>>,
}
impl<'a, I> Executor<'a, I>
where
I: Iterator<Item = Result<ParseTree, ParseError>>,
{
pub fn new(exprs: &'a mut I, globals: &'a mut HashMap<String, Arc<Mutex<Object>>>) -> Self {
impl Executor {
pub(crate) fn new() -> Self {
Self {
exprs,
globals,
globals: HashMap::new(),
locals: HashMap::new(),
}
}
pub fn _add_global(self, k: String, v: Arc<Mutex<Object>>) -> Self {
pub(crate) fn values<I>(mut self, iter: I) -> impl Iterator<Item = Result<Value, Error>>
where
I: Iterator<Item = Result<ParseTree, Error>>
{
iter.map(move |x| self.exec(x?))
}
pub(crate) fn add_global(mut self, k: String, v: Arc<Mutex<Object>>) -> Self {
self.globals.insert(k, v);
self
}
pub fn locals(mut self, locals: HashMap<String, Arc<Mutex<Object>>>) -> Self {
pub(crate) fn add_globals<Globals: IntoIterator<Item = (String, Arc<Mutex<Object>>)>>(self, globals: Globals) -> Self {
globals.into_iter().fold(self, |acc, (k, v)| acc.add_global(k, v))
}
pub(crate) fn locals(mut self, locals: HashMap<String, Arc<Mutex<Object>>>) -> Self {
self.locals = locals;
self
}
pub fn add_local(mut self, k: String, v: Arc<Mutex<Object>>) -> Self {
pub(crate) fn add_local(mut self, k: String, v: Arc<Mutex<Object>>) -> Self {
self.locals.insert(k, v);
self
}
fn _get_object(&self, ident: &String) -> Result<&Arc<Mutex<Object>>, RuntimeError> {
fn _get_object(&self, ident: &String) -> Result<&Arc<Mutex<Object>>, Error> {
self.locals.get(ident).or(self.globals.get(ident))
.ok_or(RuntimeError::VariableUndefined(ident.clone()))
.ok_or(Error::new(format!("undefined identifier {}", ident.clone())))
}
fn get_object_mut(&mut self, ident: &String) -> Result<&mut Arc<Mutex<Object>>, RuntimeError> {
fn get_object_mut(&mut self, ident: &String) -> Result<&mut Arc<Mutex<Object>>, Error> {
self.locals.get_mut(ident).or(self.globals.get_mut(ident))
.ok_or(RuntimeError::VariableUndefined(ident.clone()))
.ok_or(Error::new(format!("undefined identifier {}", ident.clone())))
}
fn variable_exists(&self, ident: &String) -> bool {
self.locals.contains_key(ident) || self.globals.contains_key(ident)
}
fn eval(obj: &mut Arc<Mutex<Object>>) -> Result<Value, RuntimeError> {
fn eval(obj: &mut Arc<Mutex<Object>>) -> Result<Value, Error> {
let mut guard = obj.lock().unwrap();
let v = guard.eval()?;
@@ -120,11 +84,11 @@ where
locals
}
pub fn exec(&mut self, tree: Box<ParseTree>) -> Result<Value, RuntimeError> {
match *tree {
pub(crate) fn exec(&mut self, tree: ParseTree) -> Result<Value, Error> {
match tree {
ParseTree::Operator(op, args) => {
let args: Vec<Value> = args.into_iter()
.map(|x| self.exec(Box::new(x))).collect::<Result<_, _>>()?;
.map(|x| self.exec(x)).collect::<Result<_, _>>()?;
match op {
Op::Add => match &args[..] {
@@ -135,7 +99,7 @@ where
[Value::String(x), Value::String(y)] => Ok(Value::String(format!("{x}{y}"))),
[Value::Array(xtype, x), Value::Array(ytype, y)] => {
if xtype != ytype {
return Err(RuntimeError::TypeError(xtype.clone(), ytype.clone()));
return Err(Error::new(format!("expected type {} but found {}", xtype, ytype)));
}
Ok(Value::Array(xtype.clone(), [x.clone(), y.clone()].concat()))
@@ -146,7 +110,7 @@ where
let ytype = y.get_type();
if *t != ytype {
return Err(RuntimeError::TypeError(t.clone(), ytype));
return Err(Error::new(format!("expected type {} but found {}", t, ytype)));
}
// NOTE: use y's type instead of the array's type.
@@ -158,13 +122,13 @@ where
let xtype = x.get_type();
if *t != xtype {
return Err(RuntimeError::TypeError(t.clone(), xtype));
return Err(Error::new(format!("expected type {} but found {}", t, xtype)));
}
// NOTE: read above
Ok(Value::Array(xtype, [vec![x.clone()], y.clone()].concat()))
},
_ => Err(RuntimeError::NoOverloadForTypes("+".into(), args)),
_ => Err(Error::new("todo: add".into())),
}
Op::Sub => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Int(x - y)),
@@ -173,7 +137,7 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Float(x - y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("-".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Mul => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Int(x * y)),
@@ -182,7 +146,7 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Float(x * y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("*".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Div => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Float(*x as f64 / *y as f64)),
@@ -191,7 +155,7 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Float(x / y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("/".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::FloorDiv => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Int(x / y)),
@@ -200,7 +164,7 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Int(*x as i64 / *y as i64)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("//".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Exp => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Float((*x as f64).powf(*y as f64))),
@@ -209,7 +173,7 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Float(x.powf(*y))),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("**".into(), args)),
_ => Err(Error::new("todo: fsadfdsf".into())),
}
Op::Mod => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Int(x % y)),
@@ -218,85 +182,85 @@ where
[Value::Float(x), Value::Float(y)] => Ok(Value::Float(x % y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("%".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::GreaterThan => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x > y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x > *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 > *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x > y)),
_ => Err(RuntimeError::NoOverloadForTypes(">".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::GreaterThanOrEqualTo => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x >= y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x >= *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 >= *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x >= y)),
_ => Err(RuntimeError::NoOverloadForTypes(">=".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::LessThan => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x < y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x < *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool((*x as f64) < *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x < y)),
_ => Err(RuntimeError::NoOverloadForTypes("<".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::LessThanOrEqualTo => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x <= y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x <= *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 <= *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x <= y)),
_ => Err(RuntimeError::NoOverloadForTypes("<=".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::EqualTo => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x == y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x == *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 == *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x == y)),
_ => Err(RuntimeError::NoOverloadForTypes("==".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::NotEqualTo => match &args[..] {
[Value::Int(x), Value::Int(y)] => Ok(Value::Bool(x != y)),
[Value::Float(x), Value::Int(y)] => Ok(Value::Bool(*x != *y as f64)),
[Value::Int(x), Value::Float(y)] => Ok(Value::Bool(*x as f64 != *y)),
[Value::Float(x), Value::Float(y)] => Ok(Value::Bool(x != y)),
_ => Err(RuntimeError::NoOverloadForTypes("!=".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Not => match &args[0] {
Value::Bool(b) => Ok(Value::Bool(!b)),
_ => Err(RuntimeError::NoOverloadForTypes("!".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Or => match &args[..] {
[Value::Bool(x), Value::Bool(y)] => Ok(Value::Bool(*x || *y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("||".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::And => match &args[..] {
[Value::Bool(x), Value::Bool(y)] => Ok(Value::Bool(*x && *y)),
[Value::Nil, x] => Ok(x.clone()),
[x, Value::Nil] => Ok(x.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("&&".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Compose => match &args[..] {
[_, v] => Ok(v.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("~".into(), args)),
_ => Err(Error::new("todo: actual error output".into())),
}
Op::Head => match &args[0] {
Value::Array(_, x) => Ok(x.first().ok_or(RuntimeError::EmptyArray)?.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)),
Value::Array(_, x) => Ok(x.first().ok_or(Error::new(format!("passed an empty array to head")))?.clone()),
_ => Err(Error::new("head".into())),
}
Op::Tail => match &args[0] {
Value::Array(t, x) => Ok(Value::Array(t.clone(), if x.len() > 0 { x[1..].to_vec() } else { vec![] })),
_ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)),
_ => Err(Error::new("tail".into())),
}
Op::Init => match &args[0] {
Value::Array(t, x) => Ok(Value::Array(t.clone(), if x.len() > 0 { x[..x.len() - 1].to_vec() } else { vec![] })),
_ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)),
_ => Err(Error::new("init".into())),
}
Op::Fini => match &args[0] {
Value::Array(_, x) => Ok(x.last().ok_or(RuntimeError::EmptyArray)?.clone()),
_ => Err(RuntimeError::NoOverloadForTypes("head".into(), args)),
Value::Array(_, x) => Ok(x.last().ok_or(Error::new(format!("passed an empty array to fini")))?.clone()),
_ => Err(Error::new("fini".into())),
}
Op::Id => match &args[0] {
x => Ok(x.clone()),
@@ -306,20 +270,20 @@ where
Value::Float(x) => Ok(Value::Int(*x as i64)),
Value::Bool(x) => Ok(Value::Int(if *x { 1 } else { 0 })),
Value::String(x) => {
let r: i64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Int))?;
let r: i64 = x.parse().map_err(|_| Error::new(format!("failed to parse {} into {}", x, Type::Int)))?;
Ok(Value::Int(r))
}
x => Err(RuntimeError::NoOverloadForTypes("int".into(), vec![x.clone()])),
x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Int))),
}
Op::FloatCast => match &args[0] {
Value::Int(x) => Ok(Value::Float(*x as f64)),
Value::Float(x) => Ok(Value::Float(*x)),
Value::Bool(x) => Ok(Value::Float(if *x { 1.0 } else { 0.0 })),
Value::String(x) => {
let r: f64 = x.parse().map_err(|_| RuntimeError::ParseFail(x.clone(), Type::Int))?;
let r: f64 = x.parse().map_err(|_| Error::new(format!("failed to parse {} into {}", x, Type::Float)))?;
Ok(Value::Float(r))
}
x => Err(RuntimeError::NoOverloadForTypes("float".into(), vec![x.clone()])),
x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Float))),
}
Op::BoolCast => match &args[0] {
Value::Int(x) => Ok(Value::Bool(*x != 0)),
@@ -327,7 +291,7 @@ where
Value::Bool(x) => Ok(Value::Bool(*x)),
Value::String(x) => Ok(Value::Bool(!x.is_empty())),
Value::Array(_, vec) => Ok(Value::Bool(!vec.is_empty())),
x => Err(RuntimeError::NoOverloadForTypes("bool".into(), vec![x.clone()])),
x => Err(Error::new(format!("no possible conversion from {} into {}", x, Type::Bool))),
}
Op::StringCast => Ok(Value::String(format!("{}", &args[0]))),
Op::Print => match &args[0] {
@@ -345,38 +309,38 @@ where
}
ParseTree::Equ(ident, body, scope) => {
if self.variable_exists(&ident) {
Err(RuntimeError::ImmutableError(ident.clone()))
Err(Error::new(format!("attempt to override value of variable {ident}")))
} else {
let value = self.exec(body)?;
let value = self.exec(*body)?;
let g = self.globals.clone();
Executor::new(self.exprs, &mut self.globals)
Executor::new()
.locals(self.locals.clone())
.add_local(ident, Arc::new(Mutex::new(Object::value(value, g, self.locals.to_owned()))))
.exec(scope)
.exec(*scope)
}
},
ParseTree::LazyEqu(ident, body, scope) => {
if self.variable_exists(&ident) {
Err(RuntimeError::ImmutableError(ident.clone()))
Err(Error::new(format!("attempt to override value of variable {ident}")))
} else {
let g = self.globals.clone();
Executor::new(self.exprs, &mut self.globals)
Executor::new()
.locals(self.locals.clone())
.add_local(ident, Arc::new(Mutex::new(Object::variable(*body, g, self.locals.to_owned()))))
.exec(scope)
.exec(*scope)
}
},
ParseTree::FunctionDefinition(func, scope) => {
let g = self.globals.clone();
Executor::new(self.exprs, &mut self.globals)
Executor::new()
.locals(self.locals.clone())
.add_local(func.name().unwrap().to_string(),
Arc::new(Mutex::new(Object::function(
func
.globals(g)
.locals(self.locals.clone()), HashMap::new(), HashMap::new()))))
.exec(scope)
.exec(*scope)
},
ParseTree::FunctionCall(ident, args) => {
let obj = self.get_object_mut(&ident)?;
@@ -394,35 +358,35 @@ where
f.call(args)
},
_ => Err(RuntimeError::FunctionUndefined(ident.clone()))
_ => Err(Error::new(format!("the function {ident} is not defined")))
}
},
ParseTree::_FunctionCallLocal(_idx, _args) => todo!(),
ParseTree::If(cond, body) => if match self.exec(cond)? {
ParseTree::If(cond, body) => if match self.exec(*cond)? {
Value::Float(f) => f != 0.0,
Value::Int(i) => i != 0,
Value::Bool(b) => b,
Value::String(s) => !s.is_empty(),
Value::Array(_, vec) => !vec.is_empty(),
Value::Nil => false,
x => return Err(RuntimeError::NoOverloadForTypes("?".into(), vec![x])),
x => return Err(Error::new(format!("could not convert {x} into a bool for truthiness check"))),
} {
self.exec(body)
self.exec(*body)
} else {
Ok(Value::Nil)
},
ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(cond)? {
ParseTree::IfElse(cond, istrue, isfalse) => if match self.exec(*cond)? {
Value::Float(f) => f != 0.0,
Value::Int(i) => i != 0,
Value::Bool(b) => b,
Value::String(s) => !s.is_empty(),
Value::Array(_, vec) => !vec.is_empty(),
Value::Nil => false,
x => return Err(RuntimeError::NoOverloadForTypes("??".into(), vec![x])),
x => return Err(Error::new(format!("could not convert {x} into a bool for truthiness check"))),
} {
self.exec(istrue)
self.exec(*istrue)
} else {
self.exec(isfalse)
self.exec(*isfalse)
},
ParseTree::Variable(ident) => {
let obj = self.get_object_mut(&ident)?;
@@ -436,7 +400,7 @@ where
ParseTree::Nop => Ok(Value::Nil),
ParseTree::Export(names) => {
for name in names {
let obj = self.locals.remove(&name).ok_or(RuntimeError::VariableUndefined(name.clone()))?;
let obj = self.locals.remove(&name).ok_or(Error::new(format!("attempt to export an object that was not defined")))?;
self.globals.insert(name, obj);
}
@@ -454,17 +418,3 @@ where
}
}
}
impl<'a, I: Iterator<Item = Result<ParseTree, ParseError>>> Iterator for Executor<'a, I> {
type Item = Result<Value, RuntimeError>;
fn next(&mut self) -> Option<Self::Item> {
let expr = self.exprs.next();
match expr {
Some(Ok(expr)) => Some(self.exec(Box::new(expr))),
Some(Err(e)) => Some(Err(RuntimeError::ParseError(e))),
None => None,
}
}
}
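A minimal, crate-internal sketch of driving the new Executor through its values adapter; the single literal tree below is hypothetical and assumes the executor returns literal Value trees unchanged:
let trees = vec![Ok(ParseTree::Value(Value::Int(1)))].into_iter();
for result in Executor::new().values(trees) {
    match result {
        Ok(v) => println!("=> {v}"),       // expected: => 1
        Err(e) => eprintln!("error: {e}"),
    }
}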

src/function.rs

@@ -1,6 +1,7 @@
use crate::parser::ParseTree;
use crate::executor::{Executor, RuntimeError};
use crate::executor::Executor;
use crate::{Type, Object, Value};
use super::error::Error;
use std::collections::HashMap;
use std::fmt::{self, Display};
@@ -140,13 +141,11 @@ impl Function {
self.t.clone()
}
pub(crate) fn call(&mut self, args: Vec<Arc<Mutex<Object>>>) -> Result<Value, RuntimeError> {
let mut tree = vec![Ok(*self.body.clone())].into_iter();
let mut globals = self.globals.clone().unwrap();
pub(crate) fn call(&mut self, args: Vec<Arc<Mutex<Object>>>) -> Result<Value, Error> {
let globals = self.globals.clone().unwrap();
let locals = self.locals.clone().unwrap();
let mut exec = Executor::new(&mut tree, &mut globals)
.locals(locals.clone());
let mut exec = Executor::new().add_globals(globals).locals(locals.clone());
if let Some(names) = self.arg_names.clone() {
for (obj, name) in std::iter::zip(args.clone().into_iter(), names.into_iter()) {
@@ -158,7 +157,7 @@ impl Function {
exec = exec.add_local(name, Arc::new(Mutex::new(Object::function(self.clone(), self.globals.clone().unwrap(), locals))));
}
exec.exec(self.body.clone())
exec.exec(*self.body.clone())
}
}

src/lib.rs

@@ -2,17 +2,19 @@ mod tokenizer;
mod parser;
mod executor;
mod function;
mod error;
use executor::{Executor, RuntimeError};
use executor::Executor;
use parser::{ParseTree, Parser};
use tokenizer::Tokenizer;
use function::{FunctionType, Function};
use error::Error;
use core::str;
use std::collections::HashMap;
use std::fmt::Display;
use std::io::BufRead;
use std::fmt;
use std::iter::Peekable;
use std::sync::{Arc, Mutex};
#[derive(Clone, Debug)]
@@ -147,16 +149,15 @@ impl Object {
}
/// evaluate the tree inside an object if it isn't evaluated yet, returns the value
pub fn eval(&mut self) -> Result<Value, RuntimeError> {
pub fn eval(&mut self) -> Result<Value, Error> {
match self.value.clone() {
Cache::Cached(v) => Ok(v),
Cache::Uncached(tree) => {
let mut t = vec![Ok(tree.clone())].into_iter();
let mut exec = Executor::new(&mut t, &mut self.globals)
let mut exec = Executor::new()
.add_globals(self.globals.clone())
.locals(self.locals.clone());
let v = exec.exec(Box::new(tree))?;
let v = exec.exec(tree)?;
self.value = Cache::Cached(v.clone());
@@ -174,20 +175,111 @@ impl Object {
}
}
pub struct Runtime<'a, R: BufRead> {
tokenizer: Peekable<Tokenizer<R>>,
global_types: HashMap<String, Type>,
globals: HashMap<String, Arc<Mutex<Object>>>,
parser: Option<Parser<'a, Tokenizer<R>>>,
/// A custom type used in the tokenizer to automatically keep track of which character we are on
pub(crate) struct CodeIter<R: BufRead> {
reader: R,
code: String,
// position in code
pos: usize,
// the current line number
line: usize,
// column in the current line
column: usize,
}
impl<'a, R: BufRead> Runtime<'a, R> {
pub fn new(reader: R, name: &str) -> Self {
impl<R: BufRead> CodeIter<R> {
fn new(reader: R) -> Self {
Self {
tokenizer: Tokenizer::new(reader, name).peekable(),
reader,
code: String::new(),
pos: 0,
line: 0,
column: 0,
}
}
pub(crate) fn getpos(&self) -> (usize, usize) {
(self.line, self.column)
}
fn code(&self) -> String {
self.code.clone()
}
// std::iter::Peekable is not used here because it would hide the underlying reader we need for read_line
pub(crate) fn peek(&mut self) -> Option<char> {
if let Some(c) = self.code.chars().nth(self.pos) {
Some(c)
} else {
match self.reader.read_line(&mut self.code) {
Ok(0) => return None,
Ok(_) => (),
Err(_e) => panic!("aaaa"),
};
self.peek()
}
}
pub(crate) fn next_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option<char> {
let c = self.peek()?;
if (func)(&c) {
self.next()
} else {
None
}
}
}
impl<R: BufRead> Iterator for CodeIter<R> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if let Some(c) = self.code.chars().nth(self.pos) {
match c {
'\n' => {
self.line += 1;
self.column = 0;
self.pos += 1;
None
},
c => {
self.column += 1;
self.pos += 1;
Some(c)
}
}
} else {
match self.reader.read_line(&mut self.code) {
Ok(0) => return None,
Ok(_) => (),
Err(_e) => panic!("aaaa"),
};
self.next()
}
}
}
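A small, crate-internal sketch (not in the commit) of the position tracking this iterator provides, using an in-memory reader; note that, as written above, next() yields None for the newline itself and only advances the line counter:
use std::io::Cursor;

let mut chars = CodeIter::new(Cursor::new("ab\ncd"));
assert_eq!(chars.next(), Some('a'));
assert_eq!(chars.next(), Some('b'));
assert_eq!(chars.next(), None);       // the '\n' bumps the line count but yields nothing
assert_eq!(chars.getpos(), (1, 0));   // (line, column) after the newline
assert_eq!(chars.next(), Some('c'));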
pub struct Runtime<R: BufRead> {
reader: Arc<Mutex<CodeIter<R>>>,
filename: String,
global_types: HashMap<String, Type>,
globals: HashMap<String, Arc<Mutex<Object>>>,
}
impl<R: BufRead> Runtime<R> {
pub fn new(reader: R, filename: &str) -> Self {
Self {
reader: Arc::new(Mutex::new(CodeIter::new(reader))),
filename: filename.to_string(),
global_types: HashMap::new(),
globals: HashMap::new(),
parser: None,
}.add_global("version'", Value::String(
format!("{} ({}/{})",
env!("CARGO_PKG_VERSION"),
@@ -197,6 +289,12 @@ impl<'a, R: BufRead> Runtime<'a, R> {
))
}
pub fn code(&self) -> String {
let reader = self.reader.lock().unwrap();
let code = reader.code();
code
}
pub fn add_global(mut self, name: &str, value: Value) -> Self {
self.global_types.insert(name.to_string(), value.get_type());
self.globals.insert(name.to_string(),
@@ -207,9 +305,27 @@ impl<'a, R: BufRead> Runtime<'a, R> {
self
}
}
pub fn values(&'a mut self) -> impl Iterator<Item = Result<Value, RuntimeError>> + 'a {
self.parser = Some(Parser::new(&mut self.tokenizer, &mut self.global_types));
Executor::new(self.parser.as_mut().unwrap(), &mut self.globals)
impl<R: BufRead> Iterator for Runtime<R> {
type Item = Result<Value, Error>;
fn next(&mut self) -> Option<Self::Item> {
let tokenizer = Tokenizer::new(self.reader.clone());
let tree = Parser::new()
.add_globals(self.global_types.clone())
.parse(&mut tokenizer.peekable());
let tree = match tree.map_err(|e| e
.code(self.code())
.file(self.filename.clone()))
{
Ok(Some(tree)) => tree,
Ok(None) => return None,
Err(e) => return Some(Err(e))
};
Some(Executor::new().add_globals(self.globals.clone()).exec(tree))
}
}

src/main.rs

@@ -1,12 +1,12 @@
use std::io::{self, BufReader};
fn main() {
let mut runtime = lamm::Runtime::new(BufReader::new(io::stdin()), "<stdin>");
let runtime = lamm::Runtime::new(BufReader::new(io::stdin()), "<stdin>");
for value in runtime.values() {
for value in runtime {
match value {
Ok(v) => println!("=> {v}"),
Err(e) => eprintln!("{e}"),
Err(e) => eprintln!("error: {e}"),
}
}
}

src/parser.rs

@@ -1,46 +1,11 @@
use crate::executor::Executor;
use super::{Value, Type, Function, FunctionType};
use super::tokenizer::{Token, TokenType, TokenizeError, Op};
use super::tokenizer::{Token, TokenType, Op};
use super::error::Error;
use std::borrow::BorrowMut;
use std::error;
use std::collections::HashMap;
use std::fmt::Display;
use std::iter::Peekable;
#[derive(Debug)]
pub enum ParseError {
NoInput,
UnexpectedEndInput,
IdentifierUndefined(Token),
InvalidIdentifier(Token),
UnmatchedArrayClose,
UnwantedToken(Token),
TokenizeError(TokenizeError),
ImmutableError(String),
RuntimeError,
}
impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"),
ParseError::IdentifierUndefined(name) => write!(f, "Undefined identifier `{}` {}:{}:{}", name.lexeme, name.file, name.line, name.location.start),
ParseError::InvalidIdentifier(t) => write!(f, "Invalid identifier `{t:?}`"),
ParseError::NoInput => write!(f, "No input given"),
ParseError::UnmatchedArrayClose => write!(f, "there was an unmatched array closing operator `]`"),
ParseError::TokenizeError(e) => write!(f, "Tokenizer Error: {e}"),
ParseError::RuntimeError => write!(f, "Runtime Error"),
ParseError::ImmutableError(i) => write!(f, "attempt to redeclare {i} met with force"),
ParseError::UnwantedToken(t) => write!(f, "unexpected token {t:?}"),
}
}
}
impl error::Error for ParseError {}
#[derive(Clone, Debug)]
pub(crate) enum ParseTree {
Operator(Op, Vec<ParseTree>),
@@ -69,45 +34,50 @@ pub(crate) enum ParseTree {
}
/// Parses input tokens and produces ParseTrees for an Executor
pub(crate) struct Parser<'a, I: Iterator<Item = Result<Token, TokenizeError>>> {
tokens: &'a mut Peekable<I>,
globals: &'a mut HashMap<String, Type>,
#[derive(Clone)]
pub(crate) struct Parser {
globals: HashMap<String, Type>,
locals: HashMap<String, Type>,
}
impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
pub fn new(tokens: &'a mut Peekable<I>, globals: &'a mut HashMap<String, Type>) -> Self {
impl Parser {
pub(crate) fn new() -> Self {
Self {
tokens,
globals,
globals: HashMap::new(),
locals: HashMap::new()
}
}
pub(crate) fn trees<I: Iterator<Item = Result<Token, Error>>>(mut self, mut tokens: Peekable<I>) -> impl Iterator<Item = Result<ParseTree, Error>> {
std::iter::from_fn(move || {
match self.parse(&mut tokens) {
Ok(Some(tree)) => Some(Ok(tree)),
Ok(None) => None,
Err(e) => Some(Err(e)),
}
})
}
pub fn _add_global(self, k: String, v: Type) -> Self {
pub(crate) fn add_global(mut self, k: String, v: Type) -> Self {
self.globals.insert(k, v);
self
}
pub fn _add_globals<Items: Iterator<Item = (String, Type)>>(self, items: Items) -> Self {
items.for_each(|(name, t)| {
self.globals.insert(name, t);
});
self
pub(crate) fn add_globals<Items: IntoIterator<Item = (String, Type)>>(self, items: Items) -> Self {
items.into_iter().fold(self, |acc, (k, v)| acc.add_global(k, v))
}
pub fn locals(mut self, locals: HashMap<String, Type>) -> Self {
pub(crate) fn locals(mut self, locals: HashMap<String, Type>) -> Self {
self.locals = locals;
self
}
pub fn add_local(mut self, k: String, v: Type) -> Self {
pub(crate) fn add_local(mut self, k: String, v: Type) -> Self {
self.locals.insert(k, v);
self
}
pub fn _add_locals<Items: Iterator<Item = (String, Type)>>(mut self, items: Items) -> Self {
pub(crate) fn _add_locals<Items: Iterator<Item = (String, Type)>>(mut self, items: Items) -> Self {
items.for_each(|(name, t)| {
self.locals.insert(name, t);
});
@@ -123,15 +93,15 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
}
// get at most count arguments
fn get_args(&mut self, count: usize) -> Result<Vec<ParseTree>, ParseError> {
(0..count).map_while(|_| match self.parse() {
Ok(r) => Some(Ok(r)),
Err(ParseError::NoInput) => None,
fn get_args<I: Iterator<Item = Result<Token, Error>>>(&mut self, tokens: &mut Peekable<I>, count: usize) -> Result<Vec<ParseTree>, Error> {
(0..count).map_while(|_| match self.parse(tokens) {
Ok(Some(tree)) => Some(Ok(tree)),
Ok(None) => None,
Err(e) => Some(Err(e)),
}).collect()
}
fn parse_operator(&mut self, op: Op) -> Result<ParseTree, ParseError> {
fn parse_operator<I: Iterator<Item = Result<Token, Error>>>(&mut self, tokens: &mut Peekable<I>, op: Op) -> Result<ParseTree, Error> {
let operators: HashMap<Op, FunctionType> = HashMap::from([
(Op::Add, FunctionType(Box::new(Type::Any), vec![Type::Any, Type::Any])),
(Op::Sub, FunctionType(Box::new(Type::Any), vec![Type::Any, Type::Any])),
@@ -162,7 +132,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
]);
let operator = operators.get(&op).expect("All operators should be accounted for");
let args = self.get_args(operator.1.len())?;
let args = self.get_args(tokens, operator.1.len())?;
if args.len() == operator.1.len() {
Ok(ParseTree::Operator(op, args))
@@ -188,18 +158,22 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
}
}
fn parse(&mut self) -> Result<ParseTree, ParseError> {
let token = self.tokens.next()
.ok_or(ParseError::NoInput)?
.map_err(|e| ParseError::TokenizeError(e))?;
pub(crate) fn parse<I: Iterator<Item = Result<Token, Error>>>(&mut self, tokens: &mut Peekable<I>) -> Result<Option<ParseTree>, Error> {
let token = match tokens.next() {
Some(Ok(t)) => t,
Some(Err(e)) => return Err(e),
None => return Ok(None),
};
match token.token() {
TokenType::Constant(c) => Ok(ParseTree::Value(c)),
TokenType::Constant(c) => Ok(Some(ParseTree::Value(c))),
TokenType::Identifier(ident) => {
match self.get_object_type(&ident).ok_or(ParseError::IdentifierUndefined(token))? {
match self.get_object_type(&ident).ok_or(
Error::new(format!("undefined identifier {ident}"))
.location(token.line, token.location))? {
Type::Function(f) => {
let f = f.clone();
let args = self.get_args(f.1.len())?;
let args = self.get_args(tokens, f.1.len())?;
if args.len() < f.1.len() {
let mut counter = 0;
@@ -212,19 +186,19 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
}).unzip();
let function_type = FunctionType(f.0.clone(), types);
Ok(ParseTree::Value(Value::Function(Function::lambda(
Ok(Some(ParseTree::Value(Value::Function(Function::lambda(
function_type,
names.clone(),
Box::new(ParseTree::FunctionCall(ident,
vec![
args,
names.into_iter().map(|x| ParseTree::Variable(x)).collect()
].concat()))))))
].concat())))))))
} else {
Ok(ParseTree::FunctionCall(ident, args))
Ok(Some(ParseTree::FunctionCall(ident, args)))
}
}
_ => Ok(ParseTree::Variable(ident)),
_ => Ok(Some(ParseTree::Variable(ident))),
}
},
TokenType::Operator(op) => match op {
@@ -233,7 +207,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
// take tokens until we reach the end of this array
// if we don't collect them here it causes rust to overflow computing the types
let array_tokens = self.tokens.by_ref().take_while(|t| match t {
let array_tokens = tokens.by_ref().take_while(|t| match t {
Ok(t) => match t.token() {
TokenType::Operator(Op::OpenArray) => {
depth += 1;
@@ -246,32 +220,33 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
_ => true,
}
_ => true,
}).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
}).collect::<Result<Vec<_>, Error>>()?;
let mut array_tokens = array_tokens
let array_tokens = array_tokens
.into_iter()
.map(|t| Ok(t))
.collect::<Vec<Result<Token, TokenizeError>>>()
.collect::<Vec<Result<Token, Error>>>()
.into_iter()
.peekable();
let trees: Vec<ParseTree> = Parser::new(&mut array_tokens, self.globals.borrow_mut())
let trees: Vec<ParseTree> = Parser::new()
.locals(self.locals.to_owned())
.collect::<Result<_, ParseError>>()?;
.trees(array_tokens)
.collect::<Result<_, Error>>()?;
let tree = trees.into_iter().fold(
ParseTree::Value(Value::Array(Type::Any, vec![])),
|acc, x| ParseTree::Operator(Op::Add, vec![acc, x.clone()]),
);
Ok(tree)
Ok(Some(tree))
},
Op::OpenStatement => {
let mut depth = 1;
// take tokens until we reach the end of this statement block
// if we don't collect them here it causes rust to overflow computing the types
let array_tokens = self.tokens.by_ref().take_while(|t| match t {
let array_tokens = tokens.by_ref().take_while(|t| match t {
Ok(t) => match t.token() {
TokenType::Operator(Op::OpenStatement) => {
depth += 1;
@@ -284,122 +259,135 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
_ => true,
}
_ => true,
}).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
}).collect::<Result<Vec<_>, Error>>()?;
let mut array_tokens = array_tokens
let array_tokens = array_tokens
.into_iter()
.map(|t| Ok(t))
.collect::<Vec<Result<Token, TokenizeError>>>()
.collect::<Vec<Result<Token, Error>>>()
.into_iter()
.peekable();
let trees: Vec<ParseTree> = Parser::new(&mut array_tokens, self.globals.borrow_mut())
let trees: Vec<ParseTree> = Parser::new()
.locals(self.locals.to_owned())
.collect::<Result<_, ParseError>>()?;
.trees(array_tokens)
.collect::<Result<_, Error>>()?;
let tree = trees.into_iter().fold(
ParseTree::Nop,
|acc, x| ParseTree::Operator(Op::Compose, vec![acc, x.clone()]),
);
Ok(tree)
Ok(Some(tree))
},
Op::Equ | Op::LazyEqu => {
let token = self.tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?;
let body = Box::new(self.parse()?);
Op::Equ => {
let token = tokens.next()
.ok_or(Error::new("no identifier given for = expression".into())
.location(token.line, token.location)
.note("expected an identifier after this token".into()))??;
if let TokenType::Identifier(ident) = token.token() {
match op {
Op::Equ => Ok(ParseTree::Equ(
ident.clone(),
body,
Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut())
.locals(self.locals.clone())
.add_local(ident, Type::Any)
.parse()?))
),
Op::LazyEqu => Ok(ParseTree::LazyEqu(
ident.clone(),
body,
Box::new(Parser::new(self.tokens.by_ref(), self.globals.borrow_mut())
.locals(self.locals.clone())
.add_local(ident, Type::Any)
.parse()?))
),
_ => unreachable!(),
}
let body = Box::new(self.parse(tokens)?.ok_or(Error::new(format!("the variable `{ident}` has no value"))
.location(token.line, token.location.clone())
.note("expected a value after this identifier".into()))?);
let scope = Parser::new()
.locals(self.locals.clone())
.add_local(ident.clone(), Type::Any)
.parse(tokens)?
.ok_or(Error::new("variable declaration requires a scope defined after it".into())
.location(token.line, token.location)
.note(format!("this variable {ident} has no scope")))?;
Ok(Some(ParseTree::Equ(
ident.clone(),
body,
Box::new(scope))
))
} else {
Err(ParseError::InvalidIdentifier(token))
Err(Error::new(format!("`{}` is not a valid identifier", token.lexeme)).location(token.line, token.location))
}
},
Op::LazyEqu => {
let token = tokens.next()
.ok_or(Error::new("no identifier given for = expression".into())
.location(token.line, token.location)
.note("expected an identifier after this token".into()))??;
if let TokenType::Identifier(ident) = token.token() {
let body = Box::new(self.parse(tokens)?.ok_or(Error::new(format!("the variable `{ident}` has no value"))
.location(token.line, token.location.clone())
.note("expected a value after this identifier".into()))?);
let scope = Parser::new()
.locals(self.locals.clone())
.add_local(ident.clone(), Type::Any)
.parse(tokens)?
.ok_or(Error::new("variable declaration requires a scope defined after it".into())
.location(token.line, token.location)
.note(format!("this variable {ident} has no scope")))?;
Ok(Some(ParseTree::LazyEqu(
ident.clone(),
body,
Box::new(scope))
))
} else {
Err(Error::new(format!("`{}` is not a valid identifier", token.lexeme)).location(token.line, token.location))
}
},
Op::FunctionDefine(arg_count) => {
let f = self.parse_function_definition(arg_count)?;
let f = self.parse_function_definition(tokens, arg_count)?;
Ok(ParseTree::FunctionDefinition(
f.clone(),
Box::new(
Parser::new(self.tokens, self.globals.borrow_mut())
.locals(self.locals.clone())
.add_local(f.name().unwrap().to_string(), Type::Function(f.get_type()))
.parse()?
)))
let scope = Parser::new()
.locals(self.locals.clone())
.add_local(f.name().unwrap().to_string(), Type::Function(f.get_type()))
.parse(tokens)?
.ok_or(Error::new("function declaration requires a scope defined after it".into())
.location(token.line, token.location)
.note(format!("this function {} has no scope", f.name().unwrap())))?;
Ok(Some(ParseTree::FunctionDefinition( f.clone(), Box::new(scope))))
},
Op::LambdaDefine(arg_count) => {
let f = self.parse_lambda_definition(arg_count)?;
Ok(ParseTree::LambdaDefinition(f))
},
Op::Export => {
let list = self.parse()?;
let mut g = HashMap::new();
let list = Executor::new(&mut vec![Ok(list)].into_iter(), &mut g)
.next().unwrap().map_err(|_| ParseError::RuntimeError)?;
if let Value::Array(Type::String, items) = list {
let names = items.into_iter().map(|x| match x {
Value::String(s) => s,
_ => unreachable!(),
});
for name in names.clone() {
let t = self.locals.remove(&name).ok_or(ParseError::IdentifierUndefined(token.clone()))?;
self.globals.insert(name, t);
}
Ok(ParseTree::Export(names.collect()))
} else {
Err(ParseError::NoInput)
}
}
Op::Empty => Ok(ParseTree::Value(Value::Array(Type::Any, vec![]))),
Op::LambdaDefine(arg_count) => Ok(Some(ParseTree::LambdaDefinition(self.parse_lambda_definition(tokens, arg_count)?))),
Op::Empty => Ok(Some(ParseTree::Value(Value::Array(Type::Any, vec![])))),
Op::NonCall => {
let name = Self::get_identifier(self.tokens.next())?;
Ok(ParseTree::NonCall(name))
let name = Self::get_identifier(tokens.next())?;
Ok(Some(ParseTree::NonCall(name)))
},
Op::If => {
let cond = self.parse()?;
let truebranch = self.parse()?;
let cond = self.parse(tokens)?
.ok_or(Error::new("? statement requires a condition".into())
.location(token.line, token.location.clone()))?;
let truebranch = self.parse(tokens)?
.ok_or(Error::new("? statement requires a branch".into())
.location(token.line, token.location))?;
Ok(ParseTree::If(Box::new(cond), Box::new(truebranch)))
Ok(Some(ParseTree::If(Box::new(cond), Box::new(truebranch))))
},
Op::IfElse => {
let cond = self.parse()?;
let truebranch = self.parse()?;
let falsebranch = self.parse()?;
let cond = self.parse(tokens)?
.ok_or(Error::new("?? statement requires a condition".into())
.location(token.line, token.location.clone()))?;
let truebranch = self.parse(tokens)?
.ok_or(Error::new("?? statement requires a branch".into())
.location(token.line, token.location.clone()))?;
let falsebranch = self.parse(tokens)?
.ok_or(Error::new("?? statement requires a false branch".into())
.location(token.line, token.location))?;
Ok(ParseTree::IfElse(
Box::new(cond), Box::new(truebranch), Box::new(falsebranch)))
Ok(Some(ParseTree::IfElse(
Box::new(cond), Box::new(truebranch), Box::new(falsebranch))))
},
op => self.parse_operator(op),
Op::Export => todo!(),
op => self.parse_operator(tokens, op).map(|x| Some(x)),
},
_ => Err(ParseError::UnwantedToken(token)),
_ => Err(Error::new(format!("the token {} was unexpected", token.lexeme)).location(token.line, token.location)),
}
}
fn parse_lambda_definition(&mut self, arg_count: usize) -> Result<Function, ParseError> {
let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?;
fn parse_lambda_definition<I: Iterator<Item = Result<Token, Error>>>(&mut self, tokens: &mut Peekable<I>, arg_count: usize) -> Result<Function, Error> {
let (t, args) = Self::parse_function_declaration(tokens, arg_count)?;
let mut locals = self.locals.clone();
@@ -408,13 +396,13 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
}
Ok(Function::lambda(t, args, Box::new(
Parser::new(self.tokens, &mut self.globals)
.locals(locals).parse()?)))
Parser::new()
.locals(locals).parse(tokens)?.ok_or(Error::new("lambda requires a body".into()))?)))
}
fn parse_function_definition(&mut self, arg_count: usize) -> Result<Function, ParseError> {
let name = Self::get_identifier(self.tokens.next())?;
let (t, args) = Self::parse_function_declaration(self.tokens, arg_count)?;
fn parse_function_definition<I: Iterator<Item = Result<Token, Error>>>(&mut self, tokens: &mut Peekable<I>, arg_count: usize) -> Result<Function, Error> {
let name = Self::get_identifier(tokens.next())?;
let (t, args) = Self::parse_function_declaration(tokens, arg_count)?;
let mut locals = self.locals.clone();
@@ -425,13 +413,13 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
locals.insert(name.clone(), Type::Function(t.clone()));
Ok(Function::named(&name, t, args, Box::new(
Parser::new(self.tokens, &mut self.globals)
.locals(locals).parse()?)))
Parser::new()
.locals(locals).parse(tokens)?.ok_or(Error::new("function requires a body".into()))?)))
}
fn parse_function_declaration(
fn parse_function_declaration<I: Iterator<Item = Result<Token, Error>>>(
tokens: &mut Peekable<I>,
arg_count: usize) -> Result<(FunctionType, Vec<String>), ParseError>
arg_count: usize) -> Result<(FunctionType, Vec<String>), Error>
{
let args: Vec<(Type, String)> = (0..arg_count)
.map(|_| Self::parse_function_declaration_parameter(tokens))
@@ -448,9 +436,8 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
Ok((FunctionType(Box::new(ret), types), names))
}
fn parse_function_declaration_parameter(mut tokens: &mut Peekable<I>) -> Result<(Type, String), ParseError>
{
let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?;
fn parse_function_declaration_parameter<I: Iterator<Item = Result<Token, Error>>>(tokens: &mut Peekable<I>) -> Result<(Type, String), Error> {
let token = tokens.next().ok_or(Error::new("function definition is incomplete".into()))??;
match token.token() {
// untyped variable
@@ -459,7 +446,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
// typed variable
TokenType::Operator(Op::TypeDeclaration) => {
let name = Self::get_identifier(tokens.next())?;
let t = Self::parse_type(&mut tokens)?;
let t = Self::parse_type(tokens)?;
Ok((t, name))
}
@@ -475,7 +462,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
// typed function
TokenType::Operator(Op::FunctionDeclare(n)) => {
let name = Self::get_identifier(tokens.next())?;
let args = (0..n).map(|_| Self::parse_type(&mut tokens)).collect::<Result<_, _>>()?;
let args = (0..n).map(|_| Self::parse_type(tokens)).collect::<Result<_, _>>()?;
let mut ret = Type::Any;
// this is annoying
@@ -485,22 +472,20 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
{
// so we just check for an error here. this is the only reason t exists.
if let Err(e) = t {
return Err(ParseError::TokenizeError(e));
return Err(e);
}
ret = Self::parse_type(&mut tokens)?;
ret = Self::parse_type(tokens)?;
}
Ok((Type::Function(FunctionType(Box::new(ret), args)), name))
}
_ => Err(ParseError::UnwantedToken(token)),
_ => Err(Error::new(format!("unexpected token {}", token.lexeme))),
}
}
// for some dumbass reason,
// this is the only code that breaks if it doesn't take an impl Iterator instead of simply I ...
fn parse_type(tokens: &mut Peekable<impl Iterator<Item = Result<Token, TokenizeError>>>) -> Result<Type, ParseError> {
let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?;
fn parse_type<I: Iterator<Item = Result<Token, Error>>>(tokens: &mut Peekable<I>) -> Result<Type, Error> {
let token = tokens.next().ok_or(Error::new("type is incomplete".into()))??;
match token.token() {
TokenType::Type(t) => Ok(t),
@@ -511,67 +496,43 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
// if we don't collect them here it causes rust to overflow computing the types
let array_tokens = tokens.by_ref().take_while(|t| match t {
Ok(t) => match t.token() {
TokenType::Operator(Op::OpenStatement) => {
TokenType::Operator(Op::OpenArray) => {
depth += 1;
true
},
TokenType::Operator(Op::CloseStatement) => {
TokenType::Operator(Op::CloseArray) => {
depth -= 1;
depth > 0
}
_ => true,
}
_ => true,
}).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
}).collect::<Result<Vec<_>, Error>>()?;
// ... thanks to this conversion here. The compiler complains that the types don't
// match. There is code elsewhere in this codebase that looks exactly like this and
// still simply uses &mut Peekable<I> as the type. I don't understand why this code
// is special, but we have to do horribleness for it to work.
let mut array_tokens = array_tokens
.into_iter()
.map(|t| Ok(t))
.collect::<Vec<Result<Token, TokenizeError>>>()
.into_iter()
.peekable();
.collect::<Vec<_>>()
.into_iter();
let t = match Self::parse_type(&mut array_tokens) {
Ok(t) => t,
Err(ParseError::UnexpectedEndInput) => Type::Any,
Err(e) => return Err(e),
let t = if array_tokens.len() == 0 {
Type::Any
} else {
Parser::parse_type(&mut array_tokens.by_ref().peekable())?
};
Ok(Type::Array(Box::new(t)))
},
_ => Err(ParseError::UnwantedToken(token)),
_ => Err(Error::new(format!("unexpected token {}", token.lexeme))),
}
}
fn get_identifier(t: Option<Result<Token, TokenizeError>>) -> Result<String, ParseError> {
let token = t.ok_or(ParseError::UnexpectedEndInput)?
.map_err(|e| ParseError::TokenizeError(e))?;
fn get_identifier(t: Option<Result<Token, Error>>) -> Result<String, Error> {
let token = t.ok_or(Error::new(format!("expected an identifier, found nothing")))??;
match token.token() {
TokenType::Identifier(ident) => Ok(ident),
_ => Err(ParseError::InvalidIdentifier(token)),
}
}
}
impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Iterator for Parser<'a, I> {
type Item = Result<ParseTree, ParseError>;
fn next(&mut self) -> Option<Self::Item> {
let tree = self.parse();
match tree {
Ok(tree) => Some(Ok(tree)),
Err(e) => {
match e {
ParseError::NoInput => None,
_ => Some(Err(e)),
}
}
_ => Err(Error::new(format!("the identifier {} is invalid", token.lexeme))),
}
}
}
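Putting the refactored pieces together, a rough crate-internal sketch of the token → tree → value pipeline that Runtime::next now performs per expression (the program string is borrowed from the tokenizer test later in this commit; the printed result is an expectation, not verified here):
use std::io::Cursor;
use std::sync::{Arc, Mutex};

let program = ": f a * 12 a f 12";
let tokens = Tokenizer::new(Arc::new(Mutex::new(CodeIter::new(Cursor::new(program)))));
let trees = Parser::new().trees(tokens.peekable());
let mut values = Executor::new().values(trees);
// Only the first expression is pulled here; it should evaluate `f 12` to 144.
match values.next() {
    Some(Ok(v)) => println!("=> {v}"),
    Some(Err(e)) => eprintln!("error: {e}"),
    None => (),
}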

src/tokenizer.rs

@@ -1,48 +1,13 @@
use std::iter::Peekable;
use std::{error, io};
use std::collections::{VecDeque, HashMap};
use std::sync::{Arc, Mutex};
use crate::Type;
use crate::{CodeIter, Type};
use crate::error::Error;
use super::Value;
use std::fmt::{Display, Formatter};
use std::io::BufRead;
use std::sync::Arc;
use std::ops::Range;
#[derive(Debug)]
pub enum TokenizeError {
InvalidDynamicOperator(String),
InvalidNumericConstant(String),
InvalidIdentifier(String),
UnableToMatchToken(String),
InvalidCharacter(char),
UnclosedString,
IO(io::Error),
Regex(regex::Error),
}
impl Display for TokenizeError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
TokenizeError::InvalidDynamicOperator(op)
=> write!(f, "invalid dynamic operator `{op}`"),
TokenizeError::InvalidNumericConstant(t)
=> write!(f, "invalid numeric constant `{t}`"),
TokenizeError::InvalidIdentifier(ident)
=> write!(f, "invalid identifier `{ident}`"),
TokenizeError::UnableToMatchToken(token)
=> write!(f, "the token `{token}` was unable to be parsed"),
TokenizeError::InvalidCharacter(c) => write!(f, "`{c}` is not understood"),
TokenizeError::UnclosedString => write!(f, "newline was found before string was closed"),
TokenizeError::IO(io) => write!(f, "{io}"),
TokenizeError::Regex(re) => write!(f, "{re}"),
}
}
}
impl error::Error for TokenizeError {}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Op {
Add,
@@ -100,10 +65,7 @@ pub enum TokenType {
impl TokenType {
/// Parse a single token
fn parse(s: &str) -> Result<Self, TokenizeError> {
let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
fn parse(s: &str) -> Result<Self, Error> {
Ok(match s {
// Match keywords first
"true" => TokenType::Constant(Value::Bool(true)),
@@ -130,18 +92,18 @@ impl TokenType {
// then identifiers and numbers
_ => {
if identifier.is_match(s) {
if s.starts_with(char::is_alphabetic) {
TokenType::Identifier(s.to_string())
} else if number.is_match(s) {
} else if s.starts_with(|c: char| c.is_digit(10)) {
if let Ok(int) = s.parse::<i64>() {
TokenType::Constant(Value::Int(int))
} else if let Ok(float) = s.parse::<f64>() {
TokenType::Constant(Value::Float(float))
} else {
return Err(TokenizeError::InvalidNumericConstant(s.to_string()));
return Err(Error::new(format!("Invalid numeric constant `{s}`")));
}
} else {
return Err(TokenizeError::UnableToMatchToken(s.to_string()));
return Err(Error::new(format!("Couldn't match token `{s}`")));
}
}
})
@@ -153,17 +115,15 @@ pub struct Token {
t: TokenType,
pub lexeme: String,
pub line: usize,
pub file: Arc<String>,
pub location: Range<usize>,
}
impl Token {
pub fn new(t: TokenType, lexeme: String, file: Arc<String>, line: usize, column: usize) -> Self {
pub fn new(t: TokenType, lexeme: String, line: usize, column: usize) -> Self {
Self {
t,
line,
file,
location: column..column+lexeme.len(),
location: column..column + lexeme.len(),
lexeme,
}
}
@@ -174,81 +134,54 @@ impl Token {
}
/// Tokenize an input stream of source code for a Parser
#[derive(Clone)]
pub(crate) struct Tokenizer<R: BufRead> {
reader: R,
line: usize,
column: usize,
code: String,
filename: Arc<String>,
reader: Arc<Mutex<CodeIter<R>>>,
tokens: VecDeque<Token>,
}
impl<R: BufRead> Tokenizer<R> {
pub fn new(reader: R, filename: &str) -> Self {
pub fn new(reader: Arc<Mutex<CodeIter<R>>>) -> Self {
Self {
reader,
line: 0,
column: 0,
filename: Arc::new(filename.to_string()),
code: String::new(),
tokens: VecDeque::new(),
}
}
fn get_dot_count<I: Iterator<Item = char>>(&mut self, s: &mut Peekable<I>) -> Option<usize> {
fn next_char(&mut self) -> Option<char> {
let mut reader = self.reader.lock().unwrap();
let c = reader.next();
c
}
fn next_char_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option<char> {
let mut reader = self.reader.lock().unwrap();
let c = reader.next_if(func);
c
}
fn getpos(&self) -> (usize, usize) {
let reader = self.reader.lock().unwrap();
let r = reader.getpos();
r
}
fn get_dot_count(&mut self) -> usize {
let mut total = 0;
while let Some(n) = self.next_char_if(s, |&c| c == ':' || c == '.').map(|c| match c {
while let Some(n) = self.next_char_if(|&c| c == ':' || c == '.').map(|c| match c {
':' => 2,
'.' => 1,
_ => 0,
_ => unreachable!(),
}) {
total += n;
}
Some(total)
}
fn next_char<I: Iterator<Item = char>>(&mut self, iter: &mut Peekable<I>) -> Option<char> {
if let Some(c) = iter.next() {
self.column += 1;
Some(c)
} else {
None
}
}
fn next_char_if<I: Iterator<Item = char>>(
&mut self,
iter: &mut Peekable<I>,
pred: impl FnOnce(&char) -> bool) -> Option<char>
{
if let Some(c) = iter.next_if(pred) {
self.column += 1;
Some(c)
} else {
None
}
}
fn next_char_while<I: Iterator<Item = char>>(
&mut self,
iter: &mut Peekable<I>,
mut pred: impl FnMut(&char) -> bool) -> Option<char>
{
if let Some(c) = self.next_char(iter) {
if (pred)(&c) {
Some(c)
} else {
None
}
} else {
None
}
total
}
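As a standalone illustration (not part of the commit) of the arity rule implemented above: ':' contributes 2 and '.' contributes 1 to a dynamic operator's argument count, so an operator like Op::FunctionDefine(n) followed by ":.." becomes Op::FunctionDefine(n + 4).
// Hypothetical free function mirroring the counting rule for a plain string suffix.
fn dot_count(suffix: &str) -> usize {
    suffix
        .chars()
        .take_while(|&c| c == ':' || c == '.')
        .map(|c| if c == ':' { 2 } else { 1 })
        .sum()
}
// dot_count(":..") == 4, dot_count("") == 0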
/// Tokenizes more input and adds them to the internal queue
fn tokenize<I: Iterator<Item = char>>(&mut self, mut iter: Peekable<I>) -> Result<(), TokenizeError> {
fn tokenize(&mut self) -> Result<(), Error> {
let operators: HashMap<&'static str, Op> = HashMap::from([
("+", Op::Add),
("-", Op::Sub),
@@ -284,7 +217,7 @@ impl<R: BufRead> Tokenizer<R> {
("\\", Op::NonCall),
]);
let c = if let Some(c) = self.next_char(&mut iter) {
let c = if let Some(c) = self.next_char() {
c
} else {
return Ok(());
@@ -293,40 +226,52 @@ impl<R: BufRead> Tokenizer<R> {
if c.is_alphanumeric() {
let mut token = String::from(c);
while let Some(c) = self.next_char_if(&mut iter, |&c| c.is_alphanumeric() || c == '.' || c == '\'') {
while let Some(c) = self.next_char_if(|&c| c.is_alphanumeric() || c == '.' || c == '\'') {
token.push(c);
}
self.tokens.push_back(Token::new(TokenType::parse(&token)?, token, self.filename.clone(), self.line, self.column));
self.tokenize(iter)
let (line, column) = self.getpos();
self.tokens.push_back(Token::new(TokenType::parse(&token)
.map_err(|e| e.location(line, column - token.len() + 1..column + 1))?, token.clone(), line, column - token.len() + 1));
self.tokenize()
} else if c == '#' {
while self.next_char_while(&mut iter, |&c| c != '\n').is_some() {}
self.tokenize(iter)
while self.next_char_if(|&c| c != '\n').is_some() {}
self.tokenize()
} else if c == '\"' {
let mut token = String::new();
let (line, col) = self.getpos();
while let Some(c) = self.next_char(&mut iter) {
while let Some(c) = self.next_char() {
match c {
'"' => break,
'\n' => return Err(TokenizeError::UnclosedString),
'\\' => match iter.next() {
'\n' => return Err(
Error::new("Unclosed string literal".into())
.location(line, col..self.getpos().1)
.note("newlines are not allowed in string literals (try \\n)".into())),
'\\' => match self.next_char() {
Some('\\') => token.push('\\'),
Some('n') => token.push('\n'),
Some('t') => token.push('\t'),
Some('r') => token.push('\r'),
Some('\"') => token.push('"'),
Some(c) => token.push(c),
None => return Err(TokenizeError::UnclosedString),
None => return Err(
Error::new("Unclosed string literal".into())
.location(line, col..self.getpos().1)
.note("end of file found before \"".into())),
}
_ => token.push(c),
}
}
let (line, col) = self.getpos();
self.tokens.push_back(
Token::new(TokenType::Constant(
Value::String(token.clone())), token, self.filename.clone(), self.line, self.column));
Value::String(token.clone())), token, line, col));
self.tokenize(iter)
self.tokenize()
} else if operators.keys().any(|x| x.starts_with(c)) {
let mut token = String::from(c);
@@ -352,39 +297,31 @@ impl<R: BufRead> Tokenizer<R> {
// if not, we need to make sure that the next characters
// we grab *actually* match the last operator
if let Some(op) = possible.get(token.as_str()) {
let token = Token::new(TokenType::Operator(match op {
let t = TokenType::Operator(match op {
// special handling for "dynamic" operators
Op::FunctionDefine(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::FunctionDefine(n + count)
}
Op::FunctionDeclare(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::FunctionDeclare(n + count)
}
Op::LambdaDefine(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::LambdaDefine(n + count)
}
Op::FunctionDefine(n) => Op::FunctionDefine(n + self.get_dot_count()),
Op::FunctionDeclare(n) => Op::FunctionDeclare(n + self.get_dot_count()),
Op::LambdaDefine(n) => Op::LambdaDefine(n + self.get_dot_count()),
op => op.clone(),
}), token, self.filename.clone(), self.line, self.column);
});
let (line, col) = self.getpos();
let token = Token::new(t, token, line, col);
self.tokens.push_back(token);
break;
} else {
let next = match self.next_char_if(&mut iter, is_expected) {
let next = match self.next_char_if(is_expected) {
Some(c) => c,
None => return Err(TokenizeError::UnableToMatchToken(format!("{token}"))),
None => {
let (line, col) = self.getpos();
return Err(
Error::new(format!("the operator {token} is undefined"))
.location(line, col - token.len()..col))
}
};
token.push(next);
@@ -392,37 +329,22 @@ impl<R: BufRead> Tokenizer<R> {
}
0 => unreachable!(),
_ => {
let next = match self.next_char_if(&mut iter, is_expected) {
let c = self.next_char_if(is_expected);
let next = match c {
Some(c) => c,
None => {
let token = Token::new(TokenType::Operator(match possible.get(token.as_str()).unwrap() {
let t = TokenType::Operator(match possible.get(token.as_str()).unwrap() {
// special handling for "dynamic" operators
Op::FunctionDefine(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::FunctionDefine(n + count)
}
Op::FunctionDeclare(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::FunctionDeclare(n + count)
}
Op::LambdaDefine(n) => {
let count = match self.get_dot_count(&mut iter) {
Some(count) => count,
None => return Err(TokenizeError::InvalidDynamicOperator(token)),
};
Op::LambdaDefine(n + count)
}
Op::FunctionDefine(n) => Op::FunctionDefine(n + self.get_dot_count()),
Op::FunctionDeclare(n) => Op::FunctionDeclare(n + self.get_dot_count()),
Op::LambdaDefine(n) => Op::LambdaDefine(n + self.get_dot_count()),
op => op.clone(),
}), token, self.filename.clone(), self.line, self.column);
});
let (line, col) = self.getpos();
let token = Token::new(t, token, line, col);
// at this point, token must be in the hashmap possible, otherwise it wouldn't have any matches
self.tokens.push_back(token);
break;
}
@@ -433,70 +355,49 @@ impl<R: BufRead> Tokenizer<R> {
}
}
self.tokenize(iter)
self.tokenize()
} else if c.is_whitespace() {
self.tokenize(iter)
self.tokenize()
} else {
return Err(TokenizeError::InvalidCharacter(c));
let (line, col) = self.getpos();
return Err(
Error::new(format!("an unidentified character {c} was found"))
.location(line, col - 1..col));
}
}
}
impl<R: BufRead> Iterator for Tokenizer<R> {
type Item = Result<Token, TokenizeError>;
type Item = Result<Token, Error>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(token) = self.tokens.pop_front() {
return Some(Ok(token));
}
} else {
match self.tokenize() {
Ok(_) => (),
Err(e) => return Some(Err(e)),
};
let mut input = String::new();
match self.reader.read_line(&mut input) {
Ok(0) => None,
Ok(_n) => {
self.code.push_str(&input);
self.line += 1;
self.column = 0;
match self.tokenize(input.chars().peekable()) {
Ok(()) => (),
Err(e) => return Some(Err(e)),
}
self.next()
},
Err(e) => Some(Err(TokenizeError::IO(e))),
self.next()
}
}
}
#[cfg(test)]
mod tests {
use io::Cursor;
use crate::parser::Parser;
use super::*;
#[test]
fn tokenizer() {
let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]";
let tokens: Vec<Token> = Tokenizer::new(Cursor::new(program), "<tokenizer>").collect::<Result<_, _>>().unwrap();
println!("{tokens:#?}");
}
use std::io::Cursor;
#[test]
fn a() {
let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]";
let program = ": f a * 12 a f 12";
let mut tokenizer = Tokenizer::new(Cursor::new(program), "<a>").peekable();
let tokenizer = Tokenizer::new(Arc::new(Mutex::new(CodeIter::new(Cursor::new(program)))));
let mut globals = HashMap::new();
let mut parser = Parser::new(&mut tokenizer, &mut globals);
let t: Vec<_> = tokenizer.collect();
let tree = parser.next();
println!("{tree:#?}");
println!("{t:#?}");
}
}
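One more test sketch (not in the commit), e.g. as an integration test, that exercises the public Runtime iterator end to end; the program string and the expectation that an undeclared identifier surfaces as an Err are assumptions:
#[test]
fn undeclared_identifier_is_an_error() {
    use std::io::Cursor;
    // `undeclared` is never defined, so the first item should be an Err
    // carrying the new Error type with file and location information attached.
    let mut runtime = lamm::Runtime::new(Cursor::new("undeclared"), "<test>");
    let first = runtime.next();
    assert!(matches!(first, Some(Err(_))));
}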