From 7dc88b8fb064fa4c3faa6068fa6593ab40dd56f5 Mon Sep 17 00:00:00 2001 From: minneelyyyy Date: Wed, 16 Oct 2024 15:30:38 -0400 Subject: [PATCH] new function parsing --- src/executor.rs | 42 ++------ src/parser.rs | 245 ++++++++++++++++++++++++----------------------- src/tokenizer.rs | 65 ++++++++++--- 3 files changed, 184 insertions(+), 168 deletions(-) diff --git a/src/executor.rs b/src/executor.rs index 2ff5b1b..05aadd0 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -232,31 +232,11 @@ where } }, ParseTree::FunctionDefinition(func, scope) => { - let ident = func.name.clone().unwrap(); - - let existing = locals.get(&ident) - .or(self.globals.get(&ident)); - - match existing { - Some(Object::Function(f)) => { - if f.body.is_some() || f.arg_names.is_some() { - return Err(RuntimeError::ImmutableError(ident.clone())); - } - - let new_func = Function::named(func.name.unwrap().as_str(), func.t.clone(), func.arg_names.clone(), func.body.clone()); + let locals = locals.to_mut(); - let locals = locals.to_mut(); - locals.insert(ident.clone(), Object::Function(new_func)); + locals.insert(func.name.clone().unwrap(), Object::Function(func)); - self.exec(scope, &mut Cow::Borrowed(&locals)) - } - Some(Object::Variable(_)) => Err(RuntimeError::ImmutableError(ident.clone())), - None => { - let locals = locals.to_mut(); - locals.insert(ident.clone(), Object::Function(func)); - self.exec(scope, &mut Cow::Borrowed(&locals)) - } - } + self.exec(scope, &mut Cow::Borrowed(&locals)) }, ParseTree::Compose(x, y) => { self.exec(x, locals)?; @@ -296,6 +276,9 @@ where Some(Object::Function(f)) => { let locals = locals.to_mut(); + assert!(f.arg_names.is_some()); + assert!(f.body.is_some()); + for ((t, name), tree) in std::iter::zip(std::iter::zip(f.t.1, f.arg_names.unwrap()), args) { let v = self.exec(Box::new(tree), &mut Cow::Borrowed(locals))?; @@ -369,18 +352,7 @@ where Ok(Value::Nil) } } - ParseTree::FunctionDeclaration(func, scope) => { - let locals = locals.to_mut(); - let name = func.name.clone().unwrap(); - - if locals.contains_key(&name) { - Err(RuntimeError::ImmutableError(name.clone())) - } else { - locals.insert(name, Object::Function(func)); - self.exec(scope, &mut Cow::Borrowed(&locals)) - } - } - ParseTree::LambdaDefinition(func) => Ok(Value::Function(func)), + ParseTree::LambdaDefinition(func) => todo!(), } } } diff --git a/src/parser.rs b/src/parser.rs index f61d288..2376996 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,13 +5,14 @@ use std::error; use std::collections::HashMap; use std::fmt::Display; use std::borrow::Cow; +use std::iter::Peekable; #[derive(Debug)] pub enum ParseError { NoInput, UnexpectedEndInput, IdentifierUndefined(String), - InvalidIdentifier, + InvalidIdentifier(Token), FunctionUndefined(String), VariableUndefined(String), UnmatchedArrayClose, @@ -25,7 +26,7 @@ impl Display for ParseError { match self { ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"), ParseError::IdentifierUndefined(name) => write!(f, "Undefined variable `{name}`"), - ParseError::InvalidIdentifier => write!(f, "Invalid identifier"), + ParseError::InvalidIdentifier(t) => write!(f, "Invalid identifier `{t:?}`"), ParseError::FunctionUndefined(name) => write!(f, "Undefined function `{name}`"), ParseError::VariableUndefined(name) => write!(f, "Undefined variable `{name}`"), ParseError::NoInput => write!(f, "No input given"), @@ -64,7 +65,6 @@ pub(crate) enum ParseTree { Equ(String, Box, Box), LazyEqu(String, Box, Box), FunctionDefinition(Function, Box), - FunctionDeclaration(Function, Box), LambdaDefinition(Function), // Functional Operations @@ -116,7 +116,7 @@ macro_rules! three_arg { impl ParseTree { fn parse( - tokens: &mut I, + tokens: &mut Peekable, globals: &HashMap, locals: &mut Cow>) -> Result where @@ -165,63 +165,32 @@ impl ParseTree { _ => panic!("Operator literally changed under your nose"), } } else { - Err(ParseError::InvalidIdentifier) + Err(ParseError::InvalidIdentifier(token)) } } - Op::FunctionDefine(nargs) => { - let token = tokens.next() - .ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))?; + Op::FunctionDefine(arg_count) => { + let mut f = ParseTree::parse_function(tokens, arg_count)?; - if let Token::Identifier(ident) = token { - let args: Vec = tokens.take(nargs) - .map(|token| match token { - Ok(Token::Identifier(ident)) => Ok(ident), - Ok(_) => Err(ParseError::InvalidIdentifier), - Err(e) => Err(ParseError::TokenizeError(e)), - }) - .collect::, ParseError>>()?; + assert!(f.arg_names.is_some()); + assert!(f.name.is_some()); + assert!(f.body.is_none()); - let f = if locals.contains_key(&ident) { - let locals = locals.to_mut(); - let f = locals.get(&ident).unwrap(); - let f = f.clone(); - - // iterate over f's types and push them - for (t, name) in std::iter::zip(f.t.1.clone(), args.clone()) { - match t { - Type::Function(finner) => { - locals.insert(name.clone(), Function::named(&name, finner, None, None)); - } - _ => (), - } - } - - Function::named( - &ident, - f.t.clone(), - Some(args), - Some(Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&locals))?))) - } else { - let f = Function::named( - &ident, - FunctionType(Box::new(Type::Any), args.iter().map(|_| Type::Any).collect()), - Some(args), - Some(Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&locals))?))); - - let locals = locals.to_mut(); - - locals.insert(ident.clone(), f.clone()); - - f - }; - - Ok(ParseTree::FunctionDefinition(f, - Box::new(ParseTree::parse(tokens, globals, locals)?))) - } else { - Err(ParseError::InvalidIdentifier) + if locals.contains_key(&f.name.clone().unwrap()) { + return Err(ParseError::ImmutableError(f.name.unwrap())); } - } + + let locals = locals.to_mut(); + + // recursion requires that f's prototype is present in locals + locals.insert(f.name.clone().unwrap(), f.clone()); + + f.body = Some(Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&locals))?)); + assert!(f.body.is_some()); + + println!("{:?} = {:?}", f.name, f); + + Ok(ParseTree::FunctionDefinition(f, Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&locals))?))) + }, Op::Compose => two_arg!(Compose, tokens, globals, locals), Op::Id => one_arg!(Id, tokens, globals, locals), Op::If => two_arg!(If, tokens, globals, locals), @@ -273,69 +242,9 @@ impl ParseTree { Op::NotEqualTo => two_arg!(NotEqualTo, tokens, globals, locals), Op::And => two_arg!(And, tokens, globals, locals), Op::Or => two_arg!(Or, tokens, globals, locals), - Op::FunctionDeclare(arg_count) => { - let name = match tokens.next() - .ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))? - { - Token::Identifier(x) => x, - _ => return Err(ParseError::InvalidIdentifier), - }; - - let args: Vec = (0..arg_count) - .map(|_| Self::parse_type(tokens)) - .collect::>()?; - - let rett = Self::parse_type(tokens)?; - - if locals.contains_key(&name) { - println!("{name} already found: {locals:?}"); - return Err(ParseError::ImmutableError(name.clone())); - } - - let f = Function::named( - &name, - FunctionType(Box::new(rett), args), - None, - None); - - let locals = locals.to_mut(); - - locals.insert(name, f.clone()); - - Ok(ParseTree::FunctionDeclaration( - f, - Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?))) - } - Op::LambdaDefine(arg_count) => { - let args: Vec = tokens.take(arg_count) - .map(|token| match token { - Ok(Token::Identifier(ident)) => Ok(ident), - Ok(_) => Err(ParseError::InvalidIdentifier), - Err(e) => Err(ParseError::TokenizeError(e)), - }) - .collect::, ParseError>>()?; - - Ok(ParseTree::LambdaDefinition( - Function::lambda( - FunctionType(Box::new(Type::Any), args.clone().into_iter().map(|_| Type::Any).collect()), - args, - Some(Box::new(ParseTree::parse(tokens, globals, &mut Cow::Borrowed(&*locals))?))))) - } - Op::NonCall => { - let ident = match tokens.next().ok_or(ParseError::UnexpectedEndInput)? - .map_err(|e| ParseError::TokenizeError(e))? - { - Token::Identifier(x) => x, - _ => return Err(ParseError::InvalidIdentifier), - }; - - if let Some(f) = locals.clone().get(&ident).or(globals.clone().get(&ident)).cloned() { - Ok(ParseTree::Constant(Value::Function(f))) - } else { - Err(ParseError::FunctionUndefined(ident.clone())) - } - } + Op::LambdaDefine(_arg_count) => todo!(), + Op::NonCall => todo!(), + op => Err(ParseError::UnwantedToken(Token::Operator(op))), } } t => Err(ParseError::UnwantedToken(t)), @@ -346,6 +255,92 @@ impl ParseTree { } } + fn parse_function(tokens: &mut Peekable, arg_count: usize) -> Result + where + I: Iterator>, + { + let name = Self::get_identifier(tokens.next())?; + let (t, args) = Self::parse_function_declaration(tokens, arg_count)?; + + Ok(Function::named(&name, t, Some(args), None)) + } + + fn parse_function_declaration(tokens: &mut Peekable, arg_count: usize) -> Result<(FunctionType, Vec), ParseError> + where + I: Iterator> + { + let args: Vec<(Type, String)> = (0..arg_count) + .map(|_| Self::parse_function_declaration_parameter(tokens)) + .collect::>()?; + + + let (types, names): (Vec<_>, Vec<_>) = args.into_iter().unzip(); + let mut ret = Type::Any; + + if let Some(t) = tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow)))) + { + if let Err(e) = t { + return Err(ParseError::TokenizeError(e)); + } + + ret = Self::parse_type(tokens)?; + } + + Ok((FunctionType(Box::new(ret), types), names)) + } + + fn parse_function_declaration_parameter(mut tokens: &mut Peekable) -> Result<(Type, String), ParseError> + where + I: Iterator> + { + match tokens.next() { + // untyped variable + Some(Ok(Token::Identifier(x))) => Ok((Type::Any, x)), + + // typed variable + Some(Ok(Token::Operator(Op::TypeDeclaration))) => { + let name = Self::get_identifier(tokens.next())?; + let t = Self::parse_type(&mut tokens)?; + + Ok((t, name)) + } + + // untyped function (all args Any, return type Any) + Some(Ok(Token::Operator(Op::FunctionDefine(n)))) => { + let name = Self::get_identifier(tokens.next())?; + let args = (0..n).map(|_| Type::Any).collect(); + + Ok((Type::Function(FunctionType(Box::new(Type::Any), args)), name)) + } + + // typed function + Some(Ok(Token::Operator(Op::FunctionDeclare(n)))) => { + let name = Self::get_identifier(tokens.next())?; + let args = (0..n).map(|_| Self::parse_type(&mut tokens)).collect::>()?; + let mut ret = Type::Any; + + // this is annoying + // inside of the next_if closure, we already can know that its an error + // and return it, but we cannot return out of a closure + if let Some(t) = tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow)))) + { + // so we just check for an error here. this is the only reason t exists. + if let Err(e) = t { + return Err(ParseError::TokenizeError(e)); + } + + ret = Self::parse_type(&mut tokens)?; + } + + Ok((Type::Function(FunctionType(Box::new(ret), args)), name)) + } + + Some(Ok(t)) => Err(ParseError::UnwantedToken(t)), + Some(Err(e)) => Err(ParseError::TokenizeError(e)), + None => Err(ParseError::UnexpectedEndInput), + } + } + fn parse_type(tokens: &mut I) -> Result where I: Iterator>, @@ -375,6 +370,16 @@ impl ParseTree { None => Err(ParseError::UnexpectedEndInput), } } + + fn get_identifier(t: Option>) -> Result { + match t.ok_or(ParseError::UnexpectedEndInput)? + .map_err(|e| ParseError::TokenizeError(e)) + { + Ok(Token::Identifier(ident)) => Ok(ident), + Ok(t) => Err(ParseError::InvalidIdentifier(t)), + Err(e) => Err(e), + } + } } /// Parses input tokens and produces ParseTrees for an Executor @@ -418,7 +423,7 @@ impl>> Iterator for Parser { type Item = Result; fn next(&mut self) -> Option { - let tree = ParseTree::parse(&mut self.tokens, &self.globals, &mut Cow::Borrowed(&self.locals)); + let tree = ParseTree::parse(&mut self.tokens.by_ref().peekable(), &self.globals, &mut Cow::Borrowed(&self.locals)); match tree { Ok(tree) => Some(Ok(tree)), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 69e1adb..57b97d8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -41,7 +41,7 @@ impl Display for TokenizeError { impl error::Error for TokenizeError {} -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub(crate) enum Op { Add, Sub, @@ -51,9 +51,11 @@ pub(crate) enum Op { Equ, Mod, LazyEqu, + TypeDeclaration, FunctionDefine(usize), FunctionDeclare(usize), LambdaDefine(usize), + Arrow, Compose, Id, If, @@ -169,9 +171,11 @@ impl Tokenizer { ("%", Op::Mod), ("=", Op::Equ), (".", Op::LazyEqu), + ("?.", Op::TypeDeclaration), (":", Op::FunctionDefine(1)), ("?:", Op::FunctionDeclare(1)), (";", Op::LambdaDefine(1)), + ("->", Op::Arrow), ("~", Op::Compose), (",", Op::Id), ("?", Op::If), @@ -262,16 +266,6 @@ impl Tokenizer { if let Some(op) = possible.get(token.as_str()) { self.tokens.push_back(Ok(Token::Operator(match op { // special handling for "dynamic" operators - Op::FunctionDeclare(n) => { - let count = match get_dot_count(&mut iter) { - Some(count) => count, - None => { - self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); - return; - } - }; - Op::FunctionDeclare(n + count) - } Op::FunctionDefine(n) => { let count = match get_dot_count(&mut iter) { Some(count) => count, @@ -282,6 +276,16 @@ impl Tokenizer { }; Op::FunctionDefine(n + count) } + Op::FunctionDeclare(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDeclare(n + count) + } Op::LambdaDefine(n) => { let count = match get_dot_count(&mut iter) { Some(count) => count, @@ -314,7 +318,42 @@ impl Tokenizer { Some(c) => c, None => { // at this point, token must be in the hashmap possible, otherwise it wouldnt have any matches - self.tokens.push_back(Ok(Token::Operator(possible.get(token.as_str()).unwrap().clone()))); + self.tokens.push_back(Ok(Token::Operator(match possible.get(token.as_str()).unwrap() { + // special handling for "dynamic" operators + Op::FunctionDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + println!("{n} + {count}"); + + Op::FunctionDefine(n + count) + } + Op::FunctionDeclare(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDeclare(n + count) + } + Op::LambdaDefine(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::LambdaDefine(n + count) + } + op => op.clone(), + }))); break; } }; @@ -374,7 +413,7 @@ mod tests { #[test] fn uwu() { - let program = "?:. apply : Any Any Any Any :. apply f x f x : id x x apply ; x id x 12"; + let program = ":. add x y + x y"; let tokens: Vec = Tokenizer::from_str(program).unwrap().collect::>().unwrap();