diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7c544ea..aba7b6e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,6 +1,6 @@ use std::iter::Peekable; use std::{error, io}; -use std::collections::VecDeque; +use std::collections::{VecDeque, HashMap}; use super::Value; use std::fmt::{Display, Formatter}; @@ -77,69 +77,41 @@ pub(crate) enum Token { Constant(Value), } -fn get_dot_count(s: &str) -> Option { - s.chars().fold(Some(0), |acc, c| - match c { - ':' => acc.map(|acc| acc + 2), - '.' => acc.map(|acc| acc + 1), - _ => None, - } - ) +fn get_dot_count>(s: &mut Peekable) -> Option { + let mut total = 0; + + while let Some(n) = s.next_if(|&c| c == ':' || c == '.').map(|c| match c { + ':' => 2, + '.' => 1, + _ => 0, + }) { + total += n; + } + + Some(total) } impl Token { /// Parse a single token fn parse(s: &str) -> Result { - let string = regex::Regex::new(r#"".+""#).map_err(|e| TokenizeError::Regex(e))?; let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?; let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?; match s { - // First check if s is an operator - "+" => Ok(Token::Operator(Op::Add)), - "-" => Ok(Token::Operator(Op::Sub)), - "*" => Ok(Token::Operator(Op::Mul)), - "/" => Ok(Token::Operator(Op::Div)), - "**" => Ok(Token::Operator(Op::Exp)), - "%" => Ok(Token::Operator(Op::Mod)), - "=" => Ok(Token::Operator(Op::Equ)), - "." => Ok(Token::Operator(Op::LazyEqu)), - "~" => Ok(Token::Operator(Op::Compose)), - "," => Ok(Token::Operator(Op::Id)), - "?" => Ok(Token::Operator(Op::If)), - "??" => Ok(Token::Operator(Op::IfElse)), - ">" => Ok(Token::Operator(Op::GreaterThan)), - "<" => Ok(Token::Operator(Op::LessThan)), - ">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)), - "<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)), - "==" => Ok(Token::Operator(Op::EqualTo)), - "[" => Ok(Token::Operator(Op::OpenArray)), - "]" => Ok(Token::Operator(Op::CloseArray)), - - // then some keywords + // Match keywords first "true" => Ok(Token::Constant(Value::Bool(true))), "false" => Ok(Token::Constant(Value::Bool(false))), "not" => Ok(Token::Operator(Op::Not)), - - // Type casting "int" => Ok(Token::Operator(Op::IntCast)), "float" => Ok(Token::Operator(Op::FloatCast)), "bool" => Ok(Token::Operator(Op::BoolCast)), "string" => Ok(Token::Operator(Op::StringCast)), - - // misc "print" => Ok(Token::Operator(Op::Print)), "empty" => Ok(Token::Operator(Op::Empty)), - // then variable length keywords + // then identifiers and numbers _ => { - if s.starts_with(":") { - Ok(Token::Operator(Op::FunctionDeclare( - get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))? - ))) - } else if string.is_match(s) { - Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string()))) - } else if identifier.is_match(s) { + if identifier.is_match(s) { Ok(Token::Identifier(s.to_string())) } else if number.is_match(s) { if let Ok(int) = s.parse::() { @@ -173,7 +145,28 @@ impl Tokenizer { /// Tokenizes more input and adds them to the internal queue fn tokenize>(&mut self, mut iter: Peekable) { - const OPERATOR_CHARS: &'static str = "!@$%^&*()-=+[]{}|;:,<.>/?"; + let operators: HashMap<&'static str, Op> = HashMap::from([ + ("+", Op::Add), + ("-", Op::Sub), + ("*", Op::Mul), + ("/", Op::Div), + ("**", Op::Exp), + ("%", Op::Mod), + ("=", Op::Equ), + (".", Op::LazyEqu), + (":", Op::FunctionDeclare(1)), + ("~", Op::Compose), + (",", Op::Id), + ("?", Op::If), + ("??", Op::IfElse), + (">", Op::GreaterThan), + ("<", Op::LessThan), + (">=", Op::GreaterThanOrEqualTo), + ("<=", Op::LessThanOrEqualTo), + ("==", Op::EqualTo), + ("[", Op::OpenArray), + ("]", Op::CloseArray), + ]); let c = if let Some(c) = iter.next() { c @@ -188,19 +181,9 @@ impl Tokenizer { token.push(c); } - self.tokens.push_back(Token::parse(&token)); - self.tokenize(iter) - } else if OPERATOR_CHARS.contains(c) { - let mut token = String::from(c); - - while let Some(c) = iter.next_if(|&c| OPERATOR_CHARS.contains(c)) { - token.push(c); - } - self.tokens.push_back(Token::parse(&token)); self.tokenize(iter) } else if c == '#' { - // consume comments let _: String = iter.by_ref().take_while(|&c| c != '\n').collect(); } else if c == '\"' { let mut token = String::new(); @@ -229,11 +212,64 @@ impl Tokenizer { } self.tokens.push_back(Ok(Token::Constant(Value::String(token)))); + self.tokenize(iter) + } else if operators.keys().any(|x| x.starts_with(c)) { + let mut token = String::from(c); + + loop { + let possible: HashMap<&'static str, Op> = operators + .clone().into_iter() + .filter(|(key, _)| key.starts_with(&token)) + .collect(); + + let is_expected = |c: &char| + possible.iter().any(|(op, _)| match op.chars().nth(token.len()) { + Some(i) => *c == i, + None => false, + }); + + match possible.len() { + 1 => { + self.tokens.push_back(Ok(Token::Operator(match possible.get(token.as_str()).unwrap().clone() { + Op::FunctionDeclare(n) => { + let count = match get_dot_count(&mut iter) { + Some(count) => count, + None => { + self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token))); + return; + } + }; + Op::FunctionDeclare(n + count) + } + op => op, + }))); + break; + } + 0 => { + self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c))); + return; + } + _ => { + let next = match iter.next_if(is_expected) { + Some(c) => c, + None => { + // at this point, token must be in the hashmap possible, otherwise it wouldnt have any matches + self.tokens.push_back(Ok(Token::Operator(possible.get(token.as_str()).unwrap().clone()))); + break; + } + }; + + token.push(next); + } + } + } + self.tokenize(iter) } else if c.is_whitespace() { self.tokenize(iter) } else { self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c))); + return; } } }