major tokenizer fixes
src/tokenizer.rs
@@ -1,6 +1,6 @@
+use std::iter::Peekable;
 use std::{error, io};
-use std::collections::VecDeque;
+use std::collections::{VecDeque, HashMap};
 
 use super::Value;
 use std::fmt::{Display, Formatter};
@@ -77,69 +77,41 @@ pub(crate) enum Token {
     Constant(Value),
 }
 
-fn get_dot_count(s: &str) -> Option<usize> {
-    s.chars().fold(Some(0), |acc, c|
-        match c {
-            ':' => acc.map(|acc| acc + 2),
-            '.' => acc.map(|acc| acc + 1),
-            _ => None,
-        }
-    )
+fn get_dot_count<I: Iterator<Item = char>>(s: &mut Peekable<I>) -> Option<usize> {
+    let mut total = 0;
+
+    while let Some(n) = s.next_if(|&c| c == ':' || c == '.').map(|c| match c {
+        ':' => 2,
+        '.' => 1,
+        _ => 0,
+    }) {
+        total += n;
+    }
+
+    Some(total)
 }
 
 impl Token {
     /// Parse a single token
     fn parse(s: &str) -> Result<Self, TokenizeError> {
-        let string = regex::Regex::new(r#"".+""#).map_err(|e| TokenizeError::Regex(e))?;
         let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
         let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
 
         match s {
-            // First check if s is an operator
-            "+" => Ok(Token::Operator(Op::Add)),
-            "-" => Ok(Token::Operator(Op::Sub)),
-            "*" => Ok(Token::Operator(Op::Mul)),
-            "/" => Ok(Token::Operator(Op::Div)),
-            "**" => Ok(Token::Operator(Op::Exp)),
-            "%" => Ok(Token::Operator(Op::Mod)),
-            "=" => Ok(Token::Operator(Op::Equ)),
-            "." => Ok(Token::Operator(Op::LazyEqu)),
-            "~" => Ok(Token::Operator(Op::Compose)),
-            "," => Ok(Token::Operator(Op::Id)),
-            "?" => Ok(Token::Operator(Op::If)),
-            "??" => Ok(Token::Operator(Op::IfElse)),
-            ">" => Ok(Token::Operator(Op::GreaterThan)),
-            "<" => Ok(Token::Operator(Op::LessThan)),
-            ">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)),
-            "<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
-            "==" => Ok(Token::Operator(Op::EqualTo)),
-            "[" => Ok(Token::Operator(Op::OpenArray)),
-            "]" => Ok(Token::Operator(Op::CloseArray)),
-
-            // then some keywords
+            // Match keywords first
             "true" => Ok(Token::Constant(Value::Bool(true))),
            "false" => Ok(Token::Constant(Value::Bool(false))),
             "not" => Ok(Token::Operator(Op::Not)),
 
             // Type casting
             "int" => Ok(Token::Operator(Op::IntCast)),
             "float" => Ok(Token::Operator(Op::FloatCast)),
             "bool" => Ok(Token::Operator(Op::BoolCast)),
             "string" => Ok(Token::Operator(Op::StringCast)),
 
             // misc
             "print" => Ok(Token::Operator(Op::Print)),
             "empty" => Ok(Token::Operator(Op::Empty)),
 
-            // then variable length keywords
+            // then identifiers and numbers
             _ => {
-                if s.starts_with(":") {
-                    Ok(Token::Operator(Op::FunctionDeclare(
-                        get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
-                    )))
-                } else if string.is_match(s) {
-                    Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string())))
-                } else if identifier.is_match(s) {
+                if identifier.is_match(s) {
                     Ok(Token::Identifier(s.to_string()))
                 } else if number.is_match(s) {
                     if let Ok(int) = s.parse::<i64>() {
@@ -173,7 +145,28 @@ impl<R: BufRead> Tokenizer<R> {
 
     /// Tokenizes more input and adds them to the internal queue
     fn tokenize<I: Iterator<Item = char>>(&mut self, mut iter: Peekable<I>) {
-        const OPERATOR_CHARS: &'static str = "!@$%^&*()-=+[]{}|;:,<.>/?";
+        let operators: HashMap<&'static str, Op> = HashMap::from([
+            ("+", Op::Add),
+            ("-", Op::Sub),
+            ("*", Op::Mul),
+            ("/", Op::Div),
+            ("**", Op::Exp),
+            ("%", Op::Mod),
+            ("=", Op::Equ),
+            (".", Op::LazyEqu),
+            (":", Op::FunctionDeclare(1)),
+            ("~", Op::Compose),
+            (",", Op::Id),
+            ("?", Op::If),
+            ("??", Op::IfElse),
+            (">", Op::GreaterThan),
+            ("<", Op::LessThan),
+            (">=", Op::GreaterThanOrEqualTo),
+            ("<=", Op::LessThanOrEqualTo),
+            ("==", Op::EqualTo),
+            ("[", Op::OpenArray),
+            ("]", Op::CloseArray),
+        ]);
 
         let c = if let Some(c) = iter.next() {
             c
@@ -188,19 +181,9 @@ impl<R: BufRead> Tokenizer<R> {
                 token.push(c);
             }
 
             self.tokens.push_back(Token::parse(&token));
             self.tokenize(iter)
-        } else if OPERATOR_CHARS.contains(c) {
-            let mut token = String::from(c);
-
-            while let Some(c) = iter.next_if(|&c| OPERATOR_CHARS.contains(c)) {
-                token.push(c);
-            }
-
-            self.tokens.push_back(Token::parse(&token));
-            self.tokenize(iter)
         } else if c == '#' {
             // consume comments
             let _: String = iter.by_ref().take_while(|&c| c != '\n').collect();
         } else if c == '\"' {
             let mut token = String::new();
@@ -229,11 +212,64 @@ impl<R: BufRead> Tokenizer<R> {
             }
 
             self.tokens.push_back(Ok(Token::Constant(Value::String(token))));
             self.tokenize(iter)
+        } else if operators.keys().any(|x| x.starts_with(c)) {
+            let mut token = String::from(c);
+
+            loop {
+                let possible: HashMap<&'static str, Op> = operators
+                    .clone().into_iter()
+                    .filter(|(key, _)| key.starts_with(&token))
+                    .collect();
+
+                let is_expected = |c: &char|
+                    possible.iter().any(|(op, _)| match op.chars().nth(token.len()) {
+                        Some(i) => *c == i,
+                        None => false,
+                    });
+
+                match possible.len() {
+                    1 => {
+                        self.tokens.push_back(Ok(Token::Operator(match possible.get(token.as_str()).unwrap().clone() {
+                            Op::FunctionDeclare(n) => {
+                                let count = match get_dot_count(&mut iter) {
+                                    Some(count) => count,
+                                    None => {
+                                        self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
+                                        return;
+                                    }
+                                };
+
+                                Op::FunctionDeclare(n + count)
+                            }
+                            op => op,
+                        })));
+
+                        break;
+                    }
+                    0 => {
+                        self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c)));
+                        return;
+                    }
+                    _ => {
+                        let next = match iter.next_if(is_expected) {
+                            Some(c) => c,
+                            None => {
+                                // at this point, token must be in the hashmap possible, otherwise it wouldn't have any matches
+                                self.tokens.push_back(Ok(Token::Operator(possible.get(token.as_str()).unwrap().clone())));
+                                break;
+                            }
+                        };
+
+                        token.push(next);
+                    }
+                }
+            }
+
+            self.tokenize(iter)
         } else if c.is_whitespace() {
             self.tokenize(iter)
         } else {
             self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c)));
             return;
         }
     }
 }
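
The core of the change is the matching loop in tokenize: rather than slurping every character from a fixed OPERATOR_CHARS set and hoping Token::parse recognizes the result, the tokenizer grows the token one character at a time, keeps only the operators whose spelling still starts with the current prefix, and commits to the longest match. Below is a minimal standalone sketch of that idea, assuming a trimmed-down Op and a hypothetical scan_operator helper (the commit's real loop additionally expands ':' / '.' runs into Op::FunctionDeclare via get_dot_count and pushes errors into the token queue instead of returning Option):

use std::collections::HashMap;
use std::iter::Peekable;

#[derive(Clone, Debug, PartialEq)]
enum Op { Mul, Exp, If, IfElse }

// Longest-match operator scan: extend the token only while at least one
// known operator still starts with it, then look up what we ended on.
fn scan_operator<I: Iterator<Item = char>>(iter: &mut Peekable<I>) -> Option<Op> {
    let operators: HashMap<&'static str, Op> = HashMap::from([
        ("*", Op::Mul),
        ("**", Op::Exp),
        ("?", Op::If),
        ("??", Op::IfElse),
    ]);

    let mut token = String::from(iter.next()?);

    loop {
        // Operators whose spelling begins with everything read so far.
        let possible: Vec<&'static str> = operators
            .keys()
            .copied()
            .filter(|key| key.starts_with(token.as_str()))
            .collect();

        if possible.is_empty() {
            return None; // no operator starts with this prefix
        }

        // Consume the next character only if it extends some candidate;
        // otherwise the current token is already the longest match.
        let extends = |c: &char| possible.iter().any(|key| key.chars().nth(token.len()) == Some(*c));

        match iter.next_if(extends) {
            Some(c) => token.push(c),
            None => return operators.get(token.as_str()).cloned(),
        }
    }
}

fn main() {
    assert_eq!(scan_operator(&mut "**".chars().peekable()), Some(Op::Exp));
    assert_eq!(scan_operator(&mut "*2".chars().peekable()), Some(Op::Mul));
    assert_eq!(scan_operator(&mut "??".chars().peekable()), Some(Op::IfElse));
    assert_eq!(scan_operator(&mut "?x".chars().peekable()), Some(Op::If));
}

This is what lets "**" come out as a single Exp token and "??" as IfElse without a second pass, and it is why the (":", Op::FunctionDeclare(1)) entry can hand off to get_dot_count exactly at the point where a dynamic operator begins.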