Fix token-matching order so keywords (e.g. "true", "int", "not") are checked before the identifier regex and are no longer mis-parsed as identifiers

This commit is contained in:
2024-10-14 23:58:30 -04:00
parent a1b76956dc
commit 7e8009617e

View File

@@ -90,59 +90,57 @@ impl Token {
let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?; let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?; let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
if string.is_match(s) { match s {
Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string()))) // First check if s is an operator
} else if identifier.is_match(s) { "+" => Ok(Token::Operator(Op::Add)),
Ok(Token::Identifier(s.to_string())) "-" => Ok(Token::Operator(Op::Sub)),
} else if number.is_match(s) { "*" => Ok(Token::Operator(Op::Mul)),
if let Ok(int) = s.parse::<i64>() { "/" => Ok(Token::Operator(Op::Div)),
Ok(Token::Constant(Value::Int(int))) "**" => Ok(Token::Operator(Op::Exp)),
} else if let Ok(float) = s.parse::<f64>() { "%" => Ok(Token::Operator(Op::Mod)),
Ok(Token::Constant(Value::Float(float))) "=" => Ok(Token::Operator(Op::Equ)),
} else { "." => Ok(Token::Operator(Op::LazyEqu)),
Err(TokenizeError::InvalidNumericConstant(s.to_string())) "~" => Ok(Token::Operator(Op::Compose)),
} "," => Ok(Token::Operator(Op::Id)),
} else { "?" => Ok(Token::Operator(Op::If)),
match s { "??" => Ok(Token::Operator(Op::IfElse)),
// First check if s is an operator ">" => Ok(Token::Operator(Op::GreaterThan)),
"+" => Ok(Token::Operator(Op::Add)), "<" => Ok(Token::Operator(Op::LessThan)),
"-" => Ok(Token::Operator(Op::Sub)), ">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)),
"*" => Ok(Token::Operator(Op::Mul)), "<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
"/" => Ok(Token::Operator(Op::Div)), "==" => Ok(Token::Operator(Op::EqualTo)),
"**" => Ok(Token::Operator(Op::Exp)),
"%" => Ok(Token::Operator(Op::Mod)), // then some keywords
"=" => Ok(Token::Operator(Op::Equ)), "true" => Ok(Token::Constant(Value::Bool(true))),
"." => Ok(Token::Operator(Op::LazyEqu)), "false" => Ok(Token::Constant(Value::Bool(false))),
"~" => Ok(Token::Operator(Op::Compose)), "not" => Ok(Token::Operator(Op::Not)),
"," => Ok(Token::Operator(Op::Id)),
"?" => Ok(Token::Operator(Op::If)), // Type casting
"??" => Ok(Token::Operator(Op::IfElse)), "int" => Ok(Token::Operator(Op::IntCast)),
">" => Ok(Token::Operator(Op::GreaterThan)), "float" => Ok(Token::Operator(Op::FloatCast)),
"<" => Ok(Token::Operator(Op::LessThan)), "bool" => Ok(Token::Operator(Op::BoolCast)),
">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)), "string" => Ok(Token::Operator(Op::StringCast)),
"<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
"==" => Ok(Token::Operator(Op::EqualTo)), // then variable length keywords
_ => {
// then some keywords if s.starts_with(":") {
"true" => Ok(Token::Constant(Value::Bool(true))), Ok(Token::Operator(Op::FunctionDeclare(
"false" => Ok(Token::Constant(Value::Bool(false))), get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
"not" => Ok(Token::Operator(Op::Not)), )))
} else if string.is_match(s) {
// Type casting Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string())))
"int" => Ok(Token::Operator(Op::IntCast)), } else if identifier.is_match(s) {
"float" => Ok(Token::Operator(Op::FloatCast)), Ok(Token::Identifier(s.to_string()))
"bool" => Ok(Token::Operator(Op::BoolCast)), } else if number.is_match(s) {
"string" => Ok(Token::Operator(Op::StringCast)), if let Ok(int) = s.parse::<i64>() {
Ok(Token::Constant(Value::Int(int)))
// then variable length keywords } else if let Ok(float) = s.parse::<f64>() {
_ => { Ok(Token::Constant(Value::Float(float)))
if s.starts_with(":") {
Ok(Token::Operator(Op::FunctionDeclare(
get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
)))
} else { } else {
Err(TokenizeError::UnableToMatchToken(s.to_string())) Err(TokenizeError::InvalidNumericConstant(s.to_string()))
} }
} else {
Err(TokenizeError::UnableToMatchToken(s.to_string()))
} }
} }
} }
@@ -258,20 +256,4 @@ impl<R: BufRead> std::iter::Iterator for Tokenizer<R> {
Err(e) => Some(Err(TokenizeError::IO(e))), Err(e) => Some(Err(TokenizeError::IO(e))),
} }
} }
} }
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
// Smoke test for the tokenizer: it passes as long as every token in the
// sample program tokenizes without a `TokenizeError`. It does not assert
// the exact token sequence — the `println!` output is for manual
// inspection only.
#[test]
fn tokenizer() {
// Sample program exercising the ':' function-declare operator, the
// '**' operator, identifiers, and integer constants — the forms whose
// matching order this commit changed.
let program = ": function x ** x 2 function 1200";
// NOTE(review): `from_str` presumably builds a Tokenizer over an
// in-memory reader — confirm against the Tokenizer impl (not visible here).
let tok = Tokenizer::from_str(program).unwrap();
// Collecting into Result<Vec<_>, _> short-circuits on the first
// tokenization error; `expect` fails the test with that error.
let tokens: Vec<Token> = tok.collect::<Result<_, TokenizeError>>().expect("tokenizer error");
println!("{tokens:?}");
}
}