fix order so keywords get parsed correctly
This commit is contained in:
110
src/tokenizer.rs
110
src/tokenizer.rs
@@ -90,59 +90,57 @@ impl Token {
|
|||||||
let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
|
let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
|
||||||
let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
|
let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
|
||||||
|
|
||||||
if string.is_match(s) {
|
match s {
|
||||||
Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string())))
|
// First check if s is an operator
|
||||||
} else if identifier.is_match(s) {
|
"+" => Ok(Token::Operator(Op::Add)),
|
||||||
Ok(Token::Identifier(s.to_string()))
|
"-" => Ok(Token::Operator(Op::Sub)),
|
||||||
} else if number.is_match(s) {
|
"*" => Ok(Token::Operator(Op::Mul)),
|
||||||
if let Ok(int) = s.parse::<i64>() {
|
"/" => Ok(Token::Operator(Op::Div)),
|
||||||
Ok(Token::Constant(Value::Int(int)))
|
"**" => Ok(Token::Operator(Op::Exp)),
|
||||||
} else if let Ok(float) = s.parse::<f64>() {
|
"%" => Ok(Token::Operator(Op::Mod)),
|
||||||
Ok(Token::Constant(Value::Float(float)))
|
"=" => Ok(Token::Operator(Op::Equ)),
|
||||||
} else {
|
"." => Ok(Token::Operator(Op::LazyEqu)),
|
||||||
Err(TokenizeError::InvalidNumericConstant(s.to_string()))
|
"~" => Ok(Token::Operator(Op::Compose)),
|
||||||
}
|
"," => Ok(Token::Operator(Op::Id)),
|
||||||
} else {
|
"?" => Ok(Token::Operator(Op::If)),
|
||||||
match s {
|
"??" => Ok(Token::Operator(Op::IfElse)),
|
||||||
// First check if s is an operator
|
">" => Ok(Token::Operator(Op::GreaterThan)),
|
||||||
"+" => Ok(Token::Operator(Op::Add)),
|
"<" => Ok(Token::Operator(Op::LessThan)),
|
||||||
"-" => Ok(Token::Operator(Op::Sub)),
|
">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)),
|
||||||
"*" => Ok(Token::Operator(Op::Mul)),
|
"<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
|
||||||
"/" => Ok(Token::Operator(Op::Div)),
|
"==" => Ok(Token::Operator(Op::EqualTo)),
|
||||||
"**" => Ok(Token::Operator(Op::Exp)),
|
|
||||||
"%" => Ok(Token::Operator(Op::Mod)),
|
|
||||||
"=" => Ok(Token::Operator(Op::Equ)),
|
|
||||||
"." => Ok(Token::Operator(Op::LazyEqu)),
|
|
||||||
"~" => Ok(Token::Operator(Op::Compose)),
|
|
||||||
"," => Ok(Token::Operator(Op::Id)),
|
|
||||||
"?" => Ok(Token::Operator(Op::If)),
|
|
||||||
"??" => Ok(Token::Operator(Op::IfElse)),
|
|
||||||
">" => Ok(Token::Operator(Op::GreaterThan)),
|
|
||||||
"<" => Ok(Token::Operator(Op::LessThan)),
|
|
||||||
">=" => Ok(Token::Operator(Op::GreaterThanOrEqualTo)),
|
|
||||||
"<=" => Ok(Token::Operator(Op::LessThanOrEqualTo)),
|
|
||||||
"==" => Ok(Token::Operator(Op::EqualTo)),
|
|
||||||
|
|
||||||
// then some keywords
|
// then some keywords
|
||||||
"true" => Ok(Token::Constant(Value::Bool(true))),
|
"true" => Ok(Token::Constant(Value::Bool(true))),
|
||||||
"false" => Ok(Token::Constant(Value::Bool(false))),
|
"false" => Ok(Token::Constant(Value::Bool(false))),
|
||||||
"not" => Ok(Token::Operator(Op::Not)),
|
"not" => Ok(Token::Operator(Op::Not)),
|
||||||
|
|
||||||
// Type casting
|
// Type casting
|
||||||
"int" => Ok(Token::Operator(Op::IntCast)),
|
"int" => Ok(Token::Operator(Op::IntCast)),
|
||||||
"float" => Ok(Token::Operator(Op::FloatCast)),
|
"float" => Ok(Token::Operator(Op::FloatCast)),
|
||||||
"bool" => Ok(Token::Operator(Op::BoolCast)),
|
"bool" => Ok(Token::Operator(Op::BoolCast)),
|
||||||
"string" => Ok(Token::Operator(Op::StringCast)),
|
"string" => Ok(Token::Operator(Op::StringCast)),
|
||||||
|
|
||||||
// then variable length keywords
|
// then variable length keywords
|
||||||
_ => {
|
_ => {
|
||||||
if s.starts_with(":") {
|
if s.starts_with(":") {
|
||||||
Ok(Token::Operator(Op::FunctionDeclare(
|
Ok(Token::Operator(Op::FunctionDeclare(
|
||||||
get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
|
get_dot_count(s).map(|x| x - 1).ok_or(TokenizeError::InvalidDynamicOperator(s.to_string()))?
|
||||||
)))
|
)))
|
||||||
|
} else if string.is_match(s) {
|
||||||
|
Ok(Token::Constant(Value::String(s[1..s.len() - 1].to_string())))
|
||||||
|
} else if identifier.is_match(s) {
|
||||||
|
Ok(Token::Identifier(s.to_string()))
|
||||||
|
} else if number.is_match(s) {
|
||||||
|
if let Ok(int) = s.parse::<i64>() {
|
||||||
|
Ok(Token::Constant(Value::Int(int)))
|
||||||
|
} else if let Ok(float) = s.parse::<f64>() {
|
||||||
|
Ok(Token::Constant(Value::Float(float)))
|
||||||
} else {
|
} else {
|
||||||
Err(TokenizeError::UnableToMatchToken(s.to_string()))
|
Err(TokenizeError::InvalidNumericConstant(s.to_string()))
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
Err(TokenizeError::UnableToMatchToken(s.to_string()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -259,19 +257,3 @@ impl<R: BufRead> std::iter::Iterator for Tokenizer<R> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use std::str::FromStr;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn tokenizer() {
|
|
||||||
let program = ": function x ** x 2 function 1200";
|
|
||||||
|
|
||||||
let tok = Tokenizer::from_str(program).unwrap();
|
|
||||||
let tokens: Vec<Token> = tok.collect::<Result<_, TokenizeError>>().expect("tokenizer error");
|
|
||||||
|
|
||||||
println!("{tokens:?}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user