fix tokenizer on string inputs

This commit is contained in:
2024-10-27 22:56:10 -04:00
parent c29f689252
commit 83ac160a42

View File

@@ -137,14 +137,12 @@ impl Token {
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct Tokenizer<R: BufRead> { pub(crate) struct Tokenizer<R: BufRead> {
reader: Arc<Mutex<CodeIter<R>>>, reader: Arc<Mutex<CodeIter<R>>>,
tokens: VecDeque<Token>,
} }
impl<R: BufRead> Tokenizer<R> { impl<R: BufRead> Tokenizer<R> {
pub fn new(reader: Arc<Mutex<CodeIter<R>>>) -> Self { pub fn new(reader: Arc<Mutex<CodeIter<R>>>) -> Self {
Self { Self {
reader, reader,
tokens: VecDeque::new(),
} }
} }
@@ -181,7 +179,7 @@ impl<R: BufRead> Tokenizer<R> {
} }
/// Tokenizes more input and adds them to the internal queue /// Tokenizes more input and adds them to the internal queue
fn tokenize(&mut self) -> Result<(), Error> { fn tokenize(&mut self) -> Result<Option<Token>, Error> {
let operators: HashMap<&'static str, Op> = HashMap::from([ let operators: HashMap<&'static str, Op> = HashMap::from([
("+", Op::Add), ("+", Op::Add),
("-", Op::Sub), ("-", Op::Sub),
@@ -220,7 +218,7 @@ impl<R: BufRead> Tokenizer<R> {
let c = if let Some(c) = self.next_char() { let c = if let Some(c) = self.next_char() {
c c
} else { } else {
return Ok(()); return Ok(None);
}; };
if c.is_alphanumeric() { if c.is_alphanumeric() {
@@ -232,9 +230,7 @@ impl<R: BufRead> Tokenizer<R> {
let (line, column) = self.getpos(); let (line, column) = self.getpos();
self.tokens.push_back(Token::new(TokenType::parse(&token) Ok(Some(Token::new(TokenType::parse(&token).map_err(|e| e.location(line, column - token.len() + 1..column + 1))?, token.clone(), line, column - token.len() + 1)))
.map_err(|e| e.location(line, column - token.len() + 1..column + 1))?, token.clone(), line, column - token.len() + 1));
self.tokenize()
} else if c == '#' { } else if c == '#' {
while self.next_char_if(|&c| c != '\n').is_some() {} while self.next_char_if(|&c| c != '\n').is_some() {}
self.tokenize() self.tokenize()
@@ -267,11 +263,8 @@ impl<R: BufRead> Tokenizer<R> {
let (line, col) = self.getpos(); let (line, col) = self.getpos();
self.tokens.push_back( Ok(Some(Token::new(TokenType::Constant(
Token::new(TokenType::Constant( Value::String(token.clone())), token, line, col)))
Value::String(token.clone())), token, line, col));
self.tokenize()
} else if operators.keys().any(|x| x.starts_with(c)) { } else if operators.keys().any(|x| x.starts_with(c)) {
let mut token = String::from(c); let mut token = String::from(c);
@@ -309,9 +302,7 @@ impl<R: BufRead> Tokenizer<R> {
let token = Token::new(t, token, line, col); let token = Token::new(t, token, line, col);
self.tokens.push_back(token); return Ok(Some(token));
break;
} else { } else {
let next = match self.next_char_if(is_expected) { let next = match self.next_char_if(is_expected) {
Some(c) => c, Some(c) => c,
@@ -344,9 +335,8 @@ impl<R: BufRead> Tokenizer<R> {
let (line, col) = self.getpos(); let (line, col) = self.getpos();
let token = Token::new(t, token, line, col); let token = Token::new(t, token, line, col);
self.tokens.push_back(token); return Ok(Some(token))
break;
} }
}; };
@@ -354,8 +344,6 @@ impl<R: BufRead> Tokenizer<R> {
} }
} }
} }
self.tokenize()
} else if c.is_whitespace() { } else if c.is_whitespace() {
self.tokenize() self.tokenize()
} else { } else {
@@ -372,16 +360,7 @@ impl<R: BufRead> Iterator for Tokenizer<R> {
type Item = Result<Token, Error>; type Item = Result<Token, Error>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(token) = self.tokens.pop_front() { self.tokenize().transpose()
return Some(Ok(token));
} else {
match self.tokenize() {
Ok(_) => (),
Err(e) => return Some(Err(e)),
};
self.next()
}
} }
} }
@@ -392,7 +371,7 @@ mod tests {
#[test] #[test]
fn a() { fn a() {
let program = ": f a * 12 a f 12"; let program = ": f a * 12 a f 12\n\n";
let tokenizer = Tokenizer::new(Arc::new(Mutex::new(CodeIter::new(Cursor::new(program))))); let tokenizer = Tokenizer::new(Arc::new(Mutex::new(CodeIter::new(Cursor::new(program)))));