diff --git a/src/lib.rs b/src/lib.rs
index 94f4ef8..efeb75f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -95,7 +95,7 @@ impl Value {
 }
 
 impl Display for Value {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Self::Float(x) => write!(f, "{x}"),
             Self::Int(x) => write!(f, "{x}"),
@@ -182,9 +182,9 @@ pub struct Runtime<'a, R: BufRead> {
 }
 
 impl<'a, R: BufRead> Runtime<'a, R> {
-    pub fn new(reader: R) -> Self {
+    pub fn new(reader: R, name: &str) -> Self {
         Self {
-            tokenizer: Tokenizer::new(reader).peekable(),
+            tokenizer: Tokenizer::new(reader, name).peekable(),
             global_types: HashMap::new(),
             globals: HashMap::new(),
             parser: None,
diff --git a/src/main.rs b/src/main.rs
index bf68de1..5e96a83 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,12 @@
 use std::io::{self, BufReader};
 
 fn main() {
-    let mut runtime = lamm::Runtime::new(BufReader::new(io::stdin()));
+    let mut runtime = lamm::Runtime::new(BufReader::new(io::stdin()), "");
 
     for value in runtime.values() {
         match value {
             Ok(v) => println!("=> {v}"),
-            Err(e) => eprintln!("error: {e}"),
+            Err(e) => eprintln!("{e}"),
         }
     }
 }
diff --git a/src/parser.rs b/src/parser.rs
index 61093ef..9ae07c1 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,7 +2,7 @@
 use crate::executor::Executor;
 
 use super::{Value, Type, Function, FunctionType};
-use super::tokenizer::{Token, TokenizeError, Op};
+use super::tokenizer::{Token, TokenType, TokenizeError, Op};
 
 use std::borrow::BorrowMut;
 use std::error;
@@ -14,7 +14,7 @@ use std::iter::Peekable;
 pub enum ParseError {
     NoInput,
     UnexpectedEndInput,
-    IdentifierUndefined(String),
+    IdentifierUndefined(Token),
     InvalidIdentifier(Token),
     UnmatchedArrayClose,
     UnwantedToken(Token),
@@ -27,7 +27,7 @@ impl Display for ParseError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             ParseError::UnexpectedEndInput => write!(f, "Input ended unexpectedly"),
-            ParseError::IdentifierUndefined(name) => write!(f, "Undefined identifier `{name}`"),
+            ParseError::IdentifierUndefined(name) => write!(f, "Undefined identifier `{}` {}:{}:{}", name.lexeme, name.file, name.line, name.location.start),
             ParseError::InvalidIdentifier(t) => write!(f, "Invalid identifier `{t:?}`"),
             ParseError::NoInput => write!(f, "No input given"),
             ParseError::UnmatchedArrayClose => write!(f, "there was an unmatched array closing operator `]`"),
@@ -114,12 +114,11 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
         self
     }
 
-    fn get_object_type(&self, ident: &String) -> Result<&Type, ParseError> {
+    fn get_object_type(&self, ident: &String) -> Option<&Type> {
         self.locals.get(ident).or(self.globals.get(ident))
-            .ok_or(ParseError::IdentifierUndefined(ident.clone()))
     }
 
-    fn _get_object_types<Names: Iterator<Item = String>>(&self, items: Names) -> impl Iterator<Item = Result<&Type, ParseError>> {
+    fn _get_object_types<Names: Iterator<Item = String>>(&self, items: Names) -> impl Iterator<Item = Option<&Type>> {
         items.map(|x| self.get_object_type(&x))
     }
 
@@ -194,10 +193,10 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
             .ok_or(ParseError::NoInput)?
             .map_err(|e| ParseError::TokenizeError(e))?;
 
-        match token {
-            Token::Constant(c) => Ok(ParseTree::Value(c)),
-            Token::Identifier(ident) => {
-                match self.get_object_type(&ident)? {
+        match token.token() {
+            TokenType::Constant(c) => Ok(ParseTree::Value(c)),
+            TokenType::Identifier(ident) => {
+                match self.get_object_type(&ident).ok_or(ParseError::IdentifierUndefined(token))? {
                     Type::Function(f) => {
                         let f = f.clone();
                         let args = self.get_args(f.1.len())?;
@@ -228,20 +227,23 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
                     _ => Ok(ParseTree::Variable(ident)),
                 }
             },
-            Token::Operator(op) => match op {
+            TokenType::Operator(op) => match op {
                 Op::OpenArray => {
                     let mut depth = 1;
 
                     // take tokens until we reach the end of this array
                     // if we don't collect them here it causes rust to overflow computing the types
                     let array_tokens = self.tokens.by_ref().take_while(|t| match t {
-                        Ok(Token::Operator(Op::OpenArray)) => {
-                            depth += 1;
-                            true
-                        },
-                        Ok(Token::Operator(Op::CloseArray)) => {
-                            depth -= 1;
-                            depth > 0
+                        Ok(t) => match t.token() {
+                            TokenType::Operator(Op::OpenArray) => {
+                                depth += 1;
+                                true
+                            },
+                            TokenType::Operator(Op::CloseArray) => {
+                                depth -= 1;
+                                depth > 0
+                            }
+                            _ => true,
                         }
                         _ => true,
                     }).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
@@ -270,13 +272,16 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
                     // take tokens until we reach the end of this array
                     // if we don't collect them here it causes rust to overflow computing the types
                     let array_tokens = self.tokens.by_ref().take_while(|t| match t {
-                        Ok(Token::Operator(Op::OpenStatement)) => {
-                            depth += 1;
-                            true
-                        },
-                        Ok(Token::Operator(Op::CloseStatement)) => {
-                            depth -= 1;
-                            depth > 0
+                        Ok(t) => match t.token() {
+                            TokenType::Operator(Op::OpenStatement) => {
+                                depth += 1;
+                                true
+                            },
+                            TokenType::Operator(Op::CloseStatement) => {
+                                depth -= 1;
+                                depth > 0
+                            }
+                            _ => true,
                         }
                         _ => true,
                     }).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
@@ -304,7 +309,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
 
                     let body = Box::new(self.parse()?);
 
-                    if let Token::Identifier(ident) = token {
+                    if let TokenType::Identifier(ident) = token.token() {
                         match op {
                             Op::Equ => Ok(ParseTree::Equ(
                                 ident.clone(),
@@ -359,10 +364,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
                     });
 
                     for name in names.clone() {
-                        let t = match self.locals.remove(&name).ok_or(ParseError::IdentifierUndefined(name.clone())) {
-                            Ok(t) => t,
-                            Err(e) => return Err(e),
-                        };
+                        let t = self.locals.remove(&name).ok_or(ParseError::IdentifierUndefined(token.clone()))?;
 
                         self.globals.insert(name, t);
                     }
@@ -392,7 +394,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
                 },
                 op => self.parse_operator(op),
             },
-            t => Err(ParseError::UnwantedToken(t)),
+            _ => Err(ParseError::UnwantedToken(token)),
         }
     }
 
@@ -436,7 +438,8 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
             .collect::<Result<Vec<_>, _>>()?;
 
         let (types, names): (Vec<_>, Vec<_>) = args.into_iter().unzip();
-        let ret = if tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow)))).is_some() {
+
+        let ret = if tokens.next_if(|x| matches!(x.as_ref().unwrap().token(), TokenType::Operator(Op::Arrow))).is_some() {
             Self::parse_type(tokens)?
         } else {
             Type::Any
@@ -445,15 +448,16 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
         Ok((FunctionType(Box::new(ret), types), names))
     }
 
-    fn parse_function_declaration_parameter(
-        mut tokens: &mut Peekable<I>) -> Result<(Type, String), ParseError>
+    fn parse_function_declaration_parameter(mut tokens: &mut Peekable<I>) -> Result<(Type, String), ParseError>
     {
-        match tokens.next() {
+        let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?;
+
+        match token.token() {
             // untyped variable
-            Some(Ok(Token::Identifier(x))) => Ok((Type::Any, x)),
+            TokenType::Identifier(x) => Ok((Type::Any, x)),
 
             // typed variable
-            Some(Ok(Token::Operator(Op::TypeDeclaration))) => {
+            TokenType::Operator(Op::TypeDeclaration) => {
                 let name = Self::get_identifier(tokens.next())?;
                 let t = Self::parse_type(&mut tokens)?;
 
@@ -461,7 +465,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
             }
 
             // untyped function (all args Any, return type Any)
-            Some(Ok(Token::Operator(Op::FunctionDefine(n)))) => {
+            TokenType::Operator(Op::FunctionDefine(n)) => {
                 let name = Self::get_identifier(tokens.next())?;
                 let args = (0..n).map(|_| Type::Any).collect();
 
@@ -469,7 +473,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
             }
 
             // typed function
-            Some(Ok(Token::Operator(Op::FunctionDeclare(n)))) => {
+            TokenType::Operator(Op::FunctionDeclare(n)) => {
                 let name = Self::get_identifier(tokens.next())?;
                 let args = (0..n).map(|_| Self::parse_type(&mut tokens)).collect::<Result<Vec<_>, _>>()?;
                 let mut ret = Type::Any;
@@ -477,7 +481,7 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
                 // this is annoying
                 // inside the next_if closure, we already can know that its an error
                 // and return it, but we cannot return out of a closure
-                if let Some(t) = tokens.next_if(|x| matches!(x, Ok(Token::Operator(Op::Arrow))))
+                if let Some(t) = tokens.next_if(|x| matches!(x.as_ref().unwrap().token(), TokenType::Operator(Op::Arrow)))
                 {
                     // so we just check for an error here. this is the only reason t exists.
                     if let Err(e) = t {
@@ -489,31 +493,33 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
 
                 Ok((Type::Function(FunctionType(Box::new(ret), args)), name))
             }
-
-            Some(Ok(t)) => Err(ParseError::UnwantedToken(t)),
-            Some(Err(e)) => Err(ParseError::TokenizeError(e)),
-            None => Err(ParseError::UnexpectedEndInput),
+            _ => Err(ParseError::UnwantedToken(token)),
         }
     }
 
     // for some dumbass reason,
    // this is the only code that breaks if it doesn't take an impl Iterator instead of simply I ...
     fn parse_type(tokens: &mut Peekable<impl Iterator<Item = Result<Token, TokenizeError>>>) -> Result<Type, ParseError> {
-        match tokens.next() {
-            Some(Ok(Token::Type(t))) => Ok(t),
-            Some(Ok(Token::Operator(Op::OpenArray))) => {
+        let token = tokens.next().ok_or(ParseError::UnexpectedEndInput)?.map_err(|e| ParseError::TokenizeError(e))?;
+
+        match token.token() {
+            TokenType::Type(t) => Ok(t),
+            TokenType::Operator(Op::OpenArray) => {
                 let mut depth = 1;
 
                 // take tokens until we reach the end of this array
                 // if we don't collect them here it causes rust to overflow computing the types
                 let array_tokens = tokens.by_ref().take_while(|t| match t {
-                    Ok(Token::Operator(Op::OpenArray)) => {
-                        depth += 1;
-                        true
-                    },
-                    Ok(Token::Operator(Op::CloseArray)) => {
-                        depth -= 1;
-                        depth > 0
+                    Ok(t) => match t.token() {
+                        TokenType::Operator(Op::OpenStatement) => {
+                            depth += 1;
+                            true
+                        },
+                        TokenType::Operator(Op::CloseStatement) => {
+                            depth -= 1;
+                            depth > 0
+                        }
+                        _ => true,
                     }
                     _ => true,
                 }).collect::<Result<Vec<_>, TokenizeError>>().map_err(|e| ParseError::TokenizeError(e))?;
@@ -537,19 +543,17 @@ impl<'a, I: Iterator<Item = Result<Token, TokenizeError>>> Parser<'a, I> {
 
                 Ok(Type::Array(Box::new(t)))
             },
-            Some(Ok(t)) => Err(ParseError::UnwantedToken(t.clone())),
-            Some(Err(e)) => Err(ParseError::TokenizeError(e)),
-            None => Err(ParseError::UnexpectedEndInput),
+            _ => Err(ParseError::UnwantedToken(token)),
         }
     }
 
     fn get_identifier(t: Option<Result<Token, TokenizeError>>) -> Result<String, ParseError> {
-        match t.ok_or(ParseError::UnexpectedEndInput)?
-            .map_err(|e| ParseError::TokenizeError(e))
-        {
-            Ok(Token::Identifier(ident)) => Ok(ident),
-            Ok(t) => Err(ParseError::InvalidIdentifier(t)),
-            Err(e) => Err(e),
+        let token = t.ok_or(ParseError::UnexpectedEndInput)?
+            .map_err(|e| ParseError::TokenizeError(e))?;
+
+        match token.token() {
+            TokenType::Identifier(ident) => Ok(ident),
+            _ => Err(ParseError::InvalidIdentifier(token)),
         }
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 6af8bca..3e5269d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -6,7 +6,9 @@ use crate::Type;
 use super::Value;
 
 use std::fmt::{Display, Formatter};
-use std::io::{BufRead, Cursor};
+use std::io::BufRead;
+use std::sync::Arc;
+use std::ops::Range;
 
 #[derive(Debug)]
 pub enum TokenizeError {
@@ -89,93 +91,164 @@ pub enum Op {
 }
 
 #[derive(Debug, Clone, PartialEq)]
-pub enum Token {
+pub enum TokenType {
     Identifier(String),
     Operator(Op),
     Constant(Value),
     Type(Type),
 }
 
-fn get_dot_count<I: Iterator<Item = char>>(s: &mut Peekable<I>) -> Option<usize> {
-    let mut total = 0;
-
-    while let Some(n) = s.next_if(|&c| c == ':' || c == '.').map(|c| match c {
-        ':' => 2,
-        '.' => 1,
-        _ => 0,
-    }) {
-        total += n;
-    }
-
-    Some(total)
-}
-
-impl Token {
+impl TokenType {
     /// Parse a single token
     fn parse(s: &str) -> Result<Self, TokenizeError> {
         let identifier = regex::Regex::new(r#"[A-Za-z_][A-Za-z0-9_']*"#).map_err(|e| TokenizeError::Regex(e))?;
         let number = regex::Regex::new(r#"([0-9]+\.?[0-9]*)|(\.[0-9])"#).map_err(|e| TokenizeError::Regex(e))?;
 
-        match s {
+        Ok(match s {
             // Match keywords first
-            "true" => Ok(Token::Constant(Value::Bool(true))),
-            "false" => Ok(Token::Constant(Value::Bool(false))),
-            "nil" => Ok(Token::Constant(Value::Nil)),
-            "int" => Ok(Token::Operator(Op::IntCast)),
-            "float" => Ok(Token::Operator(Op::FloatCast)),
-            "bool" => Ok(Token::Operator(Op::BoolCast)),
-            "string" => Ok(Token::Operator(Op::StringCast)),
-            "print" => Ok(Token::Operator(Op::Print)),
-            "empty" => Ok(Token::Operator(Op::Empty)),
-            "head" => Ok(Token::Operator(Op::Head)),
-            "tail" => Ok(Token::Operator(Op::Tail)),
-            "init" => Ok(Token::Operator(Op::Init)),
-            "fini" => Ok(Token::Operator(Op::Fini)),
-            "export" => Ok(Token::Operator(Op::Export)),
+            "true" => TokenType::Constant(Value::Bool(true)),
+            "false" => TokenType::Constant(Value::Bool(false)),
+            "nil" => TokenType::Constant(Value::Nil),
+            "int" => TokenType::Operator(Op::IntCast),
+            "float" => TokenType::Operator(Op::FloatCast),
+            "bool" => TokenType::Operator(Op::BoolCast),
+            "string" => TokenType::Operator(Op::StringCast),
+            "print" => TokenType::Operator(Op::Print),
+            "empty" => TokenType::Operator(Op::Empty),
+            "head" => TokenType::Operator(Op::Head),
+            "tail" => TokenType::Operator(Op::Tail),
+            "init" => TokenType::Operator(Op::Init),
+            "fini" => TokenType::Operator(Op::Fini),
+            "export" => TokenType::Operator(Op::Export),
 
             // Types
-            "Any" => Ok(Token::Type(Type::Any)),
-            "Int" => Ok(Token::Type(Type::Int)),
-            "Float" => Ok(Token::Type(Type::Float)),
-            "Bool" => Ok(Token::Type(Type::Bool)),
-            "String" => Ok(Token::Type(Type::String)),
+            "Any" => TokenType::Type(Type::Any),
+            "Int" => TokenType::Type(Type::Int),
+            "Float" => TokenType::Type(Type::Float),
+            "Bool" => TokenType::Type(Type::Bool),
+            "String" => TokenType::Type(Type::String),
 
             // then identifiers and numbers
             _ => {
                 if identifier.is_match(s) {
-                    Ok(Token::Identifier(s.to_string()))
+                    TokenType::Identifier(s.to_string())
                 } else if number.is_match(s) {
                     if let Ok(int) = s.parse::<i64>() {
-                        Ok(Token::Constant(Value::Int(int)))
+                        TokenType::Constant(Value::Int(int))
                     } else if let Ok(float) = s.parse::<f64>() {
-                        Ok(Token::Constant(Value::Float(float)))
+                        TokenType::Constant(Value::Float(float))
                     } else {
-                        Err(TokenizeError::InvalidNumericConstant(s.to_string()))
+                        return Err(TokenizeError::InvalidNumericConstant(s.to_string()));
                     }
                 } else {
-                    Err(TokenizeError::UnableToMatchToken(s.to_string()))
+                    return Err(TokenizeError::UnableToMatchToken(s.to_string()));
                 }
             }
+        })
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct Token {
+    t: TokenType,
+    pub lexeme: String,
+    pub line: usize,
+    pub file: Arc<String>,
+    pub location: Range<usize>,
+}
+
+impl Token {
+    pub fn new(t: TokenType, lexeme: String, file: Arc<String>, line: usize, column: usize) -> Self {
+        Self {
+            t,
+            line,
+            file,
+            location: column..column+lexeme.len(),
+            lexeme,
         }
     }
+
+    pub fn token(&self) -> TokenType {
+        self.t.clone()
+    }
 }
 
 /// Tokenize an input stream of source code for a Parser
 pub(crate) struct Tokenizer<R: BufRead> {
     reader: R,
-    tokens: VecDeque<Result<Token, TokenizeError>>,
+    line: usize,
+    column: usize,
+    code: String,
+    filename: Arc<String>,
+    tokens: VecDeque<Token>,
 }
 
 impl<R: BufRead> Tokenizer<R> {
-    pub fn new(reader: R) -> Self {
+    pub fn new(reader: R, filename: &str) -> Self {
         Self {
             reader,
+            line: 0,
+            column: 0,
+            filename: Arc::new(filename.to_string()),
+            code: String::new(),
             tokens: VecDeque::new(),
         }
     }
 
+    fn get_dot_count<I: Iterator<Item = char>>(&mut self, s: &mut Peekable<I>) -> Option<usize> {
+        let mut total = 0;
+
+        while let Some(n) = self.next_char_if(s, |&c| c == ':' || c == '.').map(|c| match c {
+            ':' => 2,
+            '.' => 1,
+            _ => 0,
+        }) {
+            total += n;
+        }
+
+        Some(total)
+    }
+
+    fn next_char<I: Iterator<Item = char>>(&mut self, iter: &mut Peekable<I>) -> Option<char> {
+        if let Some(c) = iter.next() {
+            self.column += 1;
+            Some(c)
+        } else {
+            None
+        }
+    }
+
+    fn next_char_if<I: Iterator<Item = char>>(
+        &mut self,
+        iter: &mut Peekable<I>,
+        pred: impl FnOnce(&char) -> bool) -> Option<char>
+    {
+        if let Some(c) = iter.next_if(pred) {
+            self.column += 1;
+            Some(c)
+        } else {
+            None
+        }
+    }
+
+    fn next_char_while<I: Iterator<Item = char>>(
+        &mut self,
+        iter: &mut Peekable<I>,
+        mut pred: impl FnMut(&char) -> bool) -> Option<char>
+    {
+        if let Some(c) = self.next_char(iter) {
+            if (pred)(&c) {
+                Some(c)
+            } else {
+                None
+            }
+        } else {
+            None
+        }
+    }
+
     /// Tokenizes more input and adds them to the internal queue
-    fn tokenize<I: Iterator<Item = char>>(&mut self, mut iter: Peekable<I>) {
+    fn tokenize<I: Iterator<Item = char>>(&mut self, mut iter: Peekable<I>) -> Result<(), TokenizeError> {
         let operators: HashMap<&'static str, Op> = HashMap::from([
             ("+", Op::Add),
             ("-", Op::Sub),
@@ -211,33 +284,31 @@ impl<R: BufRead> Tokenizer<R> {
             ("\\", Op::NonCall),
         ]);
 
-        let c = if let Some(c) = iter.next() {
+        let c = if let Some(c) = self.next_char(&mut iter) {
             c
         } else {
-            return;
+            return Ok(());
         };
 
         if c.is_alphanumeric() {
             let mut token = String::from(c);
 
-            while let Some(c) = iter.next_if(|&c| c.is_alphanumeric() || c == '.' || c == '\'') {
+            while let Some(c) = self.next_char_if(&mut iter, |&c| c.is_alphanumeric() || c == '.' || c == '\'') {
                 token.push(c);
             }
 
-            self.tokens.push_back(Token::parse(&token));
+            self.tokens.push_back(Token::new(TokenType::parse(&token)?, token, self.filename.clone(), self.line, self.column));
             self.tokenize(iter)
         } else if c == '#' {
             let _: String = iter.by_ref().take_while(|&c| c != '\n').collect();
+            self.tokenize(iter)
         } else if c == '\"' {
             let mut token = String::new();
 
-            while let Some(c) = iter.next() {
+            while let Some(c) = self.next_char(&mut iter) {
                 match c {
                     '"' => break,
-                    '\n' => {
-                        self.tokens.push_back(Err(TokenizeError::UnclosedString));
-                        return;
-                    }
+                    '\n' => return Err(TokenizeError::UnclosedString),
                     '\\' => match iter.next() {
                         Some('\\') => token.push('\\'),
                         Some('n') => token.push('\n'),
@@ -245,16 +316,16 @@ impl<R: BufRead> Tokenizer<R> {
                         Some('r') => token.push('\r'),
                         Some('\"') => token.push('"'),
                         Some(c) => token.push(c),
-                        None => {
-                            self.tokens.push_back(Err(TokenizeError::UnclosedString));
-                            return;
-                        },
+                        None => return Err(TokenizeError::UnclosedString),
                     }
                     _ => token.push(c),
                 }
             }
 
-            self.tokens.push_back(Ok(Token::Constant(Value::String(token))));
+            self.tokens.push_back(
+                Token::new(TokenType::Constant(
+                    Value::String(token.clone())), token, self.filename.clone(), self.line, self.column));
+            self.tokenize(iter)
         } else if operators.keys().any(|x| x.starts_with(c)) {
             let mut token = String::from(c);
 
@@ -281,49 +352,39 @@ impl<R: BufRead> Tokenizer<R> {
                     // if not, we need to make sure that the next characters
                     // we grab *actually* match the last operator
                     if let Some(op) = possible.get(token.as_str()) {
-                        self.tokens.push_back(Ok(Token::Operator(match op {
+                        let token = Token::new(TokenType::Operator(match op {
                             // special handling for "dynamic" operators
                             Op::FunctionDefine(n) => {
-                                let count = match get_dot_count(&mut iter) {
+                                let count = match self.get_dot_count(&mut iter) {
                                     Some(count) => count,
-                                    None => {
-                                        self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                        return;
-                                    }
+                                    None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                 };
                                 Op::FunctionDefine(n + count)
                             }
                             Op::FunctionDeclare(n) => {
-                                let count = match get_dot_count(&mut iter) {
+                                let count = match self.get_dot_count(&mut iter) {
                                     Some(count) => count,
-                                    None => {
-                                        self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                        return;
-                                    }
+                                    None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                 };
                                 Op::FunctionDeclare(n + count)
                             }
                             Op::LambdaDefine(n) => {
-                                let count = match get_dot_count(&mut iter) {
+                                let count = match self.get_dot_count(&mut iter) {
                                     Some(count) => count,
-                                    None => {
-                                        self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                        return;
-                                    }
+                                    None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                 };
                                 Op::LambdaDefine(n + count)
                             }
                             op => op.clone(),
-                        })));
+                        }), token, self.filename.clone(), self.line, self.column);
+
+                        self.tokens.push_back(token);
 
                         break;
                     } else {
-                        let next = match iter.next_if(is_expected) {
+                        let next = match self.next_char_if(&mut iter, is_expected) {
                            Some(c) => c,
-                            None => {
-                                self.tokens.push_back(Err(TokenizeError::UnableToMatchToken(format!("{token}"))));
-                                return;
-                            }
+                            None => return Err(TokenizeError::UnableToMatchToken(format!("{token}"))),
                        };
 
                        token.push(next);
@@ -331,45 +392,38 @@ impl<R: BufRead> Tokenizer<R> {
                    }
                    0 => unreachable!(),
                    _ => {
-                        let next = match iter.next_if(is_expected) {
+                        let next = match self.next_char_if(&mut iter, is_expected) {
                            Some(c) => c,
                            None => {
-                                // at this point, token must be in the hashmap possible, otherwise it wouldnt have any matches
-                                self.tokens.push_back(Ok(Token::Operator(match possible.get(token.as_str()).unwrap() {
+                                let token = Token::new(TokenType::Operator(match possible.get(token.as_str()).unwrap() {
                                    // special handling for "dynamic" operators
                                    Op::FunctionDefine(n) => {
-                                        let count = match get_dot_count(&mut iter) {
+                                        let count = match self.get_dot_count(&mut iter) {
                                            Some(count) => count,
-                                            None => {
-                                                self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                                return;
-                                            }
+                                            None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                        };
                                        Op::FunctionDefine(n + count)
                                    }
                                    Op::FunctionDeclare(n) => {
-                                        let count = match get_dot_count(&mut iter) {
+                                        let count = match self.get_dot_count(&mut iter) {
                                            Some(count) => count,
-                                            None => {
-                                                self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                                return;
-                                            }
+                                            None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                        };
                                        Op::FunctionDeclare(n + count)
                                    }
                                    Op::LambdaDefine(n) => {
-                                        let count = match get_dot_count(&mut iter) {
+                                        let count = match self.get_dot_count(&mut iter) {
                                            Some(count) => count,
-                                            None => {
-                                                self.tokens.push_back(Err(TokenizeError::InvalidDynamicOperator(token)));
-                                                return;
-                                            }
+                                            None => return Err(TokenizeError::InvalidDynamicOperator(token)),
                                        };
                                        Op::LambdaDefine(n + count)
                                    }
                                    op => op.clone(),
-                                })));
+                                }), token, self.filename.clone(), self.line, self.column);
+
+                                // at this point, token must be in the hashmap possible, otherwise it wouldn't have any matches
+                                self.tokens.push_back(token);
 
                                break;
                            }
                        };
@@ -383,27 +437,17 @@ impl<R: BufRead> Tokenizer<R> {
         } else if c.is_whitespace() {
             self.tokenize(iter)
         } else {
-            self.tokens.push_back(Err(TokenizeError::InvalidCharacter(c)));
-            return;
+            return Err(TokenizeError::InvalidCharacter(c));
         }
     }
 }
 
-impl std::str::FromStr for Tokenizer<Cursor<String>> {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let cursor = Cursor::new(s.to_string());
-
-        Ok(Tokenizer::new(cursor))
-    }
-}
-
-impl<R: BufRead> std::iter::Iterator for Tokenizer<R> {
+impl<R: BufRead> Iterator for Tokenizer<R> {
     type Item = Result<Token, TokenizeError>;
 
     fn next(&mut self) -> Option<Self::Item> {
         if let Some(token) = self.tokens.pop_front() {
-            return Some(token);
+            return Some(Ok(token));
         }
 
         let mut input = String::new();
@@ -411,7 +455,15 @@ impl<R: BufRead> std::iter::Iterator for Tokenizer<R> {
         match self.reader.read_line(&mut input) {
             Ok(0) => None,
             Ok(_n) => {
-                self.tokenize(input.chars().peekable());
+                self.code.push_str(&input);
+                self.line += 1;
+                self.column = 0;
+
+                match self.tokenize(input.chars().peekable()) {
+                    Ok(()) => (),
+                    Err(e) => return Some(Err(e)),
+                }
+
                 self.next()
             },
             Err(e) => Some(Err(TokenizeError::IO(e))),
@@ -421,7 +473,8 @@
 
 #[cfg(test)]
 mod tests {
-    use std::str::FromStr;
+    use io::Cursor;
+    use crate::parser::Parser;
 
     use super::*;
 
@@ -429,7 +482,7 @@
     fn tokenizer() {
         let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]";
 
-        let tokens: Vec<Token> = Tokenizer::from_str(program).unwrap().collect::<Result<Vec<_>, _>>().unwrap();
+        let tokens: Vec<Token> = Tokenizer::new(Cursor::new(program), "").collect::<Result<Vec<_>, _>>().unwrap();
 
         println!("{tokens:#?}");
     }
@@ -438,7 +491,7 @@
     fn a() {
         let program = ": length ?. x [] -> Int ?? x + 1 length tail x 0 length [ 1 2 3 ]";
 
-        let mut tokenizer = Tokenizer::from_str(program).unwrap().peekable();
+        let mut tokenizer = Tokenizer::new(Cursor::new(program), "").peekable();
         let mut globals = HashMap::new();
         let mut parser = Parser::new(&mut tokenizer, &mut globals);
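A minimal usage sketch (not part of the patch): it assumes the `lamm::Runtime` API touched in src/main.rs above plus a hypothetical `script.lamm` source file, and shows how the name passed to `Runtime::new` surfaces in diagnostics now that `ParseError::IdentifierUndefined` carries a full `Token` with file, line, and column.

```rust
// Sketch only: relies on Runtime::new(reader, name) and runtime.values() from the
// diff above; the "script.lamm" path is a hypothetical example input.
use std::fs::File;
use std::io::BufReader;

fn main() -> std::io::Result<()> {
    let file = File::open("script.lamm")?;
    let mut runtime = lamm::Runtime::new(BufReader::new(file), "script.lamm");

    for value in runtime.values() {
        match value {
            Ok(v) => println!("=> {v}"),
            // With Token-carrying errors, an undefined identifier now renders
            // roughly as: Undefined identifier `foo` script.lamm:3:7
            Err(e) => eprintln!("{e}"),
        }
    }

    Ok(())
}
```

The exact column printed depends on where the tokenizer's running `column` counter sits when the `Token` is built, so treat the location shown in the comment as illustrative rather than exact.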