//! A hand-rolled lexer for a small GLSL-like shading language.
//! Splits source text into a flat stream of [`Token`]s.

/// A single lexical token produced by [`Lexer::get_tokens`].
#[derive(Debug, PartialEq)]
pub enum Token {
    Identifier(String),
    Keyword(String),
    IntegerLiteral(i64),
    FloatLiteral(f64),
    Operator(String),
    Symbol(char),
    /// A run of one or more whitespace characters, collapsed into one token.
    Whitespace,
    /// A `//` line comment, including the leading slashes, excluding the newline.
    Comment(String),
    Unknown(char),
    /// End-of-input marker; always the last token emitted.
    EOF,
}

/// A character-by-character lexer over an in-memory copy of the source.
pub struct Lexer {
    /// The whole source, pre-split into `char`s for O(1) cursor access.
    input: Vec<char>,
    /// Index of `current_char` within `input`.
    position: usize,
    /// The character under the cursor, or `None` once input is exhausted.
    current_char: Option<char>,
}

impl Lexer {
    /// Creates a lexer positioned at the first character of `input`
    /// (or at end-of-input when `input` is empty).
    pub fn new(input: &str) -> Self {
        let chars: Vec<char> = input.chars().collect();
        let first = chars.first().copied();
        Lexer {
            input: chars,
            position: 0,
            current_char: first,
        }
    }

    /// Moves the cursor one character forward, setting `current_char`
    /// to `None` when the end of input is reached.
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }

    /// Parses the source given to [`Lexer::new`] and returns a vector of
    /// [`Token`], always terminated by a single [`Token::EOF`].
    pub fn get_tokens(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        while let Some(c) = self.current_char {
            if c.is_whitespace() {
                self.consume_whitespace();
                tokens.push(Token::Whitespace);
            } else if c.is_alphabetic() || c == '_' {
                tokens.push(self.consume_identifier_or_keyword());
            } else if c.is_ascii_digit() {
                tokens.push(self.consume_number());
            } else if c == '/' && self.peek() == Some('/') {
                tokens.push(self.consume_comment());
            } else {
                tokens.push(self.consume_symbol());
            }
        }
        tokens.push(Token::EOF);
        tokens
    }

    /// Returns the character immediately after the cursor without advancing.
    fn peek(&self) -> Option<char> {
        self.input.get(self.position + 1).copied()
    }

    /// Skips a run of whitespace characters; the caller emits the single
    /// [`Token::Whitespace`] that represents the whole run.
    fn consume_whitespace(&mut self) {
        while let Some(c) = self.current_char {
            if !c.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// Reads `[A-Za-z_][A-Za-z0-9_]*` starting at the cursor and classifies
    /// it as a [`Token::Keyword`] or [`Token::Identifier`].
    fn consume_identifier_or_keyword(&mut self) -> Token {
        let mut identifier = String::new();
        while let Some(c) = self.current_char {
            if c.is_alphanumeric() || c == '_' {
                identifier.push(c);
                self.advance();
            } else {
                break;
            }
        }
        if is_keyword(&identifier) {
            Token::Keyword(identifier)
        } else {
            Token::Identifier(identifier)
        }
    }

    /// Reads an integer or float literal starting at the cursor.
    ///
    /// Only the first `.` is treated as a decimal point; a second `.` ends
    /// the literal, so input like `1.2.3` lexes as `1.2`, `.`, `3` instead
    /// of panicking when the accumulated text fails to parse.
    fn consume_number(&mut self) -> Token {
        let mut number = String::new();
        let mut is_float = false;
        while let Some(c) = self.current_char {
            if c.is_ascii_digit() {
                number.push(c);
                self.advance();
            } else if c == '.' && !is_float {
                number.push(c);
                is_float = true;
                self.advance();
            } else {
                break;
            }
        }
        if is_float {
            // `number` is digits with exactly one '.', which always parses.
            Token::FloatLiteral(number.parse().expect("valid float literal"))
        } else {
            // `number` is a non-empty run of ASCII digits.
            Token::IntegerLiteral(number.parse().expect("valid integer literal"))
        }
    }

    /// Reads a `//` line comment up to (but not including) the newline.
    fn consume_comment(&mut self) -> Token {
        let mut comment = String::new();
        while let Some(c) = self.current_char {
            if c == '\n' {
                break;
            }
            comment.push(c);
            self.advance();
        }
        Token::Comment(comment)
    }

    /// Emits the character under the cursor as a [`Token::Symbol`].
    fn consume_symbol(&mut self) -> Token {
        let symbol = self.current_char.unwrap();
        self.advance();
        Token::Symbol(symbol)
    }

    /// Aborts lexing with a positioned diagnostic. Currently unused; kept
    /// for upcoming error paths.
    #[allow(dead_code)]
    fn error(&self, message: &str) -> ! {
        panic!("Lexer error at position {}: {}", self.position, message);
    }
}

/// Returns `true` when `word` is a reserved keyword of the language.
fn is_keyword(word: &str) -> bool {
    matches!(
        word,
        "void"
            | "int"
            | "float"
            | "bool"
            | "if"
            | "else"
            | "for"
            | "while"
            | "return"
            | "struct"
            | "uniform"
            | "varying"
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn whitespace() {
        let mut lexer = Lexer::new(" \t\n");
        let tokens = lexer.get_tokens();
        assert_eq!(tokens, vec![Token::Whitespace, Token::EOF]);
    }

    #[test]
    fn identifier() {
        let mut lexer = Lexer::new("variableName");
        let tokens = lexer.get_tokens();
        assert_eq!(
            tokens,
            vec![Token::Identifier("variableName".to_string()), Token::EOF]
        );
    }

    #[test]
    fn keyword() {
        let mut lexer = Lexer::new("uniform");
        let tokens = lexer.get_tokens();
        assert_eq!(
            tokens,
            vec![Token::Keyword("uniform".to_string()), Token::EOF]
        );
    }

    #[test]
    fn integer_literal() {
        let mut lexer = Lexer::new("12345");
        let tokens = lexer.get_tokens();
        assert_eq!(tokens, vec![Token::IntegerLiteral(12345), Token::EOF]);
    }

    #[test]
    fn float_literal() {
        let mut lexer = Lexer::new("123.45");
        let tokens = lexer.get_tokens();
        assert_eq!(tokens, vec![Token::FloatLiteral(123.45), Token::EOF]);
    }

    #[test]
    fn test_single_line_comment() {
        let mut lexer = Lexer::new("// This is a comment\n");
        let tokens = lexer.get_tokens();
        assert_eq!(
            tokens,
            vec![
                Token::Comment("// This is a comment".to_string()),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }

    #[test]
    fn complex_source() {
        let source = r#"
            uniform float time;
            void main() {
                gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
            }
        "#;
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        assert_eq!(
            tokens,
            vec![
                Token::Whitespace,
                Token::Keyword("uniform".to_string()),
                Token::Whitespace,
                Token::Keyword("float".to_string()),
                Token::Whitespace,
                Token::Identifier("time".to_string()),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Keyword("void".to_string()),
                Token::Whitespace,
                Token::Identifier("main".to_string()),
                Token::Symbol('('),
                Token::Symbol(')'),
                Token::Whitespace,
                Token::Symbol('{'),
                Token::Whitespace,
                Token::Identifier("gl_FragColor".to_string()),
                Token::Whitespace,
                Token::Symbol('='),
                Token::Whitespace,
                Token::Identifier("vec4".to_string()),
                Token::Symbol('('),
                Token::FloatLiteral(1.0),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.5),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.2),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(1.0),
                Token::Symbol(')'),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Symbol('}'),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }
}