//! A simple lexer for GLSL.
//!
//! Targets GLSL 4.40. Read the spec
//! [here](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.40.pdf).
//!
//! ## Example
//! ```
//! use glsl_lexer::*;
//!
//! fn main() {
//!     let source = r#"
//!         #version 440
//!         uniform float time;
//!         void main() {
//!             gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
//!         }
//!     "#;
//!     let mut lexer = glsl_lexer::Lexer::new(source);
//!     let tokens = lexer.get_tokens();
//!     dbg!(tokens);
//! }
//! ```
//!
//! # Status
//! Work in progress. Known gaps: every operator is a single character
//! (`+=` is lexed as `+` followed by `=`), `/* ... */` block comments are not
//! recognised, and only a small set of keywords is classified as
//! [`Token::Keyword`].

/// A single lexical unit produced by [`Lexer::get_tokens`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Something like `float`
    Identifier(String),
    /// Something like `uniform`
    Keyword(String),
    /// Something like `13`
    IntegerLiteral(i64),
    /// Something like `3.5` or `.5`
    FloatLiteral(f64),
    /// Something like `+`
    Operator(String),
    /// Something like `{`
    Symbol(char),
    /// One or more consecutive whitespace characters
    Whitespace,
    /// Something like `// uwu` (the leading `//` is part of the payload)
    Comment(String),
    /// Any character the lexer has no rule for
    Unknown(char),
    /// End Of File
    EOF,
}

/// Character-based cursor over a GLSL source string.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// GLSL source
    pub input: Vec<char>,
    /// Position in source
    pub position: usize,
    /// [`char`] under position, or `None` once the input is exhausted
    pub current_char: Option<char>,
}

impl Lexer {
    /// Instantiates the [`Lexer`], positioned on the first character of `input`.
    pub fn new(input: &str) -> Self {
        let input: Vec<char> = input.chars().collect();
        let current_char = input.first().copied();
        Lexer {
            input,
            position: 0,
            current_char,
        }
    }

    /// Moves one character forward and refreshes `current_char`.
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }

    /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
    ///
    /// The returned vector always ends with [`Token::EOF`]. The lexer consumes
    /// its input, so a second call yields only [`Token::EOF`].
    pub fn get_tokens(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        while let Some(c) = self.current_char {
            let token = if c.is_whitespace() {
                self.consume_whitespace();
                Token::Whitespace
            } else if c.is_alphabetic() || c == '_' {
                self.consume_identifier_or_keyword()
            } else if c.is_ascii_digit() || (c == '.' && self.next_is_digit()) {
                // A '.' directly followed by a digit starts a float such as `.5`;
                // any other '.' falls through to the single-character rules.
                self.consume_number()
            } else if c == '/' && self.peek() == Some('/') {
                self.consume_comment()
            } else {
                self.advance();
                match c {
                    // TODO Implement operands like +=
                    '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>'
                    | '?' => Token::Operator(c.to_string()),
                    '{' | '}' | '(' | ')' | '#' | ',' | ';' => Token::Symbol(c),
                    _ => Token::Unknown(c),
                }
            };
            tokens.push(token);
        }
        tokens.push(Token::EOF);
        tokens
    }

    /// Returns the character after the current one without consuming anything.
    fn peek(&self) -> Option<char> {
        self.input.get(self.position + 1).copied()
    }

    /// Reports whether the character after the current one is an ASCII digit.
    fn next_is_digit(&self) -> bool {
        matches!(self.peek(), Some(next) if next.is_ascii_digit())
    }

    /// Skips a run of whitespace characters.
    fn consume_whitespace(&mut self) {
        while let Some(c) = self.current_char {
            if !c.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// Reads a run of alphanumeric/underscore characters and classifies it
    /// as a keyword or an identifier.
    fn consume_identifier_or_keyword(&mut self) -> Token {
        let mut identifier = String::new();
        while let Some(c) = self.current_char {
            if !(c.is_alphanumeric() || c == '_') {
                break;
            }
            identifier.push(c);
            self.advance();
        }
        if is_keyword(&identifier) {
            Token::Keyword(identifier)
        } else {
            Token::Identifier(identifier)
        }
    }

    /// Reads an integer or floating-point literal.
    ///
    /// The literal is a run of ASCII digits containing at most one `.`; a
    /// second `.` ends the literal instead of being swallowed, so input such
    /// as `1.2.3` no longer produces an unparsable string. The caller
    /// guarantees the literal contains at least one digit.
    ///
    /// An integer literal that does not fit in `i64` is reported as a
    /// [`Token::FloatLiteral`] holding the nearest `f64`, because the token
    /// type has no way to carry an error and panicking on malformed shader
    /// source is not acceptable for a library.
    fn consume_number(&mut self) -> Token {
        let mut literal = String::new();
        let mut is_float = false;
        while let Some(c) = self.current_char {
            if c.is_ascii_digit() {
                literal.push(c);
            } else if c == '.' && !is_float {
                is_float = true;
                literal.push(c);
            } else {
                break;
            }
            self.advance();
        }

        if !is_float {
            if let Ok(value) = literal.parse::<i64>() {
                return Token::IntegerLiteral(value);
            }
            // Only overflow can get here: the literal is non-empty ASCII digits.
        }
        let value = literal
            .parse::<f64>()
            .expect("digits with at most one '.' and at least one digit parse as f64");
        Token::FloatLiteral(value)
    }

    /// Reads a `//` comment up to, but not including, the next `'\n'`.
    fn consume_comment(&mut self) -> Token {
        let mut comment = String::new();
        while let Some(c) = self.current_char {
            if c == '\n' {
                break;
            }
            comment.push(c);
            self.advance();
        }
        Token::Comment(comment)
    }

    /// Panics with a message that includes the current position.
    // Currently not called by the lexer; retained so the helper stays available.
    #[allow(dead_code)]
    fn error(&self, message: &str) -> ! {
        panic!("Lexer error at position {}: {}", self.position, message);
    }
}

/// Returns `true` when `word` is one of the keywords this lexer recognises.
fn is_keyword(word: &str) -> bool {
    matches!(
        word,
        "void"
            | "int"
            | "float"
            | "bool"
            | "if"
            | "else"
            | "for"
            | "while"
            | "return"
            | "struct"
            | "uniform"
            | "varying"
    )
}

#[cfg(test)]
mod tests {
    // Token dumps go through `println!`: the test harness captures them and
    // shows them for failing tests (or with `--nocapture`). This avoids
    // calling `std::env::set_var` from concurrently running test threads.
    use super::*;

    #[test]
    fn whitespace() {
        let source_code = " \t\n";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        println!("[Whitespace] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::Whitespace, Token::EOF]);
    }

    #[test]
    fn identifier() {
        let source_code = "variableName";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        println!("[Identifier] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Identifier("variableName".to_string()), Token::EOF]
        );
    }

    #[test]
    fn keyword() {
        let source_code = "uniform";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        println!("[Keyword] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Keyword("uniform".to_string()), Token::EOF]
        );
    }

    #[test]
    fn integer_literal() {
        let source_code = "12345";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        println!("[IntegerLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::IntegerLiteral(12345), Token::EOF]);
    }

    #[test]
    fn float_literal() {
        let source_code = "123.45";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        println!("[FloatLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(123.45), Token::EOF]);
    }

    #[test]
    fn float_literal_with_leading_dot() {
        let mut lexer = Lexer::new(".5");
        let tokens = lexer.get_tokens();
        println!("[FloatLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(0.5), Token::EOF]);
    }

    #[test]
    fn number_with_extra_dot_does_not_panic() {
        let mut lexer = Lexer::new("1.2.3");
        let tokens = lexer.get_tokens();
        println!("[FloatLiteral] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::FloatLiteral(1.2),
                Token::FloatLiteral(0.3),
                Token::EOF,
            ]
        );
    }

    #[test]
    fn oversized_integer_does_not_panic() {
        let mut lexer = Lexer::new("100000000000000000000");
        let tokens = lexer.get_tokens();
        println!("[IntegerLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(1e20), Token::EOF]);
    }

    #[test]
    fn test_operator() {
        let source = "+-*/%&|^!=<>?";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        println!("[Operator] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Operator("+".to_string()),
                Token::Operator("-".to_string()),
                Token::Operator("*".to_string()),
                Token::Operator("/".to_string()),
                Token::Operator("%".to_string()),
                Token::Operator("&".to_string()),
                Token::Operator("|".to_string()),
                Token::Operator("^".to_string()),
                Token::Operator("!".to_string()),
                Token::Operator("=".to_string()),
                Token::Operator("<".to_string()),
                Token::Operator(">".to_string()),
                Token::Operator("?".to_string()),
                Token::EOF,
            ]
        );
    }

    #[test]
    fn test_single_line_comment() {
        let source = "// This is a comment\n";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        println!("[Comment] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Comment("// This is a comment".to_string()),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }

    // End-to-end check over a small shader.
    #[test]
    fn complex_source() {
        let source = r#"
            uniform float time;
            void main() {
                gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
            }
        "#;
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        println!("[Complex Source] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Whitespace,
                Token::Keyword("uniform".to_string()),
                Token::Whitespace,
                Token::Keyword("float".to_string()),
                Token::Whitespace,
                Token::Identifier("time".to_string()),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Keyword("void".to_string()),
                Token::Whitespace,
                Token::Identifier("main".to_string()),
                Token::Symbol('('),
                Token::Symbol(')'),
                Token::Whitespace,
                Token::Symbol('{'),
                Token::Whitespace,
                Token::Identifier("gl_FragColor".to_string()),
                Token::Whitespace,
                Token::Operator('='.to_string()),
                Token::Whitespace,
                Token::Identifier("vec4".to_string()),
                Token::Symbol('('),
                Token::FloatLiteral(1.0),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.5),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.2),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(1.0),
                Token::Symbol(')'),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Symbol('}'),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }
}