//! A simple lexer for GLSL. //! //! Adheres to the GLSL 440. Read the spec //! [here](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.40.pdf). //! ## Example //! ``` //! use glsl_lexer::*; //! //! fn main() { //! let source = r#" //! #version 440 //! uniform float time; //! void main() { //! gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); //! } //! "#; //! let mut lexer = glsl_lexer::Lexer::new(&source); //! let tokens = lexer.get_tokens(); //! dbg!("{}", tokens); //! } //! ``` //! # WIP THAT SHIT STILL WONKY AF use std::sync::Arc; mod handlers; #[derive(Debug, PartialEq, Clone)] pub enum Token { /// Something like `float` Identifier(String), /// Something like `uniform` Keyword(String), /// Something like `13` IntegerLiteral(i64), /// Something like `3.5` or `.5` FloatLiteral(f64), /// Something like `+` Operator(String), /// Something like `{` Symbol(char), /// Should be self-explanatory Whitespace, /// Something like `// uwu` Comment(String), /// Shrouded in mystery Unknown(char), /// End Of File EOF, } pub struct Lexer { /// GLSL source pub input: Vec, /// Position in source pub position: usize, /// [`char`] under position pub current_char: Option, } /// Instantiates Lexer and retrieves Tokens of given source. /// Returns Arc<[Token]> /// # Example: /// ``` /// let lexed: Arc<[Token]> = lex!(r#" /// #version 440 /// uniform float time; /// void main() { /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); /// } /// "#); /// ``` /// Equivalent to: /// ``` /// let source = "some source"; /// let lexed = Lexer::new(source).get_tokens(); /// ``` #[macro_export] macro_rules! lex { ($source:expr) => {{ $crate::Lexer::get_tokens(&mut $crate::Lexer::new($source)) }}; } impl Lexer { /// Instantiates the [`Lexer`] pub fn new(input: &str) -> Self { let mut lexer = Lexer { input: input.chars().collect(), position: 0, current_char: None, }; lexer.current_char = if lexer.position < lexer.input.len() { Some(lexer.input[lexer.position]) } else { None }; lexer } fn advance(&mut self) { self.position += 1; self.current_char = if self.position < self.input.len() { Some(self.input[self.position]) } else { None }; } /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`]. /// # Example: /// ``` /// use glsl_lexer::*; /// let source = r#" /// #version 440 /// uniform float time; /// void main() { /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); /// } /// "#; /// let mut lexer = glsl_lexer::Lexer::new(&source); /// let tokens = lexer.get_tokens(); /// dbg!("{}", tokens); ///``` // We are using Arc<[Token]> as return type for cheaper cloning of the returned value pub fn get_tokens(&mut self) -> Arc<[Token]> { let mut tokens = Vec::new(); while let Some(c) = self.current_char { if c.is_whitespace() { self.consume_whitespace(); tokens.push(Token::Whitespace); } else if c.is_alphabetic() || c == '_' { tokens.push(self.consume_identifier_or_keyword()); } else if c.is_ascii_digit() { tokens.push(self.consume_number()); } else if c == '/' && self.peek() == Some('/') { tokens.push(self.consume_comment()); } else { match c { // TODO Implement operands like += '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => { tokens.push(self.consume_operator()); } '{' | '}' | '(' | ')' | '#' | ',' | ';' => { tokens.push(self.consume_symbol()); } '.' => { tokens.push(self.consume_number()); } _ => { tokens.push(self.consume_unknown()); } } } } tokens.push(Token::EOF); let ret: Arc<[Token]> = tokens.into(); ret } fn peek(&self) -> Option { if self.position + 1 < self.input.len() { Some(self.input[self.position + 1]) } else { None } } // fn error(&self, message: &str) -> ! { // panic!("Lexer error at position {}: {}", self.position, message); // } } fn is_keyword(word: &str) -> bool { matches!( word, "void" | "int" | "float" | "bool" | "if" | "else" | "for" | "while" | "return" | "struct" | "uniform" | "varying" ) } #[cfg(test)] mod tests { use super::*; use log::info; fn init() { std::env::set_var("RUST_LOG", "INFO"); let _ = env_logger::builder().is_test(true).try_init(); } #[test] fn whitespace() { init(); let source_code = " \t\n"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[Whitespace] Tokens: {:#?}", tokens); assert_eq!(tokens, vec![Token::Whitespace, Token::EOF].into()); } #[test] fn identifier() { init(); let source_code = "variableName"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[Identifier] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![Token::Identifier("variableName".to_string()), Token::EOF].into() ); } #[test] fn keyword() { init(); let source_code = "uniform"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[Keyword] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![Token::Keyword("uniform".to_string()), Token::EOF].into() ); } #[test] fn integer_literal() { init(); let source_code = "12345"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[IntegerLiteral] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![Token::IntegerLiteral(12345), Token::EOF].into() ); } #[test] fn float_literal() { init(); let source_code = "123.4504"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[FloatLiteral] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![Token::FloatLiteral(123.4504), Token::EOF].into() ); } #[test] fn float_shorthand() { init(); let source_code = ".4504"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[FloatLiteral Shorthand] Tokens: {:#?}", tokens); assert_eq!(tokens, vec![Token::FloatLiteral(0.4504), Token::EOF].into()); } #[test] fn swizzling() { init(); let source_code = "abcd.xyz"; let mut lexer = Lexer::new(source_code); let tokens = lexer.get_tokens(); info!("[Swizzling] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![ Token::Identifier("abcd".to_string()), Token::Symbol('.'), Token::Identifier("xyz".to_string()), Token::EOF ] .into() ); } #[test] fn test_operator() { init(); let source = "+-*/%&|^!=<>?"; let mut lexer = Lexer::new(source); let tokens = lexer.get_tokens(); info!("[Operator] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![ Token::Operator("+".to_string()), Token::Operator("-".to_string()), Token::Operator("*".to_string()), Token::Operator("/".to_string()), Token::Operator("%".to_string()), Token::Operator("&".to_string()), Token::Operator("|".to_string()), Token::Operator("^".to_string()), Token::Operator("!".to_string()), Token::Operator("=".to_string()), Token::Operator("<".to_string()), Token::Operator(">".to_string()), Token::Operator("?".to_string()), Token::EOF, ] .into() ); } #[test] fn test_single_line_comment() { init(); let source = "// This is a comment\n"; let mut lexer = Lexer::new(source); let tokens = lexer.get_tokens(); info!("[Comment] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![ Token::Comment("// This is a comment".to_string()), Token::Whitespace, Token::EOF, ] .into() ); } // I hope that does it. Writing this test was pain. #[test] fn complex_source() { init(); let source = r#" uniform float time; void main() { gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); } "#; let mut lexer = Lexer::new(source); let tokens = lexer.get_tokens(); info!("[Complex Source] Tokens: {:#?}", tokens); assert_eq!( tokens, vec![ Token::Whitespace, Token::Keyword("uniform".to_string()), Token::Whitespace, Token::Keyword("float".to_string()), Token::Whitespace, Token::Identifier("time".to_string()), Token::Symbol(';'), Token::Whitespace, Token::Keyword("void".to_string()), Token::Whitespace, Token::Identifier("main".to_string()), Token::Symbol('('), Token::Symbol(')'), Token::Whitespace, Token::Symbol('{'), Token::Whitespace, Token::Identifier("gl_FragColor".to_string()), Token::Whitespace, Token::Operator('='.to_string()), Token::Whitespace, Token::Identifier("vec4".to_string()), Token::Symbol('('), Token::FloatLiteral(1.0), Token::Symbol(','), Token::Whitespace, Token::FloatLiteral(0.5), Token::Symbol(','), Token::Whitespace, Token::FloatLiteral(0.2), Token::Symbol(','), Token::Whitespace, Token::FloatLiteral(1.0), Token::Symbol(')'), Token::Symbol(';'), Token::Whitespace, Token::Symbol('}'), Token::Whitespace, Token::EOF, ] .into() ); } }