//! A simple lexer for GLSL. //! //! Adheres to the GLSL 440. Read the spec //! [here](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.40.pdf). //! ## Example //! ``` //! use glsl_lexer::*; //! //! fn main() { //! let source = r#" //! #version 440 //! uniform float time; //! void main() { //! gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); //! } //! "#; //! let mut lexer = glsl_lexer::Lexer::new(&source); //! let tokens = lexer.get_tokens(); //! dbg!("{}", tokens); //! } //! ``` //! # WIP THAT SHIT STILL WONKY AF #![allow(dead_code)] pub mod lex; mod tokens; mod util; #[derive(Debug, PartialEq, Clone)] pub enum Token { /// Something like `float` Identifier(String), /// Something like `uniform` Keyword(String), /// Something like `13` IntegerLiteral(i64), /// Something like `3.5` or `.5` FloatLiteral(f64), /// Something like `+` Operator(String), /// Something like `{` Symbol(char), /// Should be self-explanatory Whitespace, /// Something like `// uwu` Comment(String), /// Shrouded in mystery Unknown(char), /// End Of File EOF, } pub struct Lexer { /// GLSL source pub input: Vec, /// Position in source pub position: usize, /// [`char`] under position pub current_char: Option, } /// Instantiates Lexer and retrieves Tokens of given source. /// Returns Arc<[Token]> /// # Example: /// ``` /// let lexed: Arc<[Token]> = lex!(r#" /// #version 440 /// uniform float time; /// void main() { /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); /// } /// "#); /// ``` /// Equivalent to: /// ``` /// let source = "some source"; /// let lexed = Lexer::new(source).get_tokens(); /// ``` // /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`]. 
//     /// # Example:
//     /// ```
//     /// use glsl_lexer::*;
//     /// let source = r#"
//     ///     #version 440
//     ///     uniform float time;
//     ///     void main() {
//     ///         gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
//     ///     }
//     /// "#;
//     /// let mut lexer = glsl_lexer::Lexer::new(&source);
//     /// let tokens = lexer.get_tokens();
//     /// dbg!("{}", tokens);
//     ///```
//     // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
//     pub fn get_tokens(&mut self) -> Arc<[Token]> {
//         let mut tokens = Vec::new();
//         while let Some(c) = self.current_char {
//             if c.is_whitespace() {
//                 self.consume_whitespace();
//                 tokens.push(Token::Whitespace);
//             } else if c.is_alphabetic() || c == '_' {
//                 tokens.push(self.consume_identifier_or_keyword());
//             } else if c.is_ascii_digit() {
//                 tokens.push(self.consume_number());
//             } else if c == '/' && self.peek() == Some('/') {
//                 tokens.push(self.consume_comment());
//             } else {
//                 match c {
//                     // TODO Implement operands like +=
//                     '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
//                         tokens.push(self.consume_operator());
//                     }
//
//                     '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
//                         tokens.push(self.consume_symbol());
//                     }
//
//                     '.' => {
//                         tokens.push(self.consume_number());
//                     }
//
//                     _ => {
//                         tokens.push(self.consume_unknown());
//                     }
//                 }
//             }
//         }
//         tokens.push(Token::EOF);
//         let ret: Arc<[Token]> = tokens.into();
//         ret
//     }

#[cfg(test)]
mod tests {
    use super::lex::lexer::Lexer;
    use super::tokens::Token;
    use super::*;
    use log::{info, LevelFilter};

    /// Installs the test logger at `INFO` level.
    ///
    /// The level is configured on the builder instead of by writing `RUST_LOG`
    /// into the process environment: tests run on several threads, and mutating
    /// the environment from them is racy (and `unsafe` from edition 2024 on).
    /// `try_init` fails once a logger is already installed, which is expected
    /// for every test after the first, so its result is ignored on purpose.
    fn init() {
        let _ = env_logger::Builder::new()
            .filter_level(LevelFilter::Info)
            .is_test(true)
            .try_init();
    }

    /// Lexes `source`, logs the resulting tokens under `label`, and asserts
    /// that they equal `expected`.
    ///
    /// `#[track_caller]` makes a failing assertion point at the calling test
    /// rather than at this helper.
    #[track_caller]
    fn assert_tokens(label: &str, source: &str, expected: Vec<Token>) {
        init();
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[{}] Tokens: {:#?}", label, tokens);
        assert_eq!(tokens, expected.into());
    }

    #[test]
    fn whitespace() {
        assert_tokens("Whitespace", " \t\n", vec![Token::Whitespace, Token::EOF]);
    }

    #[test]
    fn identifier() {
        assert_tokens(
            "Identifier",
            "variableName",
            vec![Token::Identifier("variableName".to_string()), Token::EOF],
        );
    }

    #[test]
    fn keyword() {
        assert_tokens(
            "Keyword",
            "uniform",
            vec![Token::Keyword("uniform".to_string()), Token::EOF],
        );
    }

    #[test]
    fn integer_literal() {
        assert_tokens(
            "IntegerLiteral",
            "12345",
            vec![Token::IntegerLiteral(12345), Token::EOF],
        );
    }

    #[test]
    fn float_literal() {
        assert_tokens(
            "FloatLiteral",
            "123.4504",
            vec![Token::FloatLiteral(123.4504), Token::EOF],
        );
    }

    #[test]
    fn float_shorthand() {
        assert_tokens(
            "FloatLiteral Shorthand",
            ".4504",
            vec![Token::FloatLiteral(0.4504), Token::EOF],
        );
    }

    #[test]
    fn swizzling() {
        assert_tokens(
            "Swizzling",
            "abcd.xyz",
            vec![
                Token::Identifier("abcd".to_string()),
                Token::Symbol('.'),
                Token::Identifier("xyz".to_string()),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_operator() {
        assert_tokens(
            "Operator",
            "+-*/%&|^!=<>?",
            vec![
                Token::Operator("+".to_string()),
                Token::Operator("-".to_string()),
                Token::Operator("*".to_string()),
                Token::Operator("/".to_string()),
                Token::Operator("%".to_string()),
                Token::Operator("&".to_string()),
                Token::Operator("|".to_string()),
                Token::Operator("^".to_string()),
                Token::Operator("!".to_string()),
                Token::Operator("=".to_string()),
                Token::Operator("<".to_string()),
                Token::Operator(">".to_string()),
                Token::Operator("?".to_string()),
                Token::EOF,
            ],
        );
    }

    #[test]
    fn test_single_line_comment() {
        // The newline is not part of the comment; it shows up as whitespace.
        assert_tokens(
            "Comment",
            "// This is a comment\n",
            vec![
                Token::Comment("// This is a comment".to_string()),
                Token::Whitespace,
                Token::EOF,
            ],
        );
    }

    // I hope that does it. Writing this test was pain.
    #[test]
    fn complex_source() {
        let source = r#"
            uniform float time;
            void main() {
                gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
            }
        "#;
        assert_tokens(
            "Complex Source",
            source,
            vec![
                Token::Whitespace,
                Token::Keyword("uniform".to_string()),
                Token::Whitespace,
                Token::Keyword("float".to_string()),
                Token::Whitespace,
                Token::Identifier("time".to_string()),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Keyword("void".to_string()),
                Token::Whitespace,
                Token::Identifier("main".to_string()),
                Token::Symbol('('),
                Token::Symbol(')'),
                Token::Whitespace,
                Token::Symbol('{'),
                Token::Whitespace,
                Token::Identifier("gl_FragColor".to_string()),
                Token::Whitespace,
                Token::Operator("=".to_string()),
                Token::Whitespace,
                Token::Identifier("vec4".to_string()),
                Token::Symbol('('),
                Token::FloatLiteral(1.0),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.5),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.2),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(1.0),
                Token::Symbol(')'),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Symbol('}'),
                Token::Whitespace,
                Token::EOF,
            ],
        );
    }
}