From dac68cfb109c15bcd859745ccad27bfb704b25d4 Mon Sep 17 00:00:00 2001 From: xqtc Date: Sun, 21 Jul 2024 20:40:47 +0200 Subject: [PATCH] [Refactor] Partially split up tokens into respective enums --- src/lex/handlers.rs | 82 ++++-------------- src/lex/{lex.rs => lexer.rs} | 48 +++++++++-- src/lex/mod.rs | 4 +- src/lib.rs | 120 +++++++++++++------------- src/tokens.rs | 158 +++++++++++++++++++++-------------- src/util.rs | 35 ++++---- 6 files changed, 227 insertions(+), 220 deletions(-) rename src/lex/{lex.rs => lexer.rs} (50%) diff --git a/src/lex/handlers.rs b/src/lex/handlers.rs index b69aa0e..e44426c 100644 --- a/src/lex/handlers.rs +++ b/src/lex/handlers.rs @@ -1,4 +1,5 @@ -impl crate::Lexer { +use crate::lex::lexer::Lexer; +impl Lexer { pub fn consume_whitespace(&mut self) { while let Some(c) = self.current_char { if !c.is_whitespace() { @@ -8,81 +9,26 @@ impl crate::Lexer { } } - pub fn consume_unknown(&mut self) -> crate::Token { - let unknown = self.current_char.unwrap(); - self.advance(); - crate::Token::Unknown(unknown) + pub fn consume_unknown(&mut self) -> crate::tokens::Token { + todo!() } - pub fn consume_identifier_or_keyword(&mut self) -> crate::Token { - let mut identifier = String::new(); - while let Some(c) = self.current_char { - if c.is_alphanumeric() || c == '_' { - identifier.push(c); - self.advance(); - } else { - break; - } - } - if crate::is_keyword(&identifier) { - crate::Token::Keyword(identifier) - } else { - crate::Token::Identifier(identifier) - } + pub fn consume_identifier_or_keyword(&mut self) -> crate::tokens::Token { + todo!() } - pub fn consume_number(&mut self) -> crate::Token { - let mut number = String::new(); - let mut is_float = false; - let mut is_swizzle = false; - - while let Some(c) = self.current_char { - if c.is_ascii_digit() { - number.push(c); - self.advance(); - } else if c == '.' && self.peek().map_or(false, |c| c.is_ascii_digit()) - || self.peek() == Some('f') - { - if number.is_empty() { - number.push('0'); - } - number.push(c); - is_float = true; - self.advance(); - } else if c.is_alphabetic() { - is_swizzle = true; - } else { - break; - } - } - - if is_float { - crate::Token::FloatLiteral(number.parse().unwrap()) - } else { - crate::Token::IntegerLiteral(number.parse().unwrap()) - } + pub fn consume_number(&mut self) -> crate::tokens::Token { + todo!() } - pub fn consume_comment(&mut self) -> crate::Token { - let mut comment = String::new(); - while let Some(c) = self.current_char { - if c == '\n' { - break; - } - comment.push(c); - self.advance(); - } - crate::Token::Comment(comment) + pub fn consume_comment(&mut self) -> crate::tokens::Token { + todo!() } - pub fn consume_symbol(&mut self) -> crate::Token { - let symbol = self.current_char.unwrap(); - self.advance(); - crate::Token::Symbol(symbol) + pub fn consume_symbol(&mut self) -> crate::tokens::Token { + todo!() } - pub fn consume_operator(&mut self) -> crate::Token { - let operator = self.current_char.unwrap(); - self.advance(); - crate::Token::Operator(operator.to_string()) + pub fn consume_operator(&mut self) -> crate::tokens::Token { + todo!() } } diff --git a/src/lex/lex.rs b/src/lex/lexer.rs similarity index 50% rename from src/lex/lex.rs rename to src/lex/lexer.rs index 009dc74..3d6e0cc 100644 --- a/src/lex/lex.rs +++ b/src/lex/lexer.rs @@ -10,6 +10,13 @@ pub struct Lexer { pub current_char: Option, } +#[macro_export] +macro_rules! lex { + ($source:expr) => {{ + $crate::lexer::Lexer::get_tokens(&mut $crate::Lexer::new($source)) + }}; +} + impl Lexer { pub fn new(input: &str) -> Self { let mut lexer = Lexer { @@ -24,7 +31,7 @@ impl Lexer { }; lexer } - fn advance(&mut self) { + pub fn advance(&mut self) { self.position += 1; self.current_char = if self.position < self.input.len() { Some(self.input[self.position]) @@ -33,7 +40,8 @@ impl Lexer { }; } - fn peek(&self) -> Option { + /// Peeks the next char in the source without incrementing `self.position`. + pub fn peek(&self) -> Option { if self.position + 1 < self.input.len() { Some(self.input[self.position + 1]) } else { @@ -57,11 +65,41 @@ impl Lexer { /// dbg!("{}", tokens); ///``` // We are using Arc<[Token]> as return type for cheaper cloning of the returned value - fn get_tokens(&mut self) -> Arc<[Token]> { + pub fn get_tokens(&mut self) -> Arc<[Token]> { let mut tokens: Vec = Vec::new(); + while let Some(c) = self.current_char { + if c.is_whitespace() { + self.consume_whitespace(); + tokens.push(Token::Whitespace); + } else if c.is_alphabetic() || c == '_' { + tokens.push(self.consume_identifier_or_keyword()); + } else if c.is_ascii_digit() { + tokens.push(self.consume_number()); + } else if c == '/' && self.peek() == Some('/') { + tokens.push(self.consume_comment()); + } else { + match c { + // TODO Implement operands like += + '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => { + tokens.push(self.consume_operator()); + } + + '{' | '}' | '(' | ')' | '#' | ',' | ';' => { + tokens.push(self.consume_symbol()); + } + + '.' => { + tokens.push(self.consume_number()); + } + + _ => { + tokens.push(self.consume_unknown()); + } + } + } + } + tokens.into() } - - fn match_token(&self, ) -> Token {} } diff --git a/src/lex/mod.rs b/src/lex/mod.rs index 44c1c82..8589472 100644 --- a/src/lex/mod.rs +++ b/src/lex/mod.rs @@ -1,2 +1,2 @@ -mod handlers; -mod lex; +pub mod handlers; +pub mod lexer; diff --git a/src/lib.rs b/src/lib.rs index 9064b1d..9a5cdff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,9 +22,10 @@ //! # WIP THAT SHIT STILL WONKY AF +#![allow(dead_code)] use std::sync::Arc; -mod lex; +pub mod lex; mod tokens; mod util; @@ -78,72 +79,65 @@ pub struct Lexer { /// let source = "some source"; /// let lexed = Lexer::new(source).get_tokens(); /// ``` -#[macro_export] -macro_rules! lex { - ($source:expr) => {{ - $crate::Lexer::get_tokens(&mut $crate::Lexer::new($source)) - }}; -} - - - // /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`]. - // /// # Example: - // /// ``` - // /// use glsl_lexer::*; - // /// let source = r#" - // /// #version 440 - // /// uniform float time; - // /// void main() { - // /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); - // /// } - // /// "#; - // /// let mut lexer = glsl_lexer::Lexer::new(&source); - // /// let tokens = lexer.get_tokens(); - // /// dbg!("{}", tokens); - // ///``` - // // We are using Arc<[Token]> as return type for cheaper cloning of the returned value - // pub fn get_tokens(&mut self) -> Arc<[Token]> { - // let mut tokens = Vec::new(); - // while let Some(c) = self.current_char { - // if c.is_whitespace() { - // self.consume_whitespace(); - // tokens.push(Token::Whitespace); - // } else if c.is_alphabetic() || c == '_' { - // tokens.push(self.consume_identifier_or_keyword()); - // } else if c.is_ascii_digit() { - // tokens.push(self.consume_number()); - // } else if c == '/' && self.peek() == Some('/') { - // tokens.push(self.consume_comment()); - // } else { - // match c { - // // TODO Implement operands like += - // '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => { - // tokens.push(self.consume_operator()); - // } - // - // '{' | '}' | '(' | ')' | '#' | ',' | ';' => { - // tokens.push(self.consume_symbol()); - // } - // - // '.' => { - // tokens.push(self.consume_number()); - // } - // - // _ => { - // tokens.push(self.consume_unknown()); - // } - // } - // } - // } - // tokens.push(Token::EOF); - // let ret: Arc<[Token]> = tokens.into(); - // ret - // } - +// /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`]. +// /// # Example: +// /// ``` +// /// use glsl_lexer::*; +// /// let source = r#" +// /// #version 440 +// /// uniform float time; +// /// void main() { +// /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0); +// /// } +// /// "#; +// /// let mut lexer = glsl_lexer::Lexer::new(&source); +// /// let tokens = lexer.get_tokens(); +// /// dbg!("{}", tokens); +// ///``` +// // We are using Arc<[Token]> as return type for cheaper cloning of the returned value +// pub fn get_tokens(&mut self) -> Arc<[Token]> { +// let mut tokens = Vec::new(); +// while let Some(c) = self.current_char { +// if c.is_whitespace() { +// self.consume_whitespace(); +// tokens.push(Token::Whitespace); +// } else if c.is_alphabetic() || c == '_' { +// tokens.push(self.consume_identifier_or_keyword()); +// } else if c.is_ascii_digit() { +// tokens.push(self.consume_number()); +// } else if c == '/' && self.peek() == Some('/') { +// tokens.push(self.consume_comment()); +// } else { +// match c { +// // TODO Implement operands like += +// '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => { +// tokens.push(self.consume_operator()); +// } +// +// '{' | '}' | '(' | ')' | '#' | ',' | ';' => { +// tokens.push(self.consume_symbol()); +// } +// +// '.' => { +// tokens.push(self.consume_number()); +// } +// +// _ => { +// tokens.push(self.consume_unknown()); +// } +// } +// } +// } +// tokens.push(Token::EOF); +// let ret: Arc<[Token]> = tokens.into(); +// ret +// } #[cfg(test)] mod tests { + use super::lex::lexer::Lexer; + use super::tokens::Token; use super::*; use log::info; diff --git a/src/tokens.rs b/src/tokens.rs index 2d8b1d4..66b607b 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -2,41 +2,45 @@ #![allow(non_snake_case)] #![allow(nonstandard_style)] +// https://www.khronos.org/opengl/wiki/Data_Type_(GLSL) + #[derive(Debug, Clone, PartialEq)] pub enum Token { + EOF, + Whitespace, CONST, BOOL, FLOAT, INT, UINT, DOUBLE, - MAT2, - MAT3, - MAT4, - MAT2X2, - MAT2X3, - MAT2X4, - MAT3X2, - MAT3X3, - MAT3X4, - MAT4X2, - MAT4X3, - MAT4X4, - DVEC2, - DVEC3, - DVEC4, - DMAT2, - DMAT3, - DMAT4, - DMAT2X2, - DMAT2X3, - DMAT2X4, - DMAT3X2, - DMAT3X3, - DMAT3X4, - DMAT4X2, - DMAT4X3, - DMAT4X4, + // MAT2, + // MAT3, + // MAT4, + // MAT2X2, + // MAT2X3, + // MAT2X4, + // MAT3X2, + // MAT3X3, + // MAT3X4, + // MAT4X2, + // MAT4X3, + // MAT4X4, + // DVEC2, + // DVEC3, + // DVEC4, + // DMAT2, + // DMAT3, + // DMAT4, + // DMAT2X2, + // DMAT2X3, + // DMAT2X4, + // DMAT3X2, + // DMAT3X3, + // DMAT3X4, + // DMAT4X2, + // DMAT4X3, + // DMAT4X4, CENTROID, IN, OUT, @@ -57,39 +61,9 @@ pub enum Token { LAYOUT, ATOMIC_UINT, SAMPLER(Sampler), - IMAGE2D, - IIMAGE2D, - UIMAGE2D, - IMAGE3D, - IIMAGE3D, - UIMAGE3D, - IMAGECUBE, - IIMAGECUBE, - UIMAGECUBE, - IMAGEBUFFER, - IIMAGEBUFFER, - UIMAGEBUFFER, - IMAGE2DARRAY, - IIMAGE2DARRAY, - UIMAGE2DARRAY, - IMAGECUBEARRAY, - IIMAGECUBEARRAY, - UIMAGECUBEARRAY, - IMAGE1D, - IIMAGE1D, - UIMAGE1D, - IMAGE1DARRAY, - IIMAGE1DARRAY, - UIMAGE1DARRAY, - IMAGE2DRECT, - IIMAGE2DRECT, - UIMAGE2DRECT, - IMAGE2DMS, - IIMAGE2DMS, - UIMAGE2DMS, - IMAGE2DMSARRAY, - IIMAGE2DMSARRAY, - UIMAGE2DMSARRAY, + VECTOR(Vector), + IMAGE(Image), + MATERIAL(Material), STRUCT, VOID, WHILE, @@ -167,7 +141,7 @@ pub enum Token { } #[derive(Debug, Clone, PartialEq)] -enum Sampler { +pub enum Sampler { SAMPLER2D, SAMPLER3D, SAMPLERCUBE, @@ -210,11 +184,11 @@ enum Sampler { USAMPLER2DMSARRAY, } -#[derive(Debug, Clone, PartialEq)] -enum Primitive {} +// #[derive(Debug, Clone, PartialEq)] +// enum Primitive {} #[derive(Debug, Clone, PartialEq)] -enum Material { +pub enum Material { MAT2, MAT3, MAT4, @@ -227,10 +201,60 @@ enum Material { MAT4X2, MAT4X3, MAT4X4, + // D = double precision + DMAT2, + DMAT3, + DMAT4, + DMAT2X2, + DMAT2X3, + DMAT2X4, + DMAT3X2, + DMAT3X3, + DMAT3X4, + DMAT4X2, + DMAT4X3, + DMAT4X4, } #[derive(Debug, Clone, PartialEq)] -enum Vector { +pub enum Image { + IMAGE2D, + IIMAGE2D, + UIMAGE2D, + IMAGE3D, + IIMAGE3D, + UIMAGE3D, + IMAGECUBE, + IIMAGECUBE, + UIMAGECUBE, + IMAGEBUFFER, + IIMAGEBUFFER, + UIMAGEBUFFER, + IMAGE2DARRAY, + IIMAGE2DARRAY, + UIMAGE2DARRAY, + IMAGECUBEARRAY, + IIMAGECUBEARRAY, + UIMAGECUBEARRAY, + IMAGE1D, + IIMAGE1D, + UIMAGE1D, + IMAGE1DARRAY, + IIMAGE1DARRAY, + UIMAGE1DARRAY, + IMAGE2DRECT, + IIMAGE2DRECT, + UIMAGE2DRECT, + IMAGE2DMS, + IIMAGE2DMS, + UIMAGE2DMS, + IMAGE2DMSARRAY, + IIMAGE2DMSARRAY, + UIMAGE2DMSARRAY, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Vector { BVEC2, BVEC3, BVEC4, @@ -243,4 +267,8 @@ enum Vector { VEC2, VEC3, VEC4, + // D stands for double precision + DVEC2, + DVEC3, + DVEC4, } diff --git a/src/util.rs b/src/util.rs index c1e8f4c..2e24a78 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,18 +1,19 @@ - -fn is_keyword(word: &str) -> bool { - matches!( - word, - "void" - | "int" - | "float" - | "bool" - | "if" - | "else" - | "for" - | "while" - | "return" - | "struct" - | "uniform" - | "varying" - ) +impl crate::lex::lexer::Lexer { + pub fn is_keyword(word: &str) -> bool { + matches!( + word, + "void" + | "int" + | "float" + | "bool" + | "if" + | "else" + | "for" + | "while" + | "return" + | "struct" + | "uniform" + | "varying" + ) + } }