[Refactor] Partially split up tokens into respective enums

2024-07-21 20:40:47 +02:00 · 2024-07-21 20:40:47 +02:00 · dac68cfb10
parent b3075d7d84
commit dac68cfb10
6 changed files with 227 additions and 220 deletions
--- a/src/lex/handlers.rs
+++ b/src/lex/handlers.rs
@ -1,4 +1,5 @@
-impl crate::Lexer {
+use crate::lex::lexer::Lexer;
+impl Lexer {
    pub fn consume_whitespace(&mut self) {
        while let Some(c) = self.current_char {
            if !c.is_whitespace() {
@ -8,81 +9,26 @@ impl crate::Lexer {
        }
    }

-    pub fn consume_unknown(&mut self) -> crate::Token {
-        let unknown = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Unknown(unknown)
+    pub fn consume_unknown(&mut self) -> crate::tokens::Token {
+        todo!()
    }

-    pub fn consume_identifier_or_keyword(&mut self) -> crate::Token {
-        let mut identifier = String::new();
-        while let Some(c) = self.current_char {
-            if c.is_alphanumeric() || c == '_' {
-                identifier.push(c);
-                self.advance();
-            } else {
-                break;
-            }
-        }
-        if crate::is_keyword(&identifier) {
-            crate::Token::Keyword(identifier)
-        } else {
-            crate::Token::Identifier(identifier)
-        }
+    pub fn consume_identifier_or_keyword(&mut self) -> crate::tokens::Token {
+        todo!()
    }

-    pub fn consume_number(&mut self) -> crate::Token {
-        let mut number = String::new();
-        let mut is_float = false;
-        let mut is_swizzle = false;
-
-        while let Some(c) = self.current_char {
-            if c.is_ascii_digit() {
-                number.push(c);
-                self.advance();
-            } else if c == '.' && self.peek().map_or(false, |c| c.is_ascii_digit())
-                || self.peek() == Some('f')
-            {
-                if number.is_empty() {
-                    number.push('0');
-                }
-                number.push(c);
-                is_float = true;
-                self.advance();
-            } else if c.is_alphabetic() {
-                is_swizzle = true;
-            } else {
-                break;
-            }
-        }
-
-        if is_float {
-            crate::Token::FloatLiteral(number.parse().unwrap())
-        } else {
-            crate::Token::IntegerLiteral(number.parse().unwrap())
-        }
+    pub fn consume_number(&mut self) -> crate::tokens::Token {
+        todo!()
    }

-    pub fn consume_comment(&mut self) -> crate::Token {
-        let mut comment = String::new();
-        while let Some(c) = self.current_char {
-            if c == '\n' {
-                break;
-            }
-            comment.push(c);
-            self.advance();
-        }
-        crate::Token::Comment(comment)
+    pub fn consume_comment(&mut self) -> crate::tokens::Token {
+        todo!()
    }

-    pub fn consume_symbol(&mut self) -> crate::Token {
-        let symbol = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Symbol(symbol)
+    pub fn consume_symbol(&mut self) -> crate::tokens::Token {
+        todo!()
    }
-    pub fn consume_operator(&mut self) -> crate::Token {
-        let operator = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Operator(operator.to_string())
+    pub fn consume_operator(&mut self) -> crate::tokens::Token {
+        todo!()
    }
 }
--- a/src/lex/lexer.rs
+++ b/src/lex/lexer.rs
@ -10,6 +10,13 @@ pub struct Lexer {
    pub current_char: Option<char>,
 }

+/// Lexes `$source` in one step: builds a `Lexer` over it and returns the
+/// result of `Lexer::get_tokens`.
+///
+/// Both paths are spelled out through `$crate::lex::lexer`, the module that
+/// defines `Lexer`, so the constructor and the method call refer to the same
+/// type wherever the macro is expanded.
+#[macro_export]
+macro_rules! lex {
+    ($source:expr) => {{
+        $crate::lex::lexer::Lexer::get_tokens(&mut $crate::lex::lexer::Lexer::new($source))
+    }};
+}
+
 impl Lexer {
    pub fn new(input: &str) -> Self {
        let mut lexer = Lexer {
@ -24,7 +31,7 @@ impl Lexer {
        };
        lexer
    }
-    fn advance(&mut self) {
+    pub fn advance(&mut self) {
        self.position += 1;
        self.current_char = if self.position < self.input.len() {
            Some(self.input[self.position])
@ -33,7 +40,8 @@ impl Lexer {
        };
    }

-    fn peek(&self) -> Option<char> {
+    /// Peeks the next char in the source without incrementing `self.position`.
+    pub fn peek(&self) -> Option<char> {
        if self.position + 1 < self.input.len() {
            Some(self.input[self.position + 1])
        } else {
@ -57,11 +65,41 @@ impl Lexer {
    /// dbg!("{}", tokens);
    ///```
    // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
-    fn get_tokens(&mut self) -> Arc<[Token]> {
+    pub fn get_tokens(&mut self) -> Arc<[Token]> {
        // NOTE(review): apart from `consume_whitespace`, every `consume_*` handler in
        // handlers.rs is a `todo!()` stub in this commit, so the loop below panics on
        // the first non-whitespace character until they are implemented.
        let mut tokens: Vec<Token> = Vec::new();

        // Dispatch on the current character; each branch hands off to a handler.
        // The loop ends once `current_char` is `None`.
+        while let Some(c) = self.current_char {
+            if c.is_whitespace() {
+                self.consume_whitespace();
+                tokens.push(Token::Whitespace);
+            } else if c.is_alphabetic() || c == '_' {
+                tokens.push(self.consume_identifier_or_keyword());
+            } else if c.is_ascii_digit() {
+                tokens.push(self.consume_number());
                // Two slashes start a comment; a lone '/' falls through to the operator arm below.
+            } else if c == '/' && self.peek() == Some('/') {
+                tokens.push(self.consume_comment());
+            } else {
+                match c {
+                    // TODO Implement compound operators like +=
+                    '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
+                        tokens.push(self.consume_operator());
+                    }
+
+                    '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
+                        tokens.push(self.consume_symbol());
+                    }
+
                    // A leading '.' is handed to the number handler as well.
+                    '.' => {
+                        tokens.push(self.consume_number());
+                    }
+
+                    _ => {
+                        tokens.push(self.consume_unknown());
+                    }
+                }
+            }
+        }
+
        // NOTE(review): the earlier implementation (kept commented out in lib.rs)
        // pushed `Token::EOF` before returning; confirm that omitting it is intentional.
        tokens.into()
    }
-
-    fn match_token(&self, ) -> Token {}
 }
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@ -1,2 +1,2 @@
-mod handlers;
-mod lex;
+pub mod handlers;
+pub mod lexer;
--- a/src/lib.rs
+++ b/src/lib.rs
@ -22,9 +22,10 @@

 //! # Work in progress: this crate is still unstable

+#![allow(dead_code)]
 use std::sync::Arc;

-mod lex;
+pub mod lex;
 mod tokens;
 mod util;

@ -78,72 +79,65 @@ pub struct Lexer {
 /// let source  = "some source";
 /// let lexed = Lexer::new(source).get_tokens();
 /// ```
-#[macro_export]
-macro_rules! lex {
-    ($source:expr) => {{
-        $crate::Lexer::get_tokens(&mut $crate::Lexer::new($source))
-    }};
-}
-
-
-    // /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
-    // /// # Example:
-    // /// ```
-    // /// use glsl_lexer::*;
-    // /// let source = r#"
-    // ///     #version 440
-    // ///     uniform float time;
-    // ///     void main() {
-    // ///         gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
-    // ///     }
-    // /// "#;
-    // /// let mut lexer = glsl_lexer::Lexer::new(&source);
-    // /// let tokens = lexer.get_tokens();
-    // /// dbg!("{}", tokens);
-    // ///```
-    // // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
-    // pub fn get_tokens(&mut self) -> Arc<[Token]> {
-    //     let mut tokens = Vec::new();
-    //     while let Some(c) = self.current_char {
-    //         if c.is_whitespace() {
-    //             self.consume_whitespace();
-    //             tokens.push(Token::Whitespace);
-    //         } else if c.is_alphabetic() || c == '_' {
-    //             tokens.push(self.consume_identifier_or_keyword());
-    //         } else if c.is_ascii_digit() {
-    //             tokens.push(self.consume_number());
-    //         } else if c == '/' && self.peek() == Some('/') {
-    //             tokens.push(self.consume_comment());
-    //         } else {
-    //             match c {
-    //                 // TODO Implement operands like +=
-    //                 '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
-    //                     tokens.push(self.consume_operator());
-    //                 }
-    //
-    //                 '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
-    //                     tokens.push(self.consume_symbol());
-    //                 }
-    //
-    //                 '.' => {
-    //                     tokens.push(self.consume_number());
-    //                 }
-    //
-    //                 _ => {
-    //                     tokens.push(self.consume_unknown());
-    //                 }
-    //             }
-    //         }
-    //     }
-    //     tokens.push(Token::EOF);
-    //     let ret: Arc<[Token]> = tokens.into();
-    //     ret
-    // }
-

+// /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
+// /// # Example:
+// /// ```
+// /// use glsl_lexer::*;
+// /// let source = r#"
+// ///     #version 440
+// ///     uniform float time;
+// ///     void main() {
+// ///         gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
+// ///     }
+// /// "#;
+// /// let mut lexer = glsl_lexer::Lexer::new(&source);
+// /// let tokens = lexer.get_tokens();
+// /// dbg!("{}", tokens);
+// ///```
+// // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
+// pub fn get_tokens(&mut self) -> Arc<[Token]> {
+//     let mut tokens = Vec::new();
+//     while let Some(c) = self.current_char {
+//         if c.is_whitespace() {
+//             self.consume_whitespace();
+//             tokens.push(Token::Whitespace);
+//         } else if c.is_alphabetic() || c == '_' {
+//             tokens.push(self.consume_identifier_or_keyword());
+//         } else if c.is_ascii_digit() {
+//             tokens.push(self.consume_number());
+//         } else if c == '/' && self.peek() == Some('/') {
+//             tokens.push(self.consume_comment());
+//         } else {
+//             match c {
+//                 // TODO Implement operands like +=
+//                 '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
+//                     tokens.push(self.consume_operator());
+//                 }
+//
+//                 '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
+//                     tokens.push(self.consume_symbol());
+//                 }
+//
+//                 '.' => {
+//                     tokens.push(self.consume_number());
+//                 }
+//
+//                 _ => {
+//                     tokens.push(self.consume_unknown());
+//                 }
+//             }
+//         }
+//     }
+//     tokens.push(Token::EOF);
+//     let ret: Arc<[Token]> = tokens.into();
+//     ret
+// }

 #[cfg(test)]
 mod tests {
+    use super::lex::lexer::Lexer;
+    use super::tokens::Token;
    use super::*;
    use log::info;

--- a/src/tokens.rs
+++ b/src/tokens.rs
@ -2,41 +2,45 @@
 #![allow(non_snake_case)]
 #![allow(nonstandard_style)]

+// https://www.khronos.org/opengl/wiki/Data_Type_(GLSL)
+
 #[derive(Debug, Clone, PartialEq)]
 pub enum Token {
+    EOF,
+    Whitespace,
    CONST,
    BOOL,
    FLOAT,
    INT,
    UINT,
    DOUBLE,
-    MAT2,
-    MAT3,
-    MAT4,
-    MAT2X2,
-    MAT2X3,
-    MAT2X4,
-    MAT3X2,
-    MAT3X3,
-    MAT3X4,
-    MAT4X2,
-    MAT4X3,
-    MAT4X4,
-    DVEC2,
-    DVEC3,
-    DVEC4,
-    DMAT2,
-    DMAT3,
-    DMAT4,
-    DMAT2X2,
-    DMAT2X3,
-    DMAT2X4,
-    DMAT3X2,
-    DMAT3X3,
-    DMAT3X4,
-    DMAT4X2,
-    DMAT4X3,
-    DMAT4X4,
+    // MAT2,
+    // MAT3,
+    // MAT4,
+    // MAT2X2,
+    // MAT2X3,
+    // MAT2X4,
+    // MAT3X2,
+    // MAT3X3,
+    // MAT3X4,
+    // MAT4X2,
+    // MAT4X3,
+    // MAT4X4,
+    // DVEC2,
+    // DVEC3,
+    // DVEC4,
+    // DMAT2,
+    // DMAT3,
+    // DMAT4,
+    // DMAT2X2,
+    // DMAT2X3,
+    // DMAT2X4,
+    // DMAT3X2,
+    // DMAT3X3,
+    // DMAT3X4,
+    // DMAT4X2,
+    // DMAT4X3,
+    // DMAT4X4,
    CENTROID,
    IN,
    OUT,
@ -57,39 +61,9 @@ pub enum Token {
    LAYOUT,
    ATOMIC_UINT,
    SAMPLER(Sampler),
-    IMAGE2D,
-    IIMAGE2D,
-    UIMAGE2D,
-    IMAGE3D,
-    IIMAGE3D,
-    UIMAGE3D,
-    IMAGECUBE,
-    IIMAGECUBE,
-    UIMAGECUBE,
-    IMAGEBUFFER,
-    IIMAGEBUFFER,
-    UIMAGEBUFFER,
-    IMAGE2DARRAY,
-    IIMAGE2DARRAY,
-    UIMAGE2DARRAY,
-    IMAGECUBEARRAY,
-    IIMAGECUBEARRAY,
-    UIMAGECUBEARRAY,
-    IMAGE1D,
-    IIMAGE1D,
-    UIMAGE1D,
-    IMAGE1DARRAY,
-    IIMAGE1DARRAY,
-    UIMAGE1DARRAY,
-    IMAGE2DRECT,
-    IIMAGE2DRECT,
-    UIMAGE2DRECT,
-    IMAGE2DMS,
-    IIMAGE2DMS,
-    UIMAGE2DMS,
-    IMAGE2DMSARRAY,
-    IIMAGE2DMSARRAY,
-    UIMAGE2DMSARRAY,
+    VECTOR(Vector),
+    IMAGE(Image),
+    MATERIAL(Material),
    STRUCT,
    VOID,
    WHILE,
@ -167,7 +141,7 @@ pub enum Token {
 }

 #[derive(Debug, Clone, PartialEq)]
-enum Sampler {
+pub enum Sampler {
    SAMPLER2D,
    SAMPLER3D,
    SAMPLERCUBE,
@ -210,11 +184,11 @@ enum Sampler {
    USAMPLER2DMSARRAY,
 }

-#[derive(Debug, Clone, PartialEq)]
-enum Primitive {}
+// #[derive(Debug, Clone, PartialEq)]
+// enum Primitive {}

 #[derive(Debug, Clone, PartialEq)]
-enum Material {
+pub enum Material {
    MAT2,
    MAT3,
    MAT4,
@ -227,10 +201,60 @@ enum Material {
    MAT4X2,
    MAT4X3,
    MAT4X4,
+    // D = double precision
+    DMAT2,
+    DMAT3,
+    DMAT4,
+    DMAT2X2,
+    DMAT2X3,
+    DMAT2X4,
+    DMAT3X2,
+    DMAT3X3,
+    DMAT3X4,
+    DMAT4X2,
+    DMAT4X3,
+    DMAT4X4,
 }

 #[derive(Debug, Clone, PartialEq)]
-enum Vector {
/// Image type keywords, split out of [`Token`] and carried by `Token::IMAGE`.
///
/// Each shape comes in three variants: unprefixed, `I`-prefixed and `U`-prefixed.
// NOTE(review): presumably the float / signed-integer / unsigned-integer forms of
// the GLSL image types — confirm against the GLSL data type reference.
+pub enum Image {
+    IMAGE2D,
+    IIMAGE2D,
+    UIMAGE2D,
+    IMAGE3D,
+    IIMAGE3D,
+    UIMAGE3D,
+    IMAGECUBE,
+    IIMAGECUBE,
+    UIMAGECUBE,
+    IMAGEBUFFER,
+    IIMAGEBUFFER,
+    UIMAGEBUFFER,
+    IMAGE2DARRAY,
+    IIMAGE2DARRAY,
+    UIMAGE2DARRAY,
+    IMAGECUBEARRAY,
+    IIMAGECUBEARRAY,
+    UIMAGECUBEARRAY,
+    IMAGE1D,
+    IIMAGE1D,
+    UIMAGE1D,
+    IMAGE1DARRAY,
+    IIMAGE1DARRAY,
+    UIMAGE1DARRAY,
+    IMAGE2DRECT,
+    IIMAGE2DRECT,
+    UIMAGE2DRECT,
+    IMAGE2DMS,
+    IIMAGE2DMS,
+    UIMAGE2DMS,
+    IMAGE2DMSARRAY,
+    IIMAGE2DMSARRAY,
+    UIMAGE2DMSARRAY,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Vector {
    BVEC2,
    BVEC3,
    BVEC4,
@ -243,4 +267,8 @@ enum Vector {
    VEC2,
    VEC3,
    VEC4,
+    // D stands for double precision
+    DVEC2,
+    DVEC3,
+    DVEC4,
 }
--- a/src/util.rs
+++ b/src/util.rs
@ -1,18 +1,19 @@
-
-fn is_keyword(word: &str) -> bool {
-    matches!(
-        word,
-        "void"
-            | "int"
-            | "float"
-            | "bool"
-            | "if"
-            | "else"
-            | "for"
-            | "while"
-            | "return"
-            | "struct"
-            | "uniform"
-            | "varying"
-    )
+impl crate::lex::lexer::Lexer {
    /// Returns `true` when `word` is exactly one of the keywords listed below.
    ///
    /// The comparison is a case-sensitive string match. The function takes no
    /// `self`, so it is called as `Lexer::is_keyword(word)`.
    // NOTE(review): this list is much shorter than the keyword variants declared on
    // `Token` (e.g. `CONST`, `LAYOUT`) — confirm whether it is meant to be exhaustive.
+    pub fn is_keyword(word: &str) -> bool {
+        matches!(
+            word,
+            "void"
+                | "int"
+                | "float"
+                | "bool"
+                | "if"
+                | "else"
+                | "for"
+                | "while"
+                | "return"
+                | "struct"
+                | "uniform"
+                | "varying"
+        )
+    }
 }