[Refactor] Partially split up tokens into respective enums
Some checks failed
build (push): failing after 1m16s
clippy (push): successful in 1m18s

xqtc 2024-07-21 20:40:47 +02:00
parent b3075d7d84
commit dac68cfb10
6 changed files with 227 additions and 220 deletions
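In short: groups of flat Token variants are replaced by nested enums carried as payloads (Token::SAMPLER(Sampler), Token::VECTOR(Vector), Token::IMAGE(Image), Token::MATERIAL(Material)). A minimal sketch of the resulting shape, using the variant names from the diffs below; the import path is an assumption, since `tokens` is not yet public at the crate root in this commit:

    use glsl_lexer::tokens::{Image, Material, Sampler, Token, Vector};

    fn main() {
        // Old shape: Token::IMAGE2D, Token::DVEC3, Token::DMAT4X4, ...
        // New shape: one Token variant per family, specific type nested inside.
        let img = Token::IMAGE(Image::IMAGE2D);
        let vec = Token::VECTOR(Vector::DVEC3);
        let mat = Token::MATERIAL(Material::DMAT4X4);
        let smp = Token::SAMPLER(Sampler::SAMPLER2D);
        // Derived PartialEq and Debug make tokens comparable and printable.
        assert_ne!(img, vec);
        println!("{:?} {:?} {:?} {:?}", img, vec, mat, smp);
    }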

View file

@@ -1,4 +1,5 @@
-impl crate::Lexer {
+use crate::lex::lexer::Lexer;
+impl Lexer {
     pub fn consume_whitespace(&mut self) {
         while let Some(c) = self.current_char {
             if !c.is_whitespace() {
@@ -8,81 +9,26 @@ impl crate::Lexer {
         }
     }

-    pub fn consume_unknown(&mut self) -> crate::Token {
-        let unknown = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Unknown(unknown)
+    pub fn consume_unknown(&mut self) -> crate::tokens::Token {
+        todo!()
     }

-    pub fn consume_identifier_or_keyword(&mut self) -> crate::Token {
-        let mut identifier = String::new();
-        while let Some(c) = self.current_char {
-            if c.is_alphanumeric() || c == '_' {
-                identifier.push(c);
-                self.advance();
-            } else {
-                break;
-            }
-        }
-        if crate::is_keyword(&identifier) {
-            crate::Token::Keyword(identifier)
-        } else {
-            crate::Token::Identifier(identifier)
-        }
+    pub fn consume_identifier_or_keyword(&mut self) -> crate::tokens::Token {
+        todo!()
     }

-    pub fn consume_number(&mut self) -> crate::Token {
-        let mut number = String::new();
-        let mut is_float = false;
-        let mut is_swizzle = false;
-        while let Some(c) = self.current_char {
-            if c.is_ascii_digit() {
-                number.push(c);
-                self.advance();
-            } else if c == '.' && self.peek().map_or(false, |c| c.is_ascii_digit())
-                || self.peek() == Some('f')
-            {
-                if number.is_empty() {
-                    number.push('0');
-                }
-                number.push(c);
-                is_float = true;
-                self.advance();
-            } else if c.is_alphabetic() {
-                is_swizzle = true;
-            } else {
-                break;
-            }
-        }
-        if is_float {
-            crate::Token::FloatLiteral(number.parse().unwrap())
-        } else {
-            crate::Token::IntegerLiteral(number.parse().unwrap())
-        }
+    pub fn consume_number(&mut self) -> crate::tokens::Token {
+        todo!()
     }

-    pub fn consume_comment(&mut self) -> crate::Token {
-        let mut comment = String::new();
-        while let Some(c) = self.current_char {
-            if c == '\n' {
-                break;
-            }
-            comment.push(c);
-            self.advance();
-        }
-        crate::Token::Comment(comment)
+    pub fn consume_comment(&mut self) -> crate::tokens::Token {
+        todo!()
     }

-    pub fn consume_symbol(&mut self) -> crate::Token {
-        let symbol = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Symbol(symbol)
+    pub fn consume_symbol(&mut self) -> crate::tokens::Token {
+        todo!()
     }

-    pub fn consume_operator(&mut self) -> crate::Token {
-        let operator = self.current_char.unwrap();
-        self.advance();
-        crate::Token::Operator(operator.to_string())
+    pub fn consume_operator(&mut self) -> crate::tokens::Token {
+        todo!()
     }
 }
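Every handler body is stubbed with todo!() while the return type migrates from crate::Token to crate::tokens::Token. A hedged sketch of how consume_symbol might be filled back in, assuming Token keeps a Symbol(char) variant (this diff neither adds nor removes one):

    use crate::lex::lexer::Lexer;
    use crate::tokens::Token;

    impl Lexer {
        // Illustrative only: mirrors the removed body, retargeted at the
        // new token module. Assumes Token::Symbol(char) still exists.
        pub fn consume_symbol(&mut self) -> Token {
            let symbol = self.current_char.unwrap();
            self.advance();
            Token::Symbol(symbol)
        }
    }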

View file

@@ -10,6 +10,13 @@ pub struct Lexer {
     pub current_char: Option<char>,
 }

+#[macro_export]
+macro_rules! lex {
+    ($source:expr) => {{
+        $crate::lexer::Lexer::get_tokens(&mut $crate::Lexer::new($source))
+    }};
+}
+
 impl Lexer {
     pub fn new(input: &str) -> Self {
         let mut lexer = Lexer {
@@ -24,7 +31,7 @@ impl Lexer {
         };
         lexer
     }
-    fn advance(&mut self) {
+    pub fn advance(&mut self) {
         self.position += 1;
         self.current_char = if self.position < self.input.len() {
             Some(self.input[self.position])
@@ -33,7 +40,8 @@ impl Lexer {
         };
     }

-    fn peek(&self) -> Option<char> {
+    /// Peeks the next char in the source without incrementing `self.position`.
+    pub fn peek(&self) -> Option<char> {
         if self.position + 1 < self.input.len() {
             Some(self.input[self.position + 1])
         } else {
@@ -57,11 +65,41 @@ impl Lexer {
     /// dbg!("{}", tokens);
     ///```
     // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
-    fn get_tokens(&mut self) -> Arc<[Token]> {
+    pub fn get_tokens(&mut self) -> Arc<[Token]> {
         let mut tokens: Vec<Token> = Vec::new();
+        while let Some(c) = self.current_char {
+            if c.is_whitespace() {
+                self.consume_whitespace();
+                tokens.push(Token::Whitespace);
+            } else if c.is_alphabetic() || c == '_' {
+                tokens.push(self.consume_identifier_or_keyword());
+            } else if c.is_ascii_digit() {
+                tokens.push(self.consume_number());
+            } else if c == '/' && self.peek() == Some('/') {
+                tokens.push(self.consume_comment());
+            } else {
+                match c {
+                    // TODO Implement operands like +=
+                    '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
+                        tokens.push(self.consume_operator());
+                    }
+                    '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
+                        tokens.push(self.consume_symbol());
+                    }
+                    '.' => {
+                        tokens.push(self.consume_number());
+                    }
+                    _ => {
+                        tokens.push(self.consume_unknown());
+                    }
+                }
+            }
+        }
         tokens.into()
     }
+
+    fn match_token(&self, ) -> Token {}
 }
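With get_tokens now public and the lex! macro moved next to the Lexer, tokenizing is meant to be a one-liner. A usage sketch of the intended call shape; whether the $crate::lexer::Lexer path inside the macro resolves in this exact commit is uncertain, given the failing build:

    // lex! expands to Lexer::get_tokens(&mut Lexer::new($source)).
    let tokens: std::sync::Arc<[Token]> = lex!("uniform float time;");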

View file

@@ -1,2 +1,2 @@
-mod handlers;
-mod lex;
+pub mod handlers;
+pub mod lexer;

View file

@@ -22,9 +22,10 @@
 //! # WIP THAT SHIT STILL WONKY AF

+#![allow(dead_code)]
 use std::sync::Arc;

-mod lex;
+pub mod lex;
 mod tokens;
 mod util;
@@ -78,72 +79,65 @@ pub struct Lexer {
 /// let source = "some source";
 /// let lexed = Lexer::new(source).get_tokens();
 /// ```
-#[macro_export]
-macro_rules! lex {
-    ($source:expr) => {{
-        $crate::Lexer::get_tokens(&mut $crate::Lexer::new($source))
-    }};
-}
-
 // /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
 // /// # Example:
 // /// ```
 // /// use glsl_lexer::*;
 // /// let source = r#"
 // /// #version 440
 // /// uniform float time;
 // /// void main() {
 // ///     gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
 // /// }
 // /// "#;
 // /// let mut lexer = glsl_lexer::Lexer::new(&source);
 // /// let tokens = lexer.get_tokens();
 // /// dbg!("{}", tokens);
 // ///```
 // // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
 // pub fn get_tokens(&mut self) -> Arc<[Token]> {
 //     let mut tokens = Vec::new();
 //     while let Some(c) = self.current_char {
 //         if c.is_whitespace() {
 //             self.consume_whitespace();
 //             tokens.push(Token::Whitespace);
 //         } else if c.is_alphabetic() || c == '_' {
 //             tokens.push(self.consume_identifier_or_keyword());
 //         } else if c.is_ascii_digit() {
 //             tokens.push(self.consume_number());
 //         } else if c == '/' && self.peek() == Some('/') {
 //             tokens.push(self.consume_comment());
 //         } else {
 //             match c {
 //                 // TODO Implement operands like +=
 //                 '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
 //                     tokens.push(self.consume_operator());
 //                 }
 //
 //                 '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
 //                     tokens.push(self.consume_symbol());
 //                 }
 //
 //                 '.' => {
 //                     tokens.push(self.consume_number());
 //                 }
 //
 //                 _ => {
 //                     tokens.push(self.consume_unknown());
 //                 }
 //             }
 //         }
 //     }
 //     tokens.push(Token::EOF);
 //     let ret: Arc<[Token]> = tokens.into();
 //     ret
 // }

 #[cfg(test)]
 mod tests {
+    use super::lex::lexer::Lexer;
+    use super::tokens::Token;
     use super::*;
     use log::info;

View file

@@ -2,41 +2,45 @@
 #![allow(non_snake_case)]
 #![allow(nonstandard_style)]

+// https://www.khronos.org/opengl/wiki/Data_Type_(GLSL)
 #[derive(Debug, Clone, PartialEq)]
 pub enum Token {
+    EOF,
+    Whitespace,
     CONST,
     BOOL,
     FLOAT,
     INT,
     UINT,
     DOUBLE,
-    MAT2,
-    MAT3,
-    MAT4,
-    MAT2X2,
-    MAT2X3,
-    MAT2X4,
-    MAT3X2,
-    MAT3X3,
-    MAT3X4,
-    MAT4X2,
-    MAT4X3,
-    MAT4X4,
-    DVEC2,
-    DVEC3,
-    DVEC4,
-    DMAT2,
-    DMAT3,
-    DMAT4,
-    DMAT2X2,
-    DMAT2X3,
-    DMAT2X4,
-    DMAT3X2,
-    DMAT3X3,
-    DMAT3X4,
-    DMAT4X2,
-    DMAT4X3,
-    DMAT4X4,
+    // MAT2,
+    // MAT3,
+    // MAT4,
+    // MAT2X2,
+    // MAT2X3,
+    // MAT2X4,
+    // MAT3X2,
+    // MAT3X3,
+    // MAT3X4,
+    // MAT4X2,
+    // MAT4X3,
+    // MAT4X4,
+    // DVEC2,
+    // DVEC3,
+    // DVEC4,
+    // DMAT2,
+    // DMAT3,
+    // DMAT4,
+    // DMAT2X2,
+    // DMAT2X3,
+    // DMAT2X4,
+    // DMAT3X2,
+    // DMAT3X3,
+    // DMAT3X4,
+    // DMAT4X2,
+    // DMAT4X3,
+    // DMAT4X4,
     CENTROID,
     IN,
     OUT,
@@ -57,39 +61,9 @@ pub enum Token {
     LAYOUT,
     ATOMIC_UINT,
     SAMPLER(Sampler),
-    IMAGE2D,
-    IIMAGE2D,
-    UIMAGE2D,
-    IMAGE3D,
-    IIMAGE3D,
-    UIMAGE3D,
-    IMAGECUBE,
-    IIMAGECUBE,
-    UIMAGECUBE,
-    IMAGEBUFFER,
-    IIMAGEBUFFER,
-    UIMAGEBUFFER,
-    IMAGE2DARRAY,
-    IIMAGE2DARRAY,
-    UIMAGE2DARRAY,
-    IMAGECUBEARRAY,
-    IIMAGECUBEARRAY,
-    UIMAGECUBEARRAY,
-    IMAGE1D,
-    IIMAGE1D,
-    UIMAGE1D,
-    IMAGE1DARRAY,
-    IIMAGE1DARRAY,
-    UIMAGE1DARRAY,
-    IMAGE2DRECT,
-    IIMAGE2DRECT,
-    UIMAGE2DRECT,
-    IMAGE2DMS,
-    IIMAGE2DMS,
-    UIMAGE2DMS,
-    IMAGE2DMSARRAY,
-    IIMAGE2DMSARRAY,
-    UIMAGE2DMSARRAY,
+    VECTOR(Vector),
+    IMAGE(Image),
+    MATERIAL(Material),
     STRUCT,
     VOID,
     WHILE,
@@ -167,7 +141,7 @@ pub enum Token {
 }

 #[derive(Debug, Clone, PartialEq)]
-enum Sampler {
+pub enum Sampler {
     SAMPLER2D,
     SAMPLER3D,
     SAMPLERCUBE,
@@ -210,11 +184,11 @@ enum Sampler {
     USAMPLER2DMSARRAY,
 }

-#[derive(Debug, Clone, PartialEq)]
-enum Primitive {}
+// #[derive(Debug, Clone, PartialEq)]
+// enum Primitive {}

 #[derive(Debug, Clone, PartialEq)]
-enum Material {
+pub enum Material {
     MAT2,
     MAT3,
     MAT4,
@@ -227,10 +201,60 @@ enum Material {
     MAT4X2,
     MAT4X3,
     MAT4X4,
+    // D = double precision
+    DMAT2,
+    DMAT3,
+    DMAT4,
+    DMAT2X2,
+    DMAT2X3,
+    DMAT2X4,
+    DMAT3X2,
+    DMAT3X3,
+    DMAT3X4,
+    DMAT4X2,
+    DMAT4X3,
+    DMAT4X4,
 }

 #[derive(Debug, Clone, PartialEq)]
-enum Vector {
+pub enum Image {
+    IMAGE2D,
+    IIMAGE2D,
+    UIMAGE2D,
+    IMAGE3D,
+    IIMAGE3D,
+    UIMAGE3D,
+    IMAGECUBE,
+    IIMAGECUBE,
+    UIMAGECUBE,
+    IMAGEBUFFER,
+    IIMAGEBUFFER,
+    UIMAGEBUFFER,
+    IMAGE2DARRAY,
+    IIMAGE2DARRAY,
+    UIMAGE2DARRAY,
+    IMAGECUBEARRAY,
+    IIMAGECUBEARRAY,
+    UIMAGECUBEARRAY,
+    IMAGE1D,
+    IIMAGE1D,
+    UIMAGE1D,
+    IMAGE1DARRAY,
+    IIMAGE1DARRAY,
+    UIMAGE1DARRAY,
+    IMAGE2DRECT,
+    IIMAGE2DRECT,
+    UIMAGE2DRECT,
+    IMAGE2DMS,
+    IIMAGE2DMS,
+    UIMAGE2DMS,
+    IMAGE2DMSARRAY,
+    IIMAGE2DMSARRAY,
+    UIMAGE2DMSARRAY,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Vector {
     BVEC2,
     BVEC3,
     BVEC4,
@@ -243,4 +267,8 @@ enum Vector {
     VEC2,
     VEC3,
     VEC4,
+    // D stands for double precision
+    DVEC2,
+    DVEC3,
+    DVEC4,
 }
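One payoff of the nested layout: an entire family of types can now be matched with a single pattern instead of dozens of flat variants. A sketch, assuming the enum shapes above; the helper names are illustrative, not part of the commit:

    use crate::tokens::{Token, Vector};

    // Matches any of the 33 image types in one arm.
    fn is_image_type(tok: &Token) -> bool {
        matches!(tok, Token::IMAGE(_))
    }

    // Or-patterns narrow within a family.
    fn is_double_vec(tok: &Token) -> bool {
        matches!(
            tok,
            Token::VECTOR(Vector::DVEC2 | Vector::DVEC3 | Vector::DVEC4)
        )
    }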

View file

@ -1,18 +1,19 @@
impl crate::lex::lexer::Lexer {
fn is_keyword(word: &str) -> bool { pub fn is_keyword(word: &str) -> bool {
matches!( matches!(
word, word,
"void" "void"
| "int" | "int"
| "float" | "float"
| "bool" | "bool"
| "if" | "if"
| "else" | "else"
| "for" | "for"
| "while" | "while"
| "return" | "return"
| "struct" | "struct"
| "uniform" | "uniform"
| "varying" | "varying"
) )
}
} }
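Since is_keyword takes no receiver, moving it into the impl block makes it an associated function, called on the type rather than on an instance. A usage sketch, assuming the new module path:

    use crate::lex::lexer::Lexer;

    assert!(Lexer::is_keyword("uniform"));
    assert!(!Lexer::is_keyword("time")); // plain identifiers fall through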