[Refactor] Partially split up tokens into respective enums
Some checks failed
/ build (push) Failing after 1m16s
/ clippy (push) Successful in 1m18s

This commit is contained in:
xqtc 2024-07-21 20:40:47 +02:00
parent b3075d7d84
commit dac68cfb10
6 changed files with 227 additions and 220 deletions

View file

@ -1,4 +1,5 @@
impl crate::Lexer {
use crate::lex::lexer::Lexer;
impl Lexer {
pub fn consume_whitespace(&mut self) {
while let Some(c) = self.current_char {
if !c.is_whitespace() {
@ -8,81 +9,26 @@ impl crate::Lexer {
}
}
pub fn consume_unknown(&mut self) -> crate::Token {
let unknown = self.current_char.unwrap();
self.advance();
crate::Token::Unknown(unknown)
/// Consumes the current character as an unrecognized token.
///
/// NOTE(review): the pre-refactor version advanced past the char and
/// returned `Token::Unknown(c)`; reimplement against the new split
/// token enums in `crate::tokens` — confirm the target variant exists.
pub fn consume_unknown(&mut self) -> crate::tokens::Token {
todo!()
}
pub fn consume_identifier_or_keyword(&mut self) -> crate::Token {
let mut identifier = String::new();
while let Some(c) = self.current_char {
if c.is_alphanumeric() || c == '_' {
identifier.push(c);
self.advance();
} else {
break;
}
}
if crate::is_keyword(&identifier) {
crate::Token::Keyword(identifier)
} else {
crate::Token::Identifier(identifier)
}
/// Consumes an identifier or keyword token.
///
/// NOTE(review): the pre-refactor version accumulated alphanumeric/`_`
/// chars, then returned `Token::Keyword(..)` when `is_keyword` matched and
/// `Token::Identifier(..)` otherwise; port that logic to the new enums.
pub fn consume_identifier_or_keyword(&mut self) -> crate::tokens::Token {
todo!()
}
pub fn consume_number(&mut self) -> crate::Token {
let mut number = String::new();
let mut is_float = false;
let mut is_swizzle = false;
while let Some(c) = self.current_char {
if c.is_ascii_digit() {
number.push(c);
self.advance();
} else if c == '.' && self.peek().map_or(false, |c| c.is_ascii_digit())
|| self.peek() == Some('f')
{
if number.is_empty() {
number.push('0');
}
number.push(c);
is_float = true;
self.advance();
} else if c.is_alphabetic() {
is_swizzle = true;
} else {
break;
}
}
if is_float {
crate::Token::FloatLiteral(number.parse().unwrap())
} else {
crate::Token::IntegerLiteral(number.parse().unwrap())
}
/// Consumes an integer or float literal token.
///
/// NOTE(review): the pre-refactor version collected ascii digits, flagged a
/// float on `.`+digit or a trailing `f` (padding a leading `0` for inputs
/// like `.5`), tracked (but never used) a swizzle flag on alphabetic chars,
/// and parsed into `FloatLiteral`/`IntegerLiteral`; port to the new enums.
pub fn consume_number(&mut self) -> crate::tokens::Token {
todo!()
}
pub fn consume_comment(&mut self) -> crate::Token {
let mut comment = String::new();
while let Some(c) = self.current_char {
if c == '\n' {
break;
}
comment.push(c);
self.advance();
}
crate::Token::Comment(comment)
/// Consumes a line comment token.
///
/// NOTE(review): the pre-refactor version collected chars up to (not
/// including) the next `\n` and returned `Token::Comment(text)`.
pub fn consume_comment(&mut self) -> crate::tokens::Token {
todo!()
}
pub fn consume_symbol(&mut self) -> crate::Token {
let symbol = self.current_char.unwrap();
self.advance();
crate::Token::Symbol(symbol)
/// Consumes a single punctuation symbol (`{ } ( ) # , ;`).
///
/// NOTE(review): the pre-refactor version advanced past the char and
/// returned `Token::Symbol(c)`.
pub fn consume_symbol(&mut self) -> crate::tokens::Token {
todo!()
}
pub fn consume_operator(&mut self) -> crate::Token {
let operator = self.current_char.unwrap();
self.advance();
crate::Token::Operator(operator.to_string())
/// Consumes a single-character operator.
///
/// NOTE(review): the pre-refactor version advanced past the char and
/// returned `Token::Operator(c.to_string())`; multi-char operators such as
/// `+=` are still unhandled (see the TODO in `get_tokens`).
pub fn consume_operator(&mut self) -> crate::tokens::Token {
todo!()
}
}

View file

@ -10,6 +10,13 @@ pub struct Lexer {
pub current_char: Option<char>,
}
/// Convenience macro: lexes `$source` and returns the tokens.
///
/// Fix: the previous expansion mixed `$crate::lexer::Lexer` and
/// `$crate::Lexer`, neither of which matches the declared module tree
/// (`pub mod lex;` → `pub mod lexer;`), so the exported macro could not
/// resolve from the crate root. Both paths now use the canonical
/// `$crate::lex::lexer::Lexer`, consistent with the rest of the crate.
#[macro_export]
macro_rules! lex {
($source:expr) => {{
$crate::lex::lexer::Lexer::get_tokens(&mut $crate::lex::lexer::Lexer::new($source))
}};
}
impl Lexer {
pub fn new(input: &str) -> Self {
let mut lexer = Lexer {
@ -24,7 +31,7 @@ impl Lexer {
};
lexer
}
fn advance(&mut self) {
pub fn advance(&mut self) {
self.position += 1;
self.current_char = if self.position < self.input.len() {
Some(self.input[self.position])
@ -33,7 +40,8 @@ impl Lexer {
};
}
fn peek(&self) -> Option<char> {
/// Peeks the next char in the source without incrementing `self.position`.
pub fn peek(&self) -> Option<char> {
if self.position + 1 < self.input.len() {
Some(self.input[self.position + 1])
} else {
@ -57,11 +65,41 @@ impl Lexer {
/// dbg!("{}", tokens);
///```
// We are using Arc<[Token]> as return type for cheaper cloning of the returned value
fn get_tokens(&mut self) -> Arc<[Token]> {
/// Lexes the whole source and returns the token stream.
///
/// Returned as `Arc<[Token]>` so callers can clone the result cheaply
/// (refcount bump instead of a deep copy).
///
/// NOTE(review): the commented-out previous version appended `Token::EOF`
/// before returning; this version does not — confirm that is intentional.
/// Several `consume_*` helpers are currently `todo!()` stubs, so this
/// panics on most inputs until the token-enum split is finished.
pub fn get_tokens(&mut self) -> Arc<[Token]> {
let mut tokens: Vec<Token> = Vec::new();
while let Some(c) = self.current_char {
if c.is_whitespace() {
// Runs of whitespace collapse into a single Whitespace token.
self.consume_whitespace();
tokens.push(Token::Whitespace);
} else if c.is_alphabetic() || c == '_' {
tokens.push(self.consume_identifier_or_keyword());
} else if c.is_ascii_digit() {
tokens.push(self.consume_number());
} else if c == '/' && self.peek() == Some('/') {
// `//` starts a line comment; a lone `/` falls through to the
// operator arm below.
tokens.push(self.consume_comment());
} else {
match c {
// TODO Implement operands like +=
'+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
tokens.push(self.consume_operator());
}
'{' | '}' | '(' | ')' | '#' | ',' | ';' => {
tokens.push(self.consume_symbol());
}
'.' => {
// Float literals may start with a bare dot, e.g. `.5`.
tokens.push(self.consume_number());
}
_ => {
tokens.push(self.consume_unknown());
}
}
}
}
tokens.into()
}
fn match_token(&self, ) -> Token {}
}

View file

@ -1,2 +1,2 @@
mod handlers;
mod lex;
pub mod handlers;
pub mod lexer;

View file

@ -22,9 +22,10 @@
//! # WIP THAT SHIT STILL WONKY AF
#![allow(dead_code)]
use std::sync::Arc;
mod lex;
pub mod lex;
mod tokens;
mod util;
@ -78,72 +79,65 @@ pub struct Lexer {
/// let source = "some source";
/// let lexed = Lexer::new(source).get_tokens();
/// ```
#[macro_export]
macro_rules! lex {
($source:expr) => {{
$crate::Lexer::get_tokens(&mut $crate::Lexer::new($source))
}};
}
// /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
// /// # Example:
// /// ```
// /// use glsl_lexer::*;
// /// let source = r#"
// /// #version 440
// /// uniform float time;
// /// void main() {
// /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
// /// }
// /// "#;
// /// let mut lexer = glsl_lexer::Lexer::new(&source);
// /// let tokens = lexer.get_tokens();
// /// dbg!("{}", tokens);
// ///```
// // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
// pub fn get_tokens(&mut self) -> Arc<[Token]> {
// let mut tokens = Vec::new();
// while let Some(c) = self.current_char {
// if c.is_whitespace() {
// self.consume_whitespace();
// tokens.push(Token::Whitespace);
// } else if c.is_alphabetic() || c == '_' {
// tokens.push(self.consume_identifier_or_keyword());
// } else if c.is_ascii_digit() {
// tokens.push(self.consume_number());
// } else if c == '/' && self.peek() == Some('/') {
// tokens.push(self.consume_comment());
// } else {
// match c {
// // TODO Implement operands like +=
// '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
// tokens.push(self.consume_operator());
// }
//
// '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
// tokens.push(self.consume_symbol());
// }
//
// '.' => {
// tokens.push(self.consume_number());
// }
//
// _ => {
// tokens.push(self.consume_unknown());
// }
// }
// }
// }
// tokens.push(Token::EOF);
// let ret: Arc<[Token]> = tokens.into();
// ret
// }
// /// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
// /// # Example:
// /// ```
// /// use glsl_lexer::*;
// /// let source = r#"
// /// #version 440
// /// uniform float time;
// /// void main() {
// /// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
// /// }
// /// "#;
// /// let mut lexer = glsl_lexer::Lexer::new(&source);
// /// let tokens = lexer.get_tokens();
// /// dbg!("{}", tokens);
// ///```
// // We are using Arc<[Token]> as return type for cheaper cloning of the returned value
// pub fn get_tokens(&mut self) -> Arc<[Token]> {
// let mut tokens = Vec::new();
// while let Some(c) = self.current_char {
// if c.is_whitespace() {
// self.consume_whitespace();
// tokens.push(Token::Whitespace);
// } else if c.is_alphabetic() || c == '_' {
// tokens.push(self.consume_identifier_or_keyword());
// } else if c.is_ascii_digit() {
// tokens.push(self.consume_number());
// } else if c == '/' && self.peek() == Some('/') {
// tokens.push(self.consume_comment());
// } else {
// match c {
// // TODO Implement operands like +=
// '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
// tokens.push(self.consume_operator());
// }
//
// '{' | '}' | '(' | ')' | '#' | ',' | ';' => {
// tokens.push(self.consume_symbol());
// }
//
// '.' => {
// tokens.push(self.consume_number());
// }
//
// _ => {
// tokens.push(self.consume_unknown());
// }
// }
// }
// }
// tokens.push(Token::EOF);
// let ret: Arc<[Token]> = tokens.into();
// ret
// }
#[cfg(test)]
mod tests {
use super::lex::lexer::Lexer;
use super::tokens::Token;
use super::*;
use log::info;

View file

@ -2,41 +2,45 @@
#![allow(non_snake_case)]
#![allow(nonstandard_style)]
// https://www.khronos.org/opengl/wiki/Data_Type_(GLSL)
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
EOF,
Whitespace,
CONST,
BOOL,
FLOAT,
INT,
UINT,
DOUBLE,
MAT2,
MAT3,
MAT4,
MAT2X2,
MAT2X3,
MAT2X4,
MAT3X2,
MAT3X3,
MAT3X4,
MAT4X2,
MAT4X3,
MAT4X4,
DVEC2,
DVEC3,
DVEC4,
DMAT2,
DMAT3,
DMAT4,
DMAT2X2,
DMAT2X3,
DMAT2X4,
DMAT3X2,
DMAT3X3,
DMAT3X4,
DMAT4X2,
DMAT4X3,
DMAT4X4,
// MAT2,
// MAT3,
// MAT4,
// MAT2X2,
// MAT2X3,
// MAT2X4,
// MAT3X2,
// MAT3X3,
// MAT3X4,
// MAT4X2,
// MAT4X3,
// MAT4X4,
// DVEC2,
// DVEC3,
// DVEC4,
// DMAT2,
// DMAT3,
// DMAT4,
// DMAT2X2,
// DMAT2X3,
// DMAT2X4,
// DMAT3X2,
// DMAT3X3,
// DMAT3X4,
// DMAT4X2,
// DMAT4X3,
// DMAT4X4,
CENTROID,
IN,
OUT,
@ -57,39 +61,9 @@ pub enum Token {
LAYOUT,
ATOMIC_UINT,
SAMPLER(Sampler),
IMAGE2D,
IIMAGE2D,
UIMAGE2D,
IMAGE3D,
IIMAGE3D,
UIMAGE3D,
IMAGECUBE,
IIMAGECUBE,
UIMAGECUBE,
IMAGEBUFFER,
IIMAGEBUFFER,
UIMAGEBUFFER,
IMAGE2DARRAY,
IIMAGE2DARRAY,
UIMAGE2DARRAY,
IMAGECUBEARRAY,
IIMAGECUBEARRAY,
UIMAGECUBEARRAY,
IMAGE1D,
IIMAGE1D,
UIMAGE1D,
IMAGE1DARRAY,
IIMAGE1DARRAY,
UIMAGE1DARRAY,
IMAGE2DRECT,
IIMAGE2DRECT,
UIMAGE2DRECT,
IMAGE2DMS,
IIMAGE2DMS,
UIMAGE2DMS,
IMAGE2DMSARRAY,
IIMAGE2DMSARRAY,
UIMAGE2DMSARRAY,
VECTOR(Vector),
IMAGE(Image),
MATERIAL(Material),
STRUCT,
VOID,
WHILE,
@ -167,7 +141,7 @@ pub enum Token {
}
#[derive(Debug, Clone, PartialEq)]
enum Sampler {
pub enum Sampler {
SAMPLER2D,
SAMPLER3D,
SAMPLERCUBE,
@ -210,11 +184,11 @@ enum Sampler {
USAMPLER2DMSARRAY,
}
#[derive(Debug, Clone, PartialEq)]
enum Primitive {}
// #[derive(Debug, Clone, PartialEq)]
// enum Primitive {}
#[derive(Debug, Clone, PartialEq)]
enum Material {
pub enum Material {
MAT2,
MAT3,
MAT4,
@ -227,10 +201,60 @@ enum Material {
MAT4X2,
MAT4X3,
MAT4X4,
// D = double precision
DMAT2,
DMAT3,
DMAT4,
DMAT2X2,
DMAT2X3,
DMAT2X4,
DMAT3X2,
DMAT3X3,
DMAT3X4,
DMAT4X2,
DMAT4X3,
DMAT4X4,
}
#[derive(Debug, Clone, PartialEq)]
enum Vector {
// GLSL image types, split out of `Token` by this refactor.
// Naming follows the GLSL convention: the bare name is the floating-point
// image type, an `I` prefix is the signed-integer variant and a `U` prefix
// the unsigned-integer variant (image2D / iimage2D / uimage2D, …) — per the
// Khronos "Image Load Store" / "Data Type (GLSL)" wiki pages.
pub enum Image {
IMAGE2D,
IIMAGE2D,
UIMAGE2D,
IMAGE3D,
IIMAGE3D,
UIMAGE3D,
IMAGECUBE,
IIMAGECUBE,
UIMAGECUBE,
IMAGEBUFFER,
IIMAGEBUFFER,
UIMAGEBUFFER,
IMAGE2DARRAY,
IIMAGE2DARRAY,
UIMAGE2DARRAY,
IMAGECUBEARRAY,
IIMAGECUBEARRAY,
UIMAGECUBEARRAY,
IMAGE1D,
IIMAGE1D,
UIMAGE1D,
IMAGE1DARRAY,
IIMAGE1DARRAY,
UIMAGE1DARRAY,
IMAGE2DRECT,
IIMAGE2DRECT,
UIMAGE2DRECT,
IMAGE2DMS,
IIMAGE2DMS,
UIMAGE2DMS,
IMAGE2DMSARRAY,
IIMAGE2DMSARRAY,
UIMAGE2DMSARRAY,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Vector {
BVEC2,
BVEC3,
BVEC4,
@ -243,4 +267,8 @@ enum Vector {
VEC2,
VEC3,
VEC4,
// D stands for double precision
DVEC2,
DVEC3,
DVEC4,
}

View file

@ -1,18 +1,19 @@
/// Returns `true` if `word` is one of the GLSL keywords this lexer
/// currently recognizes (exact, case-sensitive match).
fn is_keyword(word: &str) -> bool {
    const KEYWORDS: [&str; 12] = [
        "void", "int", "float", "bool", "if", "else", "for", "while",
        "return", "struct", "uniform", "varying",
    ];
    KEYWORDS.contains(&word)
}
impl crate::lex::lexer::Lexer {
    /// Returns `true` if `word` is one of the GLSL keywords this lexer
    /// currently recognizes (exact, case-sensitive match).
    pub fn is_keyword(word: &str) -> bool {
        const KEYWORDS: [&str; 12] = [
            "void", "int", "float", "bool", "if", "else", "for", "while",
            "return", "struct", "uniform", "varying",
        ];
        KEYWORDS.contains(&word)
    }
}