// glsl-lexer/src/lib.rs
//! A simple lexer for GLSL.
//!
//! Adheres to GLSL 4.40. Read the spec
//! [here](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.40.pdf).
//! ## Example
//! ```
//! use glsl_lexer::*;
//!
//! fn main() {
//! let source = r#"
//! #version 440
//! uniform float time;
//! void main() {
//! gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
//! }
//! "#;
//! let mut lexer = glsl_lexer::Lexer::new(&source);
//! let tokens = lexer.get_tokens();
//! dbg!(&tokens);
//! }
//! ```
//! # Stability
//! This crate is a work in progress; the token set and API may still change.
mod handlers;
/// A single lexical token produced by the [`Lexer`].
///
/// `Clone` is derived so consumers (e.g. a parser with lookahead) can hold
/// copies of tokens without taking ownership of the lexer's output vector.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// An identifier, e.g. `gl_FragColor` (note: `float` is a [`Token::Keyword`])
    Identifier(String),
    /// A reserved word, e.g. `uniform`
    Keyword(String),
    /// An integer literal, e.g. `13`
    IntegerLiteral(i64),
    /// A floating-point literal, e.g. `3.5` or the shorthand `.5`
    FloatLiteral(f64),
    /// An operator, e.g. `+`
    Operator(String),
    /// A punctuation symbol, e.g. `{`
    Symbol(char),
    /// A run of one or more whitespace characters, collapsed into one token
    Whitespace,
    /// A line comment including the `//` prefix, e.g. `// uwu`
    Comment(String),
    /// A character the lexer does not recognize
    Unknown(char),
    /// End Of File
    EOF,
}
/// Streaming lexer over a GLSL source string.
///
/// `Debug` and `Clone` are derived so the lexer state can be inspected in
/// logs and snapshotted (all fields are cheap-to-derive standard types).
#[derive(Debug, Clone)]
pub struct Lexer {
    /// GLSL source, decoded into `char`s for O(1) positional access
    pub input: Vec<char>,
    /// Cursor: index into `input` of the character under inspection
    pub position: usize,
    /// The [`char`] at `position`, or `None` once the input is exhausted
    pub current_char: Option<char>,
}
impl Lexer {
2024-07-11 12:03:42 +02:00
/// Instantiates the [`Lexer`]
2024-07-10 19:06:08 +02:00
pub fn new(input: &str) -> Self {
let mut lexer = Lexer {
input: input.chars().collect(),
position: 0,
current_char: None,
};
lexer.current_char = if lexer.position < lexer.input.len() {
Some(lexer.input[lexer.position])
} else {
None
};
lexer
}
fn advance(&mut self) {
self.position += 1;
self.current_char = if self.position < self.input.len() {
Some(self.input[self.position])
} else {
None
};
}
2024-07-10 20:47:10 +02:00
/// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
2024-07-12 00:13:32 +02:00
/// # Example:
/// ```
/// use glsl_lexer::*;
/// let source = r#"
/// #version 440
/// uniform float time;
/// void main() {
/// gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
/// }
/// "#;
/// let mut lexer = glsl_lexer::Lexer::new(&source);
/// let tokens = lexer.get_tokens();
/// dbg!("{}", tokens);
/// ```
2024-07-10 19:06:08 +02:00
pub fn get_tokens(&mut self) -> Vec<Token> {
let mut tokens = Vec::new();
while let Some(c) = self.current_char {
if c.is_whitespace() {
self.consume_whitespace();
tokens.push(Token::Whitespace);
} else if c.is_alphabetic() || c == '_' {
tokens.push(self.consume_identifier_or_keyword());
2024-07-12 02:07:39 +02:00
} else if c.is_ascii_digit() {
2024-07-10 19:06:08 +02:00
tokens.push(self.consume_number());
} else if c == '/' && self.peek() == Some('/') {
tokens.push(self.consume_comment());
} else {
2024-07-11 17:59:11 +02:00
match c {
2024-07-11 18:58:13 +02:00
// TODO Implement operands like +=
2024-07-11 17:59:11 +02:00
'+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
tokens.push(self.consume_operator());
}
2024-07-12 00:13:32 +02:00
2024-07-11 18:58:13 +02:00
'{' | '}' | '(' | ')' | '#' | ',' | ';' => {
2024-07-11 17:59:11 +02:00
tokens.push(self.consume_symbol());
}
2024-07-12 00:13:32 +02:00
2024-07-11 19:33:56 +02:00
'.' => {
tokens.push(self.consume_number());
}
2024-07-11 18:58:13 +02:00
_ => {
tokens.push(self.consume_unknown());
}
2024-07-11 17:59:11 +02:00
}
2024-07-10 19:06:08 +02:00
}
}
2024-07-10 21:37:34 +02:00
tokens.push(Token::EOF);
2024-07-10 19:06:08 +02:00
tokens
}
fn peek(&self) -> Option<char> {
if self.position + 1 < self.input.len() {
Some(self.input[self.position + 1])
} else {
None
}
}
2024-07-11 19:33:56 +02:00
// fn error(&self, message: &str) -> ! {
// panic!("Lexer error at position {}: {}", self.position, message);
// }
2024-07-10 19:06:08 +02:00
}
/// Returns `true` if `word` is one of the reserved GLSL words this lexer
/// classifies as a keyword token.
///
/// NOTE(review): this covers only a subset of the GLSL 4.40 reserved words;
/// extend the table as the lexer grows.
fn is_keyword(word: &str) -> bool {
    const KEYWORDS: [&str; 12] = [
        "void", "int", "float", "bool", "if", "else", "for", "while",
        "return", "struct", "uniform", "varying",
    ];
    KEYWORDS.contains(&word)
}
#[cfg(test)]
mod tests {
    use super::*;
    use log::info;

    /// Routes `log` output through the test harness so `info!` lines show
    /// up with `cargo test -- --nocapture`.
    fn init() {
        std::env::set_var("RUST_LOG", "INFO");
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[test]
    fn whitespace() {
        init();
        let source_code = " \t\n";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Whitespace] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::Whitespace, Token::EOF]);
    }

    #[test]
    fn identifier() {
        init();
        let source_code = "variableName";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Identifier] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Identifier("variableName".to_string()), Token::EOF]
        );
    }

    #[test]
    fn keyword() {
        init();
        let source_code = "uniform";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Keyword] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Keyword("uniform".to_string()), Token::EOF]
        );
    }

    #[test]
    fn integer_literal() {
        init();
        let source_code = "12345";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[IntegerLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::IntegerLiteral(12345), Token::EOF]);
    }

    #[test]
    fn float_literal() {
        init();
        let source_code = "123.4504";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[FloatLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(123.4504), Token::EOF]);
    }

    #[test]
    fn float_shorthand() {
        init();
        let source_code = ".4504";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[FloatLiteral Shorthand] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(0.4504), Token::EOF]);
    }

    // A `.` between identifiers is component access, not a float literal.
    #[test]
    fn swizzling() {
        init();
        let source_code = "abcd.xyz";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Swizzling] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("abcd".to_string()),
                Token::Symbol('.'),
                Token::Identifier("xyz".to_string()),
                Token::EOF
            ]
        );
    }

    #[test]
    fn operator() {
        init();
        let source = "+-*/%&|^!=<>?";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Operator] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Operator("+".to_string()),
                Token::Operator("-".to_string()),
                Token::Operator("*".to_string()),
                Token::Operator("/".to_string()),
                Token::Operator("%".to_string()),
                Token::Operator("&".to_string()),
                Token::Operator("|".to_string()),
                Token::Operator("^".to_string()),
                Token::Operator("!".to_string()),
                Token::Operator("=".to_string()),
                Token::Operator("<".to_string()),
                Token::Operator(">".to_string()),
                Token::Operator("?".to_string()),
                Token::EOF,
            ]
        );
    }

    #[test]
    fn single_line_comment() {
        init();
        let source = "// This is a comment\n";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Comment] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Comment("// This is a comment".to_string()),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }

    // End-to-end check over a small but representative fragment shader.
    #[test]
    fn complex_source() {
        init();
        let source = r#"
uniform float time;
void main() {
gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
}
"#;
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Complex Source] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Whitespace,
                Token::Keyword("uniform".to_string()),
                Token::Whitespace,
                Token::Keyword("float".to_string()),
                Token::Whitespace,
                Token::Identifier("time".to_string()),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Keyword("void".to_string()),
                Token::Whitespace,
                Token::Identifier("main".to_string()),
                Token::Symbol('('),
                Token::Symbol(')'),
                Token::Whitespace,
                Token::Symbol('{'),
                Token::Whitespace,
                Token::Identifier("gl_FragColor".to_string()),
                Token::Whitespace,
                Token::Operator("=".to_string()),
                Token::Whitespace,
                Token::Identifier("vec4".to_string()),
                Token::Symbol('('),
                Token::FloatLiteral(1.0),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.5),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.2),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(1.0),
                Token::Symbol(')'),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Symbol('}'),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }
}