2024-07-11 12:03:42 +02:00
|
|
|
//! A simple lexer for GLSL.
|
|
|
|
//!
|
|
|
|
//! Adheres to the GLSL 4.40 specification. Read the spec
|
|
|
|
//! [here](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.40.pdf).
|
|
|
|
//! ## Example
|
|
|
|
//! ```
|
|
|
|
//! use glsl_lexer::*;
|
|
|
|
//!
|
|
|
|
//! fn main() {
|
|
|
|
//! let source = r#"
|
|
|
|
//! #version 440
|
|
|
|
//! uniform float time;
|
|
|
|
//! void main() {
|
|
|
|
//! gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
|
|
|
|
//! }
|
|
|
|
//! "#;
|
|
|
|
//! let mut lexer = glsl_lexer::Lexer::new(&source);
|
|
|
|
//! let tokens = lexer.get_tokens();
|
|
|
|
//! dbg!(tokens);
|
|
|
|
//! }
|
|
|
|
//! ```
|
|
|
|
|
|
|
|
//! # Status: work in progress — the lexer is incomplete and may still misbehave
|
2024-07-10 19:06:08 +02:00
|
|
|
|
|
|
|
/// A single lexical unit produced by [`Lexer::get_tokens`].
///
/// `Clone` is derived so callers can keep or duplicate tokens without
/// re-lexing the source. `Eq` is deliberately not derived because
/// [`Token::FloatLiteral`] carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A name that is not a recognised keyword, e.g. `time` or `gl_FragColor`.
    Identifier(String),
    /// A reserved word recognised by the lexer, e.g. `uniform`.
    Keyword(String),
    /// An integer literal, e.g. `13`.
    IntegerLiteral(i64),
    /// A floating point literal, e.g. `3.5` or `.5`.
    FloatLiteral(f64),
    /// An operator, e.g. `+`. Stored as a `String` so that multi-character
    /// operators can be represented.
    Operator(String),
    /// A punctuation character, e.g. `{`.
    Symbol(char),
    /// A run of one or more whitespace characters, collapsed into one token.
    Whitespace,
    /// A single-line comment including its leading slashes, e.g. `// uwu`.
    Comment(String),
    /// A character the lexer does not know how to classify.
    Unknown(char),
    /// End of file; always the last token of a token stream.
    EOF,
}
|
|
|
|
|
|
|
|
/// Cursor over a GLSL source string.
///
/// The source is held as individual `char`s, so `position` is a character
/// index, not a byte offset. `Debug` and `Clone` are derived so a lexer can be
/// inspected in diagnostics and snapshotted before lexing.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// GLSL source, one entry per character.
    pub input: Vec<char>,
    /// Index into `input` of the character currently under the cursor.
    pub position: usize,
    /// [`char`] under `position`, or `None` once the end of `input` is reached.
    pub current_char: Option<char>,
}
|
|
|
|
|
|
|
|
impl Lexer {
|
2024-07-11 12:03:42 +02:00
|
|
|
/// Instantiates the [`Lexer`]
|
2024-07-10 19:06:08 +02:00
|
|
|
pub fn new(input: &str) -> Self {
|
|
|
|
let mut lexer = Lexer {
|
|
|
|
input: input.chars().collect(),
|
|
|
|
position: 0,
|
|
|
|
current_char: None,
|
|
|
|
};
|
|
|
|
lexer.current_char = if lexer.position < lexer.input.len() {
|
|
|
|
Some(lexer.input[lexer.position])
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
};
|
|
|
|
lexer
|
|
|
|
}
|
|
|
|
|
|
|
|
fn advance(&mut self) {
|
|
|
|
self.position += 1;
|
|
|
|
self.current_char = if self.position < self.input.len() {
|
|
|
|
Some(self.input[self.position])
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2024-07-10 20:47:10 +02:00
|
|
|
/// Parses the source given the [`Lexer`] upon initialization and returns a vector of [`Token`].
|
2024-07-10 19:06:08 +02:00
|
|
|
pub fn get_tokens(&mut self) -> Vec<Token> {
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
while let Some(c) = self.current_char {
|
|
|
|
if c.is_whitespace() {
|
|
|
|
self.consume_whitespace();
|
|
|
|
tokens.push(Token::Whitespace);
|
|
|
|
} else if c.is_alphabetic() || c == '_' {
|
|
|
|
tokens.push(self.consume_identifier_or_keyword());
|
|
|
|
} else if c.is_digit(10) {
|
|
|
|
tokens.push(self.consume_number());
|
|
|
|
} else if c == '/' && self.peek() == Some('/') {
|
|
|
|
tokens.push(self.consume_comment());
|
|
|
|
} else {
|
2024-07-11 17:59:11 +02:00
|
|
|
match c {
|
2024-07-11 18:58:13 +02:00
|
|
|
// TODO Implement operands like +=
|
2024-07-11 17:59:11 +02:00
|
|
|
'+' | '-' | '*' | '/' | '%' | '&' | '|' | '^' | '!' | '=' | '<' | '>' | '?' => {
|
|
|
|
tokens.push(self.consume_operator());
|
|
|
|
}
|
2024-07-11 18:58:13 +02:00
|
|
|
'{' | '}' | '(' | ')' | '#' | ',' | ';' => {
|
2024-07-11 17:59:11 +02:00
|
|
|
tokens.push(self.consume_symbol());
|
|
|
|
}
|
2024-07-11 18:58:13 +02:00
|
|
|
|
|
|
|
_ => {
|
|
|
|
tokens.push(self.consume_unknown());
|
|
|
|
}
|
2024-07-11 17:59:11 +02:00
|
|
|
}
|
2024-07-10 19:06:08 +02:00
|
|
|
}
|
|
|
|
}
|
2024-07-10 21:37:34 +02:00
|
|
|
tokens.push(Token::EOF);
|
2024-07-10 19:06:08 +02:00
|
|
|
tokens
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peek(&self) -> Option<char> {
|
|
|
|
if self.position + 1 < self.input.len() {
|
|
|
|
Some(self.input[self.position + 1])
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_whitespace(&mut self) {
|
|
|
|
while let Some(c) = self.current_char {
|
|
|
|
if !c.is_whitespace() {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-07-11 18:58:13 +02:00
|
|
|
fn consume_unknown(&mut self) -> Token {
|
|
|
|
let unknown = self.current_char.unwrap();
|
|
|
|
self.advance();
|
|
|
|
Token::Unknown(unknown)
|
|
|
|
}
|
|
|
|
|
2024-07-10 19:06:08 +02:00
|
|
|
fn consume_identifier_or_keyword(&mut self) -> Token {
|
|
|
|
let mut identifier = String::new();
|
|
|
|
while let Some(c) = self.current_char {
|
|
|
|
if c.is_alphanumeric() || c == '_' {
|
|
|
|
identifier.push(c);
|
|
|
|
self.advance();
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if is_keyword(&identifier) {
|
|
|
|
Token::Keyword(identifier)
|
|
|
|
} else {
|
|
|
|
Token::Identifier(identifier)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_number(&mut self) -> Token {
|
|
|
|
let mut number = String::new();
|
|
|
|
let mut is_float = false;
|
|
|
|
|
|
|
|
while let Some(c) = self.current_char {
|
|
|
|
if c.is_digit(10) {
|
|
|
|
number.push(c);
|
|
|
|
self.advance();
|
|
|
|
} else if c == '.' {
|
|
|
|
number.push(c);
|
|
|
|
is_float = true;
|
|
|
|
self.advance();
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if is_float {
|
|
|
|
Token::FloatLiteral(number.parse().unwrap())
|
|
|
|
} else {
|
|
|
|
Token::IntegerLiteral(number.parse().unwrap())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_comment(&mut self) -> Token {
|
|
|
|
let mut comment = String::new();
|
|
|
|
while let Some(c) = self.current_char {
|
|
|
|
if c == '\n' {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
comment.push(c);
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
Token::Comment(comment)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_symbol(&mut self) -> Token {
|
|
|
|
let symbol = self.current_char.unwrap();
|
|
|
|
self.advance();
|
|
|
|
Token::Symbol(symbol)
|
|
|
|
}
|
2024-07-11 17:59:11 +02:00
|
|
|
fn consume_operator(&mut self) -> Token {
|
|
|
|
let operator = self.current_char.unwrap();
|
|
|
|
self.advance();
|
|
|
|
Token::Operator(operator.to_string())
|
|
|
|
}
|
2024-07-10 20:47:10 +02:00
|
|
|
|
|
|
|
fn error(&self, message: &str) -> ! {
|
|
|
|
panic!("Lexer error at position {}: {}", self.position, message);
|
|
|
|
}
|
2024-07-10 19:06:08 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Reports whether `word` is one of the reserved words this lexer treats as a
/// keyword.
///
/// The comparison is exact and case-sensitive.
fn is_keyword(word: &str) -> bool {
    const KEYWORDS: [&str; 12] = [
        "void", "int", "float", "bool", "if", "else", "for", "while", "return", "struct",
        "uniform", "varying",
    ];
    KEYWORDS.contains(&word)
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use log::info;

    /// Installs a test logger at `INFO` level; called at the start of every test.
    ///
    /// The level is configured on the builder instead of through
    /// `std::env::set_var`: the test harness runs tests on several threads, and
    /// mutating the process environment concurrently is a data race hazard.
    /// `try_init` fails once a logger is already installed, which is expected
    /// here and deliberately ignored.
    fn init() {
        let _ = env_logger::Builder::new()
            .filter_level(log::LevelFilter::Info)
            .is_test(true)
            .try_init();
    }

    /// A run of mixed whitespace collapses into a single token.
    #[test]
    fn whitespace() {
        init();
        let source_code = " \t\n";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Whitespace] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::Whitespace, Token::EOF]);
    }

    /// A name that is not a keyword becomes an identifier.
    #[test]
    fn identifier() {
        init();
        let source_code = "variableName";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Identifier] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Identifier("variableName".to_string()), Token::EOF]
        );
    }

    /// A reserved word becomes a keyword.
    #[test]
    fn keyword() {
        init();
        let source_code = "uniform";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[Keyword] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![Token::Keyword("uniform".to_string()), Token::EOF]
        );
    }

    /// A run of digits becomes an integer literal.
    #[test]
    fn integer_literal() {
        init();
        let source_code = "12345";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[IntegerLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::IntegerLiteral(12345), Token::EOF]);
    }

    /// Digits with a decimal point become a float literal.
    #[test]
    fn float_literal() {
        init();
        let source_code = "123.45";
        let mut lexer = Lexer::new(source_code);
        let tokens = lexer.get_tokens();
        info!("[FloatLiteral] Tokens: {:#?}", tokens);
        assert_eq!(tokens, vec![Token::FloatLiteral(123.45), Token::EOF]);
    }

    /// Every supported operator character is emitted as its own token.
    #[test]
    fn test_operator() {
        init();
        let source = "+-*/%&|^!=<>?";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Operator] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Operator("+".to_string()),
                Token::Operator("-".to_string()),
                Token::Operator("*".to_string()),
                Token::Operator("/".to_string()),
                Token::Operator("%".to_string()),
                Token::Operator("&".to_string()),
                Token::Operator("|".to_string()),
                Token::Operator("^".to_string()),
                Token::Operator("!".to_string()),
                Token::Operator("=".to_string()),
                Token::Operator("<".to_string()),
                Token::Operator(">".to_string()),
                Token::Operator("?".to_string()),
                Token::EOF,
            ]
        );
    }

    /// A `//` comment keeps its text; the newline after it is whitespace.
    #[test]
    fn test_single_line_comment() {
        init();
        let source = "// This is a comment\n";
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Comment] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Comment("// This is a comment".to_string()),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }

    /// End-to-end check on a small shader: every token, including each
    /// collapsed whitespace run, is pinned in order.
    #[test]
    fn complex_source() {
        init();
        let source = r#"
            uniform float time;
            void main() {
                gl_FragColor = vec4(1.0, 0.5, 0.2, 1.0);
            }
        "#;
        let mut lexer = Lexer::new(source);
        let tokens = lexer.get_tokens();
        info!("[Complex Source] Tokens: {:#?}", tokens);
        assert_eq!(
            tokens,
            vec![
                Token::Whitespace,
                Token::Keyword("uniform".to_string()),
                Token::Whitespace,
                Token::Keyword("float".to_string()),
                Token::Whitespace,
                Token::Identifier("time".to_string()),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Keyword("void".to_string()),
                Token::Whitespace,
                Token::Identifier("main".to_string()),
                Token::Symbol('('),
                Token::Symbol(')'),
                Token::Whitespace,
                Token::Symbol('{'),
                Token::Whitespace,
                Token::Identifier("gl_FragColor".to_string()),
                Token::Whitespace,
                Token::Operator('='.to_string()),
                Token::Whitespace,
                Token::Identifier("vec4".to_string()),
                Token::Symbol('('),
                Token::FloatLiteral(1.0),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.5),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(0.2),
                Token::Symbol(','),
                Token::Whitespace,
                Token::FloatLiteral(1.0),
                Token::Symbol(')'),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Symbol('}'),
                Token::Whitespace,
                Token::EOF,
            ]
        );
    }
}
|