glsl-lexer/src/lib.rs

223 lines
5.5 KiB
Rust
Raw Normal View History

2024-07-10 19:06:08 +02:00
// WIP: this lexer is incomplete and its behavior is still unreliable.
/// A single lexical token produced by the lexer.
///
/// `Clone` is derived so consumers can copy tokens out of a token stream.
/// `Eq` cannot be derived because `FloatLiteral` carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A name that is not one of the recognized keywords.
    Identifier(String),
    /// A word recognized as a keyword.
    Keyword(String),
    /// An integer literal.
    IntegerLiteral(i64),
    /// A floating-point literal.
    FloatLiteral(f64),
    /// An operator; not currently produced by the lexer in this file.
    Operator(String),
    /// A single character that matched no other token rule.
    Symbol(char),
    /// A run of one or more whitespace characters.
    Whitespace,
    /// The text of a `//` line comment.
    Comment(String),
    /// An unrecognized character; not currently produced by the lexer in this file.
    Unknown(char),
    /// Marks the end of the input.
    EndOfFile,
}
/// Character-by-character lexer state over an in-memory source string.
///
/// `Debug` and `Clone` are derived so the lexer state can be inspected in
/// diagnostics and snapshotted.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The source text, stored as individual characters.
    input: Vec<char>,
    /// Index into `input` of the character currently being examined.
    position: usize,
    /// The character at `position`, or `None` once the input is exhausted.
    current_char: Option<char>,
}
impl Lexer {
    /// Builds a lexer over `input`, positioned on its first character
    /// (or already at end of input when `input` is empty).
    pub fn new(input: &str) -> Self {
        let chars: Vec<char> = input.chars().collect();
        let first = chars.first().copied();
        Lexer {
            input: chars,
            position: 0,
            current_char: first,
        }
    }

    /// Moves one character forward and refreshes `current_char`, which
    /// becomes `None` once the position is past the last character.
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }

    /// Lexes the remaining input and returns every token, always terminated
    /// by `Token::EndOfFile`.
    ///
    /// Dispatch is decided by the current character: whitespace, identifier
    /// start (alphabetic or `_`), decimal digit, `//` comment start, and
    /// finally a single-character symbol for everything else.
    pub fn get_tokens(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        while let Some(current) = self.current_char {
            let token = match current {
                ch if ch.is_whitespace() => {
                    // A whole run of whitespace collapses into one token.
                    self.consume_whitespace();
                    Token::Whitespace
                }
                ch if ch.is_alphabetic() || ch == '_' => self.consume_identifier_or_keyword(),
                ch if ch.is_ascii_digit() => self.consume_number(),
                '/' if self.peek() == Some('/') => self.consume_comment(),
                _ => self.consume_symbol(),
            };
            tokens.push(token);
        }
        tokens.push(Token::EndOfFile);
        tokens
    }

    /// Returns the character right after the current one without consuming
    /// anything, or `None` if there is no such character.
    fn peek(&self) -> Option<char> {
        self.input.get(self.position + 1).copied()
    }
}
impl Lexer {
    /// Skips over a run of consecutive whitespace characters, stopping at the
    /// first non-whitespace character or at end of input.
    fn consume_whitespace(&mut self) {
        while matches!(self.current_char, Some(c) if c.is_whitespace()) {
            self.advance();
        }
    }

    /// Reads a maximal run of alphanumeric characters and underscores and
    /// classifies it as a keyword or a plain identifier via `is_keyword`.
    fn consume_identifier_or_keyword(&mut self) -> Token {
        let mut identifier = String::new();
        while let Some(c) = self.current_char {
            if !(c.is_alphanumeric() || c == '_') {
                break;
            }
            identifier.push(c);
            self.advance();
        }
        if is_keyword(&identifier) {
            Token::Keyword(identifier)
        } else {
            Token::Identifier(identifier)
        }
    }

    /// Reads a decimal integer or floating-point literal.
    ///
    /// Must be called with the current character on an ASCII digit. Digits
    /// are consumed greedily and at most one `.` is accepted: a second `.`
    /// ends the literal and is left for the next token, so input such as
    /// `1.2.3` no longer produces an unparsable string and a panic.
    ///
    /// An integer literal too large for `i64` saturates to `i64::MAX`
    /// instead of panicking.
    fn consume_number(&mut self) -> Token {
        let mut number = String::new();
        let mut is_float = false;
        while let Some(c) = self.current_char {
            if c.is_ascii_digit() {
                number.push(c);
            } else if c == '.' && !is_float {
                is_float = true;
                number.push(c);
            } else {
                break;
            }
            self.advance();
        }
        if is_float {
            // Digits containing exactly one '.' always form a valid `f64`
            // literal, given that the literal starts with a digit.
            Token::FloatLiteral(
                number
                    .parse()
                    .expect("digits with a single '.' must parse as f64"),
            )
        } else {
            // A non-empty run of ASCII digits can only fail to parse through
            // overflow, so saturate rather than abort lexing.
            Token::IntegerLiteral(number.parse().unwrap_or(i64::MAX))
        }
    }

    /// Reads everything from the current character up to, but not including,
    /// the next `'\n'` (or end of input) and returns it as a comment token.
    /// The caller invokes this on the leading `//`, which is therefore part
    /// of the returned text.
    fn consume_comment(&mut self) -> Token {
        let mut comment = String::new();
        while let Some(c) = self.current_char {
            if c == '\n' {
                break;
            }
            comment.push(c);
            self.advance();
        }
        Token::Comment(comment)
    }

    /// Consumes exactly one character and returns it as a symbol token.
    ///
    /// # Panics
    ///
    /// Panics if called at end of input; callers only invoke it while a
    /// current character exists.
    fn consume_symbol(&mut self) -> Token {
        let symbol = self
            .current_char
            .expect("consume_symbol called at end of input");
        self.advance();
        Token::Symbol(symbol)
    }
}
/// Returns `true` when `word` is exactly one of the keywords this lexer
/// recognizes. The comparison is case-sensitive.
fn is_keyword(word: &str) -> bool {
    const KEYWORDS: [&str; 12] = [
        "void", "int", "float", "bool", "if", "else", "for", "while", "return", "struct",
        "uniform", "varying",
    ];
    KEYWORDS.contains(&word)
}
impl Lexer {
    /// Aborts lexing by panicking with `message`, prefixed by the lexer's
    /// current position. Never returns.
    fn error(&self, message: &str) -> ! {
        let at = self.position;
        panic!("Lexer error at position {}: {}", at, message)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` with a fresh lexer and returns the complete token stream.
    fn lex(source: &str) -> Vec<Token> {
        Lexer::new(source).get_tokens()
    }

    /// A run of mixed whitespace collapses into a single token.
    #[test]
    fn test_whitespace() {
        let expected = vec![Token::Whitespace, Token::EndOfFile];
        assert_eq!(lex(" \t\n"), expected);
    }

    /// A non-keyword word is reported as an identifier.
    #[test]
    fn test_identifier() {
        let expected = vec![
            Token::Identifier("variableName".to_string()),
            Token::EndOfFile,
        ];
        assert_eq!(lex("variableName"), expected);
    }

    /// A reserved word is reported as a keyword.
    #[test]
    fn test_keyword() {
        let expected = vec![Token::Keyword("uniform".to_string()), Token::EndOfFile];
        assert_eq!(lex("uniform"), expected);
    }

    /// A run of digits is reported as an integer literal.
    #[test]
    fn test_integer_literal() {
        let expected = vec![Token::IntegerLiteral(12345), Token::EndOfFile];
        assert_eq!(lex("12345"), expected);
    }

    /// Digits with a decimal point are reported as a float literal.
    #[test]
    fn test_float_literal() {
        let expected = vec![Token::FloatLiteral(123.45), Token::EndOfFile];
        assert_eq!(lex("123.45"), expected);
    }
}