// WIP: this lexer is still unstable and incomplete.

/// A lexical token produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A word made of alphanumerics/underscores that is not a reserved word.
    Identifier(String),
    /// A reserved word, as decided by `is_keyword`.
    Keyword(String),
    /// A numeric literal without a decimal point.
    IntegerLiteral(i64),
    /// A numeric literal containing a decimal point.
    FloatLiteral(f64),
    /// An operator's text.
    // NOTE(review): nothing in this file constructs this variant yet.
    Operator(String),
    /// A single character that starts neither whitespace, a word, a number,
    /// nor a `//` comment.
    Symbol(char),
    /// A run of one or more whitespace characters.
    Whitespace,
    /// A `//` line comment; the text includes the leading slashes but not the
    /// line terminator.
    Comment(String),
    /// An unrecognized character.
    // NOTE(review): nothing in this file constructs this variant yet.
    Unknown(char),
    /// End of input; always the last token returned by the lexer.
    EndOfFile,
}

/// Character-by-character scanner state used to turn source text into tokens.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The whole input, stored as `char`s so it can be indexed by character.
    input: Vec<char>,
    /// Index into `input` of the character currently being examined.
    position: usize,
    /// The character at `position`, or `None` once `position` is past the end.
    current_char: Option<char>,
}

impl Lexer {
|
||
|
pub fn new(input: &str) -> Self {
|
||
|
let mut lexer = Lexer {
|
||
|
input: input.chars().collect(),
|
||
|
position: 0,
|
||
|
current_char: None,
|
||
|
};
|
||
|
lexer.current_char = if lexer.position < lexer.input.len() {
|
||
|
Some(lexer.input[lexer.position])
|
||
|
} else {
|
||
|
None
|
||
|
};
|
||
|
lexer
|
||
|
}
|
||
|
|
||
|
fn advance(&mut self) {
|
||
|
self.position += 1;
|
||
|
self.current_char = if self.position < self.input.len() {
|
||
|
Some(self.input[self.position])
|
||
|
} else {
|
||
|
None
|
||
|
};
|
||
|
}
|
||
|
|
||
|
pub fn get_tokens(&mut self) -> Vec<Token> {
|
||
|
let mut tokens = Vec::new();
|
||
|
while let Some(c) = self.current_char {
|
||
|
if c.is_whitespace() {
|
||
|
self.consume_whitespace();
|
||
|
tokens.push(Token::Whitespace);
|
||
|
} else if c.is_alphabetic() || c == '_' {
|
||
|
tokens.push(self.consume_identifier_or_keyword());
|
||
|
} else if c.is_digit(10) {
|
||
|
tokens.push(self.consume_number());
|
||
|
} else if c == '/' && self.peek() == Some('/') {
|
||
|
tokens.push(self.consume_comment());
|
||
|
} else {
|
||
|
tokens.push(self.consume_symbol());
|
||
|
}
|
||
|
}
|
||
|
tokens.push(Token::EndOfFile);
|
||
|
tokens
|
||
|
}
|
||
|
|
||
|
fn peek(&self) -> Option<char> {
|
||
|
if self.position + 1 < self.input.len() {
|
||
|
Some(self.input[self.position + 1])
|
||
|
} else {
|
||
|
None
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl Lexer {
|
||
|
fn consume_whitespace(&mut self) {
|
||
|
while let Some(c) = self.current_char {
|
||
|
if !c.is_whitespace() {
|
||
|
break;
|
||
|
}
|
||
|
self.advance();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn consume_identifier_or_keyword(&mut self) -> Token {
|
||
|
let mut identifier = String::new();
|
||
|
while let Some(c) = self.current_char {
|
||
|
if c.is_alphanumeric() || c == '_' {
|
||
|
identifier.push(c);
|
||
|
self.advance();
|
||
|
} else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if is_keyword(&identifier) {
|
||
|
Token::Keyword(identifier)
|
||
|
} else {
|
||
|
Token::Identifier(identifier)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn consume_number(&mut self) -> Token {
|
||
|
let mut number = String::new();
|
||
|
let mut is_float = false;
|
||
|
|
||
|
while let Some(c) = self.current_char {
|
||
|
if c.is_digit(10) {
|
||
|
number.push(c);
|
||
|
self.advance();
|
||
|
} else if c == '.' {
|
||
|
number.push(c);
|
||
|
is_float = true;
|
||
|
self.advance();
|
||
|
} else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if is_float {
|
||
|
Token::FloatLiteral(number.parse().unwrap())
|
||
|
} else {
|
||
|
Token::IntegerLiteral(number.parse().unwrap())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn consume_comment(&mut self) -> Token {
|
||
|
let mut comment = String::new();
|
||
|
while let Some(c) = self.current_char {
|
||
|
if c == '\n' {
|
||
|
break;
|
||
|
}
|
||
|
comment.push(c);
|
||
|
self.advance();
|
||
|
}
|
||
|
Token::Comment(comment)
|
||
|
}
|
||
|
|
||
|
fn consume_symbol(&mut self) -> Token {
|
||
|
let symbol = self.current_char.unwrap();
|
||
|
self.advance();
|
||
|
Token::Symbol(symbol)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Returns `true` when `word` is exactly one of the reserved words the lexer
/// reports as [`Token::Keyword`].
///
/// The comparison is case-sensitive.
fn is_keyword(word: &str) -> bool {
    matches!(
        word,
        "void"
            | "int"
            | "float"
            | "bool"
            | "if"
            | "else"
            | "for"
            | "while"
            | "return"
            | "struct"
            | "uniform"
            | "varying"
    )
}

impl Lexer {
|
||
|
fn error(&self, message: &str) -> ! {
|
||
|
panic!("Lexer error at position {}: {}", self.position, message);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` from start to finish and returns every token.
    fn lex(source: &str) -> Vec<Token> {
        Lexer::new(source).get_tokens()
    }

    #[test]
    fn test_whitespace() {
        // Mixed whitespace collapses into a single token.
        assert_eq!(lex(" \t\n"), vec![Token::Whitespace, Token::EndOfFile]);
    }

    #[test]
    fn test_identifier() {
        assert_eq!(
            lex("variableName"),
            vec![
                Token::Identifier("variableName".to_string()),
                Token::EndOfFile
            ]
        );
    }

    #[test]
    fn test_keyword() {
        assert_eq!(
            lex("uniform"),
            vec![Token::Keyword("uniform".to_string()), Token::EndOfFile]
        );
    }

    #[test]
    fn test_integer_literal() {
        assert_eq!(
            lex("12345"),
            vec![Token::IntegerLiteral(12345), Token::EndOfFile]
        );
    }

    #[test]
    fn test_float_literal() {
        assert_eq!(
            lex("123.45"),
            vec![Token::FloatLiteral(123.45), Token::EndOfFile]
        );
    }

    #[test]
    fn test_statement_with_comment() {
        // Exercises the keyword, symbol, and comment paths in one pass.
        assert_eq!(
            lex("int x = 42; // hi"),
            vec![
                Token::Keyword("int".to_string()),
                Token::Whitespace,
                Token::Identifier("x".to_string()),
                Token::Whitespace,
                Token::Symbol('='),
                Token::Whitespace,
                Token::IntegerLiteral(42),
                Token::Symbol(';'),
                Token::Whitespace,
                Token::Comment("// hi".to_string()),
                Token::EndOfFile,
            ]
        );
    }
}