kalker/kalk/src/lexer.rs

233 lines
5.4 KiB
Rust
Raw Normal View History

2020-05-28 23:46:35 +02:00
use std::str;
/// The category of a lexed [`Token`].
#[derive(Clone, Debug, PartialEq)]
pub enum TokenKind {
    /// Input that does not match any other kind.
    Unknown,
    /// A numeric literal: a run of decimal digits and `.` characters.
    Literal,
    /// A name, such as a variable or function name.
    Identifier,

    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^`
    Power,
    /// `=`
    Equals,
    /// `!`
    Exclamation,

    /// The angle unit written as `deg` or `°`.
    Deg,
    /// The angle unit written as `rad`.
    Rad,

    /// `|`
    Pipe,
    /// `(`
    OpenParenthesis,
    /// `)`
    ClosedParenthesis,
    /// `,`
    Comma,

    /// Marks the end of the input.
    EOF,
}
#[derive(Clone, Debug, PartialEq)]
2020-05-28 23:46:35 +02:00
pub struct Token {
pub kind: TokenKind,
pub value: String,
}
/// Cursor over the raw bytes of the text being lexed.
pub struct Lexer<'a> {
    /// Bytes of the input string; `Lexer::lex` fills this from `str::as_bytes`.
    source: &'a [u8],
    /// Offset into `source` of the next byte to be read.
    index: usize,
}
impl<'a> Lexer<'a> {
pub fn lex(source: &str) -> Vec<Token> {
let mut lexer = Lexer {
source: source.as_bytes(),
index: 0,
};
let mut tokens = Vec::new();
while !lexer.is_at_end() {
tokens.push(lexer.next());
}
// If there isn't already an EOF token, add it.
if let TokenKind::EOF = tokens.last().unwrap().kind {
} else {
2020-06-04 14:46:45 +02:00
tokens.push(build(TokenKind::EOF, ""));
2020-05-28 23:46:35 +02:00
}
tokens
}
fn next(&mut self) -> Token {
let mut c = self.peek();
while c == ' ' || c == '\t' || c == '\r' || c == '\n' {
self.advance();
if self.is_at_end() {
2020-06-04 14:46:45 +02:00
return build(TokenKind::EOF, "");
2020-05-28 23:46:35 +02:00
} else {
c = self.peek();
}
}
if c.is_digit(10) {
return self.next_number_literal();
}
if c.is_alphabetic() {
return self.next_identifier();
}
2020-05-28 23:46:35 +02:00
let token = match c {
'+' => build(TokenKind::Plus, ""),
'-' => build(TokenKind::Minus, ""),
'*' => build(TokenKind::Star, ""),
'/' => build(TokenKind::Slash, ""),
'^' => build(TokenKind::Power, ""),
'(' => build(TokenKind::OpenParenthesis, ""),
')' => build(TokenKind::ClosedParenthesis, ""),
'|' => build(TokenKind::Pipe, ""),
'=' => build(TokenKind::Equals, ""),
'!' => build(TokenKind::Exclamation, ""),
',' => build(TokenKind::Comma, ""),
_ => build(TokenKind::Unknown, ""),
2020-05-28 23:46:35 +02:00
};
self.advance();
2020-05-28 23:46:35 +02:00
token
}
fn next_number_literal(&mut self) -> Token {
let start = self.index;
let mut end = start;
while !self.is_at_end() && (self.peek().is_digit(10) || self.peek() == '.') {
end += 1;
self.advance();
}
if let Ok(value) = str::from_utf8(&self.source[start..end]) {
2020-06-04 14:46:45 +02:00
build(TokenKind::Literal, value)
2020-05-28 23:46:35 +02:00
} else {
2020-06-04 14:46:45 +02:00
build(TokenKind::Unknown, "")
2020-05-28 23:46:35 +02:00
}
}
fn next_identifier(&mut self) -> Token {
let start = self.index;
let mut end = start;
while !self.is_at_end() && is_valid_identifier(self.peek()) {
end += 1;
self.advance();
}
if let Ok(value) = str::from_utf8(&self.source[start..end]) {
let kind = match value {
"deg" | "°" => TokenKind::Deg,
"rad" => TokenKind::Rad,
_ => TokenKind::Identifier,
};
2020-06-04 14:46:45 +02:00
build(kind, value)
2020-05-28 23:46:35 +02:00
} else {
2020-06-04 14:46:45 +02:00
build(TokenKind::Unknown, "")
2020-05-28 23:46:35 +02:00
}
}
fn peek(&self) -> char {
self.source[self.index].into()
}
fn advance(&mut self) {
self.index = self.index + 1;
}
fn is_at_end(&self) -> bool {
self.index >= self.source.len()
}
}
2020-06-04 14:46:45 +02:00
/// Creates a [`Token`] of the given `kind` that owns a copy of `value`.
fn build(kind: TokenKind, value: &str) -> Token {
    let value = String::from(value);
    Token { kind, value }
}
2020-05-28 23:46:35 +02:00
/// Reports whether `c` may appear in an identifier: any alphabetic character,
/// plus the symbols `°`, `√`, `'`, `¨` and `Σ`.
fn is_valid_identifier(c: char) -> bool {
    match c {
        '°' | '√' | '\'' | '¨' | 'Σ' => true,
        _ => c.is_alphabetic(),
    }
}
2020-06-04 21:53:45 +02:00
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::compare_enums;
    use test_case::test_case;

    /// Asserts that `tokens` has exactly the kinds listed in `expected`, in
    /// the same order.
    fn match_tokens(tokens: Vec<Token>, expected: Vec<TokenKind>) {
        // Without this check a token stream shorter than `expected` would
        // pass, because only the produced tokens are walked below.
        assert_eq!(
            tokens.len(),
            expected.len(),
            "unexpected number of tokens: {:?}",
            tokens
        );

        for (token, expected_kind) in tokens.iter().zip(expected.iter()) {
            // NOTE(review): `compare_enums` is defined in `crate::ast`;
            // presumably it compares the variants only - confirm there.
            assert!(compare_enums(&token.kind, expected_kind));
        }
    }

    #[test]
    fn test_token_kinds() {
        let tokens = Lexer::lex("+-*/^()|=!,");
        let expected = vec![
            TokenKind::Plus,
            TokenKind::Minus,
            TokenKind::Star,
            TokenKind::Slash,
            TokenKind::Power,
            TokenKind::OpenParenthesis,
            TokenKind::ClosedParenthesis,
            TokenKind::Pipe,
            TokenKind::Equals,
            TokenKind::Exclamation,
            TokenKind::Comma,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }

    #[test_case("1")]
    #[test_case("24")]
    #[test_case("56.4")]
    fn test_number_literal(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Literal, TokenKind::EOF];

        // The literal token must carry the source text unchanged.
        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    #[test_case("x")]
    #[test_case("xy")]
    fn test_identifier(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Identifier, TokenKind::EOF];

        // The identifier token must carry the source text unchanged.
        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    #[test]
    fn test_function_call() {
        let tokens = Lexer::lex("f(x)");
        let expected = vec![
            TokenKind::Identifier,
            TokenKind::OpenParenthesis,
            TokenKind::Identifier,
            TokenKind::ClosedParenthesis,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }
}