2020-05-28 23:46:35 +02:00
|
|
|
use std::str;
|
|
|
|
|
2020-06-05 13:36:11 +02:00
|
|
|
/// The category of a lexed [`Token`].
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// A character the lexer does not recognise.
    Unknown,
    /// A numeric literal such as `24` or `56.4`.
    Literal,
    /// A name that is not one of the reserved unit words.
    Identifier,

    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^`
    Power,
    /// `=`
    Equals,
    /// `!`
    Exclamation,

    /// The unit word `deg` (also written `°`).
    Deg,
    /// The unit word `rad`.
    Rad,

    /// `|`
    Pipe,
    /// `(`
    OpenParenthesis,
    /// `)`
    ClosedParenthesis,
    /// `,`
    Comma,

    /// Marks the end of the token stream.
    EOF,
}
|
|
|
|
|
2020-06-05 13:36:11 +02:00
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
2020-05-28 23:46:35 +02:00
|
|
|
pub struct Token {
|
|
|
|
pub kind: TokenKind,
|
|
|
|
pub value: String,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Cursor over the raw bytes of an input string, used to produce tokens.
pub struct Lexer<'src> {
    /// The bytes of the text being lexed.
    source: &'src [u8],
    /// Byte offset into `source` of the next unread position.
    index: usize,
}
|
|
|
|
|
|
|
|
impl<'a> Lexer<'a> {
|
|
|
|
pub fn lex(source: &str) -> Vec<Token> {
|
|
|
|
let mut lexer = Lexer {
|
|
|
|
source: source.as_bytes(),
|
|
|
|
index: 0,
|
|
|
|
};
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
|
|
|
|
while !lexer.is_at_end() {
|
|
|
|
tokens.push(lexer.next());
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there isn't already an EOF token, add it.
|
|
|
|
if let TokenKind::EOF = tokens.last().unwrap().kind {
|
|
|
|
} else {
|
2020-06-04 14:46:45 +02:00
|
|
|
tokens.push(build(TokenKind::EOF, ""));
|
2020-05-28 23:46:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
tokens
|
|
|
|
}
|
|
|
|
|
|
|
|
fn next(&mut self) -> Token {
|
|
|
|
let mut c = self.peek();
|
|
|
|
|
|
|
|
while c == ' ' || c == '\t' || c == '\r' || c == '\n' {
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
if self.is_at_end() {
|
2020-06-04 14:46:45 +02:00
|
|
|
return build(TokenKind::EOF, "");
|
2020-05-28 23:46:35 +02:00
|
|
|
} else {
|
|
|
|
c = self.peek();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-04 20:09:43 +02:00
|
|
|
if c.is_digit(10) {
|
|
|
|
return self.next_number_literal();
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.is_alphabetic() {
|
|
|
|
return self.next_identifier();
|
|
|
|
}
|
|
|
|
|
2020-05-28 23:46:35 +02:00
|
|
|
let token = match c {
|
2020-06-04 20:09:43 +02:00
|
|
|
'+' => build(TokenKind::Plus, ""),
|
|
|
|
'-' => build(TokenKind::Minus, ""),
|
|
|
|
'*' => build(TokenKind::Star, ""),
|
|
|
|
'/' => build(TokenKind::Slash, ""),
|
|
|
|
'^' => build(TokenKind::Power, ""),
|
|
|
|
'(' => build(TokenKind::OpenParenthesis, ""),
|
|
|
|
')' => build(TokenKind::ClosedParenthesis, ""),
|
|
|
|
'|' => build(TokenKind::Pipe, ""),
|
|
|
|
'=' => build(TokenKind::Equals, ""),
|
|
|
|
'!' => build(TokenKind::Exclamation, ""),
|
|
|
|
',' => build(TokenKind::Comma, ""),
|
|
|
|
_ => build(TokenKind::Unknown, ""),
|
2020-05-28 23:46:35 +02:00
|
|
|
};
|
|
|
|
|
2020-06-04 20:09:43 +02:00
|
|
|
self.advance();
|
|
|
|
|
2020-05-28 23:46:35 +02:00
|
|
|
token
|
|
|
|
}
|
|
|
|
|
|
|
|
fn next_number_literal(&mut self) -> Token {
|
|
|
|
let start = self.index;
|
|
|
|
let mut end = start;
|
|
|
|
|
|
|
|
while !self.is_at_end() && (self.peek().is_digit(10) || self.peek() == '.') {
|
|
|
|
end += 1;
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(value) = str::from_utf8(&self.source[start..end]) {
|
2020-06-04 14:46:45 +02:00
|
|
|
build(TokenKind::Literal, value)
|
2020-05-28 23:46:35 +02:00
|
|
|
} else {
|
2020-06-04 14:46:45 +02:00
|
|
|
build(TokenKind::Unknown, "")
|
2020-05-28 23:46:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn next_identifier(&mut self) -> Token {
|
|
|
|
let start = self.index;
|
|
|
|
let mut end = start;
|
|
|
|
|
|
|
|
while !self.is_at_end() && is_valid_identifier(self.peek()) {
|
|
|
|
end += 1;
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(value) = str::from_utf8(&self.source[start..end]) {
|
|
|
|
let kind = match value {
|
|
|
|
"deg" | "°" => TokenKind::Deg,
|
|
|
|
"rad" => TokenKind::Rad,
|
|
|
|
_ => TokenKind::Identifier,
|
|
|
|
};
|
|
|
|
|
2020-06-04 14:46:45 +02:00
|
|
|
build(kind, value)
|
2020-05-28 23:46:35 +02:00
|
|
|
} else {
|
2020-06-04 14:46:45 +02:00
|
|
|
build(TokenKind::Unknown, "")
|
2020-05-28 23:46:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peek(&self) -> char {
|
|
|
|
self.source[self.index].into()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn advance(&mut self) {
|
|
|
|
self.index = self.index + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_at_end(&self) -> bool {
|
|
|
|
self.index >= self.source.len()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-04 14:46:45 +02:00
|
|
|
fn build(kind: TokenKind, value: &str) -> Token {
|
|
|
|
Token {
|
|
|
|
kind,
|
|
|
|
value: value.to_string(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-28 23:46:35 +02:00
|
|
|
/// Returns `true` if `c` may appear in an identifier: any alphabetic
/// character, or one of the special symbols `°`, `√`, `'`, `¨` and `Σ`.
fn is_valid_identifier(c: char) -> bool {
    match c {
        '°' | '√' | '\'' | '¨' | 'Σ' => true,
        _ => c.is_alphabetic(),
    }
}
|
2020-06-04 21:53:45 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::compare_enums;
    use test_case::test_case;

    /// Asserts that `tokens` has exactly the kinds listed in `expected`,
    /// in the same order.
    fn match_tokens(tokens: Vec<Token>, expected: Vec<TokenKind>) {
        // Without this check a lexer that produced too few tokens (or none at
        // all) would pass, because only the produced tokens are iterated.
        assert_eq!(
            tokens.len(),
            expected.len(),
            "unexpected number of tokens: {:?}",
            tokens
        );

        for (token, expected_kind) in tokens.iter().zip(expected.iter()) {
            assert!(compare_enums(&token.kind, expected_kind));
        }
    }

    /// Every single-character operator and punctuation token is recognised.
    #[test]
    fn test_token_kinds() {
        let tokens = Lexer::lex("+-*/^()|=!,");
        let expected = vec![
            TokenKind::Plus,
            TokenKind::Minus,
            TokenKind::Star,
            TokenKind::Slash,
            TokenKind::Power,
            TokenKind::OpenParenthesis,
            TokenKind::ClosedParenthesis,
            TokenKind::Pipe,
            TokenKind::Equals,
            TokenKind::Exclamation,
            TokenKind::Comma,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }

    /// Integer and decimal literals are lexed as one token carrying their text.
    #[test_case("1")]
    #[test_case("24")]
    #[test_case("56.4")]
    fn test_number_literal(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Literal, TokenKind::EOF];

        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    /// Alphabetic names are lexed as one identifier token carrying their text.
    #[test_case("x")]
    #[test_case("xy")]
    fn test_identifier(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Identifier, TokenKind::EOF];

        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    /// A function call is split into name, parentheses and argument.
    #[test]
    fn test_function_call() {
        let tokens = Lexer::lex("f(x)");
        let expected = vec![
            TokenKind::Identifier,
            TokenKind::OpenParenthesis,
            TokenKind::Identifier,
            TokenKind::ClosedParenthesis,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }
}
|