kalker/kalk/src/lexer.rs

use std::iter::Peekable;
use std::str;
use std::str::Chars;

/// The category of a lexed [`Token`].
#[derive(Clone, Debug, PartialEq, Copy)]
pub enum TokenKind {
    /// A character that matches none of the lexer's rules.
    Unknown,
    /// A number literal such as `24` or `56.4`.
    Literal,
    /// A name such as `x` or `xy`, or a special symbol such as `π` or `√`.
    Identifier,

    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^` or `**`
    Power,
    /// `!`
    Exclamation,
    /// `%`
    Percent,
    /// `'`
    Tick,
    /// `>`
    GreaterThan,
    /// `<`
    LessThan,
    /// `=`
    Equals,
    /// `≠` or `!=`
    NotEquals,
    /// `≥` or `>=`
    GreaterOrEquals,
    /// `≤` or `<=`
    LessOrEquals,

    /// The word `unit`.
    UnitKeyword,
    /// The word `to`.
    ToKeyword,
    /// The word `if`.
    IfKeyword,
    /// The word `otherwise`.
    OtherwiseKeyword,

    /// `|`
    Pipe,
    /// `⌈`
    OpenCeil,
    /// `⌉`
    ClosedCeil,
    /// `⌊`
    OpenFloor,
    /// `⌋`
    ClosedFloor,
    /// `(`
    OpenParenthesis,
    /// `)`
    ClosedParenthesis,
    /// `[`
    OpenBracket,
    /// `]`
    ClosedBracket,
    /// `{`
    OpenBrace,
    /// `}`
    ClosedBrace,
    /// `,`
    Comma,
    /// `;`
    Semicolon,

    /// End of input. `Lexer::lex` always emits this as the final token.
    EOF,
}

/// A single lexical unit produced by the [`Lexer`].
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Text carried by the token. Filled in for literals, identifiers and
    /// keywords; the lexer leaves it empty for operators, punctuation and EOF.
    pub value: String,
    /// Position range `(start, end)` recorded by the lexer, measured in
    /// `char`s consumed from the source rather than in bytes.
    pub span: (usize, usize),
}

/// Turns source text into a sequence of [`Token`]s; see `Lexer::lex`.
pub struct Lexer<'a> {
    /// Remaining source characters, with one character of lookahead.
    chars: Peekable<Chars<'a>>,
    /// Number of characters consumed so far; used to compute token spans.
    index: usize,
}

impl<'a> Lexer<'a> {
    /// Lexes `source` into tokens.
    ///
    /// The returned vector is never empty: its last element is always a
    /// [`TokenKind::EOF`] token.
    pub fn lex(source: &str) -> Vec<Token> {
        let mut lexer = Lexer {
            chars: source.chars().peekable(),
            index: 0,
        };
        let mut tokens = Vec::new();

        loop {
            let token = lexer.next();
            let reached_end = token.kind == TokenKind::EOF;
            tokens.push(token);

            if reached_end {
                break;
            }
        }

        tokens
    }

    /// Consumes and returns the next token, skipping leading whitespace.
    ///
    /// Returns an EOF token once the input is exhausted.
    fn next(&mut self) -> Token {
        // Built up front, so the EOF span marks where this call started
        // (i.e. before any trailing whitespace that gets skipped below).
        let eof = build(TokenKind::EOF, "", (self.index, self.index));
        let mut c = match self.peek() {
            Some(c) => *c,
            None => return eof,
        };

        while c == ' ' || c == '\t' || c == '\r' || c == '\n' {
            self.advance();

            c = match self.peek() {
                Some(c) => *c,
                None => return eof,
            };
        }

        if c.is_digit(10) {
            return self.next_number_literal();
        }

        if is_valid_identifier(Some(&c)) {
            return self.next_identifier();
        }

        let start = self.index;
        let span = (start, start + 1);
        let token = match c {
            '+' => build(TokenKind::Plus, "", span),
            '-' => build(TokenKind::Minus, "", span),
            '*' => build(TokenKind::Star, "", span),
            '/' => build(TokenKind::Slash, "", span),
            '^' => build(TokenKind::Power, "", span),
            '|' => build(TokenKind::Pipe, "", span),
            '⌈' => build(TokenKind::OpenCeil, "", span),
            '⌉' => build(TokenKind::ClosedCeil, "", span),
            '⌊' => build(TokenKind::OpenFloor, "", span),
            '⌋' => build(TokenKind::ClosedFloor, "", span),
            '(' => build(TokenKind::OpenParenthesis, "", span),
            ')' => build(TokenKind::ClosedParenthesis, "", span),
            '[' => build(TokenKind::OpenBracket, "", span),
            ']' => build(TokenKind::ClosedBracket, "", span),
            '{' => build(TokenKind::OpenBrace, "", span),
            '}' => build(TokenKind::ClosedBrace, "", span),
            '!' => build(TokenKind::Exclamation, "", span),
            '=' => build(TokenKind::Equals, "", span),
            '>' => build(TokenKind::GreaterThan, "", span),
            '<' => build(TokenKind::LessThan, "", span),
            ',' => build(TokenKind::Comma, "", span),
            ';' => build(TokenKind::Semicolon, "", span),
            '%' => build(TokenKind::Percent, "", span),
            '\'' => build(TokenKind::Tick, "", span),
            '≠' => build(TokenKind::NotEquals, "", span),
            '≥' => build(TokenKind::GreaterOrEquals, "", span),
            '≤' => build(TokenKind::LessOrEquals, "", span),
            // Some of the special symbols will be lexed here,
            // so that they don't merge with other symbols.
            'π' => build(TokenKind::Identifier, "π", span),
            '√' => build(TokenKind::Identifier, "√", span),
            'τ' => build(TokenKind::Identifier, "τ", span),
            'ϕ' => build(TokenKind::Identifier, "ϕ", span),
            'Γ' => build(TokenKind::Identifier, "Γ", span),
            // The product symbol must map to itself, not to the gamma symbol.
            '∏' => build(TokenKind::Identifier, "∏", span),
            _ => build(TokenKind::Unknown, "", span),
        };

        self.advance();

        // Handle tokens with two characters. The combined token covers both
        // characters, so its span is one character longer than `span`.
        let combined_kind = match (token.kind, self.peek()) {
            (TokenKind::Star, Some('*')) => Some(TokenKind::Power),
            (TokenKind::Exclamation, Some('=')) => Some(TokenKind::NotEquals),
            (TokenKind::GreaterThan, Some('=')) => Some(TokenKind::GreaterOrEquals),
            (TokenKind::LessThan, Some('=')) => Some(TokenKind::LessOrEquals),
            _ => None,
        };

        if let Some(kind) = combined_kind {
            self.advance();
            return build(kind, "", (start, start + 2));
        }

        token
    }

    /// Lexes a number literal starting at the current character.
    ///
    /// Digits, `.` and whitespace other than line breaks are all accepted as
    /// part of the literal, so the token value may contain (or end with)
    /// whitespace.
    fn next_number_literal(&mut self) -> Token {
        let start = self.index;
        let mut end = start;
        let mut value = String::new();

        while let Some(&c) = self.peek() {
            let is_line_break = c == '\n' || c == '\r';
            let is_number_part = c.is_digit(10) || c == '.' || c.is_whitespace();

            if is_line_break || !is_number_part {
                break;
            }

            end += 1;
            value.push(c);
            self.advance();
        }

        build(TokenKind::Literal, &value, (start, end))
    }

    /// Lexes an identifier or keyword starting at the current character.
    ///
    /// The words `unit`, `to`, `if` and `otherwise` yield their keyword kinds,
    /// and a lone `°` is rewritten to the identifier `deg`.
    fn next_identifier(&mut self) -> Token {
        let start = self.index;
        let mut end = start;
        let mut value = String::new();

        while let Some(&c) = self.peek() {
            if !is_valid_identifier(Some(&c)) {
                break;
            }

            // An underscore introduces a subscript: the value of the token that
            // follows it (a number or another identifier, as in x_1 or x_a) is
            // appended, and the identifier ends there.
            if c == '_' {
                self.advance();
                let subscript = self.next().value;
                value.push('_');
                // Trim, since the number_literal function allows whitespace,
                // which identifiers should not contain.
                value.push_str(subscript.trim_end());
                break;
            }

            // Only allow identifiers with a special character to have *one* character. No more.
            // Break the loop if it isn't the first run and the current character is a special character.
            if end > start && !(c.is_ascii_alphabetic() || c == '\'') {
                break;
            }

            end += 1;
            value.push(c);
            self.advance();
        }

        let kind = match value.as_ref() {
            "unit" => TokenKind::UnitKeyword,
            "to" => TokenKind::ToKeyword,
            "if" => TokenKind::IfKeyword,
            "otherwise" => TokenKind::OtherwiseKeyword,
            _ => TokenKind::Identifier,
        };

        if value == "°" {
            value = String::from("deg");
        }

        build(kind, &value, (start, end))
    }

    /// Returns the next character without consuming it.
    fn peek(&mut self) -> Option<&char> {
        self.chars.peek()
    }

    /// Consumes the next character and moves the character index forward.
    fn advance(&mut self) -> Option<char> {
        self.index += 1;
        self.chars.next()
    }
}

/// Assembles a [`Token`] from its parts, copying `value` into an owned string.
fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {
    let value = String::from(value);

    Token { kind, value, span }
}

/// Reports whether `c` may be part of an identifier.
///
/// `None`, decimal digits, and the operator, punctuation and special-symbol
/// characters listed below are rejected; every other character is accepted.
fn is_valid_identifier(c: Option<&char>) -> bool {
    // Characters that are lexed as tokens of their own and therefore can
    // never be part of an identifier.
    const RESERVED: &[char] = &[
        '+', '-', '/', '*', '%', '^', '!', '(', ')', '=', '.', ',', ';', '|', '⌊', '⌋', '⌈', '⌉',
        '[', ']', '{', '}', 'π', '√', 'τ', 'ϕ', 'Γ', '<', '>', '≠', '≥', '≤',
    ];

    match c {
        Some(ch) => !(RESERVED.contains(ch) || ch.is_digit(10)),
        None => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test_case::test_case;
    use wasm_bindgen_test::*;
    wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);

    /// Asserts that the kinds of `tokens` are exactly `expected`, in order.
    ///
    /// Comparing the whole sequence also catches missing or surplus tokens,
    /// and on failure prints both sequences side by side.
    fn match_tokens(tokens: Vec<Token>, expected: Vec<TokenKind>) {
        let kinds: Vec<TokenKind> = tokens.iter().map(|token| token.kind).collect();

        assert_eq!(kinds, expected);
    }

    #[test]
    #[wasm_bindgen_test]
    fn test_token_kinds() {
        let tokens = Lexer::lex("+-*/%^()|=!,");
        let expected = vec![
            TokenKind::Plus,
            TokenKind::Minus,
            TokenKind::Star,
            TokenKind::Slash,
            TokenKind::Percent,
            TokenKind::Power,
            TokenKind::OpenParenthesis,
            TokenKind::ClosedParenthesis,
            TokenKind::Pipe,
            TokenKind::Equals,
            TokenKind::Exclamation,
            TokenKind::Comma,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }

    #[test]
    #[wasm_bindgen_test]
    fn test_two_character_operators() {
        let tokens = Lexer::lex("**!=>=<=");
        let expected = vec![
            TokenKind::Power,
            TokenKind::NotEquals,
            TokenKind::GreaterOrEquals,
            TokenKind::LessOrEquals,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }

    #[test]
    #[wasm_bindgen_test]
    fn test_brackets() {
        let tokens = Lexer::lex("[1 < 2]");
        let expected = vec![
            TokenKind::OpenBracket,
            TokenKind::Literal,
            TokenKind::LessThan,
            TokenKind::Literal,
            TokenKind::ClosedBracket,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }

    #[test]
    #[wasm_bindgen_test]
    fn test_empty() {
        // test_case macro doesn't seem to work with spaces.
        let test_cases = vec![" ", "     ", "test ", " test     "];

        for input in test_cases {
            let tokens = Lexer::lex(input);

            // Whitespace-only input must produce nothing but the EOF token.
            let expected = if input.trim().is_empty() {
                vec![TokenKind::EOF]
            } else {
                vec![TokenKind::Identifier, TokenKind::EOF]
            };

            match_tokens(tokens, expected);
        }
    }

    #[test_case("1")]
    #[test_case("24")]
    #[test_case("56.4")]
    fn test_number_literal(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Literal, TokenKind::EOF];

        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    #[test_case("x")]
    #[test_case("xy")]
    fn test_identifier(input: &str) {
        let tokens = Lexer::lex(input);
        let expected = vec![TokenKind::Identifier, TokenKind::EOF];

        assert_eq!(&tokens[0].value, input);
        match_tokens(tokens, expected);
    }

    #[test]
    fn test_function_call() {
        let tokens = Lexer::lex("f(x)");
        let expected = vec![
            TokenKind::Identifier,
            TokenKind::OpenParenthesis,
            TokenKind::Identifier,
            TokenKind::ClosedParenthesis,
            TokenKind::EOF,
        ];

        match_tokens(tokens, expected);
    }
}
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`use std::iter::Peekable;`
Initial commit 2020-05-28 23:46:35 +02:00			`use std::str;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`use std::str::Chars;`
Initial commit 2020-05-28 23:46:35 +02:00
Added 'Copy' to TokenKind, and added a missing line related to the sum function 2020-12-13 22:09:49 +01:00			`#[derive(Clone, Debug, PartialEq, Copy)]`
Initial commit 2020-05-28 23:46:35 +02:00			`pub enum TokenKind {`
			`Unknown,`
			`Literal,`
			`Identifier,`
Prepared for supporting multiple function arguments. The types now support several arguments, and the only thing left is to parse this. 2020-05-29 00:27:08 +02:00
Initial commit 2020-05-28 23:46:35 +02:00			`Plus,`
			`Minus,`
			`Star,`
			`Slash,`
			`Power,`
Implemented factorial. 2020-05-30 20:28:13 +02:00			`Exclamation,`
added percentage unit and modulo 2020-12-09 22:18:00 +01:00			`Percent,`
Basics of derivation Derivation implemented for function calls (only). Eg. f'(2). It is not yet possible to do something like f''(2), but this should be implemented in the future. It should also be possible to derive normal expressions, but this is not yet possible. 2021-05-17 20:36:53 +02:00			`Tick,`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`GreaterThan,`
			`LessThan,`
			`Equals,`
			`NotEquals,`
			`GreaterOrEquals,`
			`LessOrEquals,`
Prepared for supporting multiple function arguments. The types now support several arguments, and the only thing left is to parse this. 2020-05-29 00:27:08 +02:00
Added the `unit` statement (very basic and experimental). 2020-06-13 16:19:32 +02:00			`UnitKeyword,`
Integrated the angle unit system with then new dynamic unit system. 2020-06-15 19:10:55 +02:00			`ToKeyword,`
Implemented piecewise 2021-05-31 18:55:37 +02:00			`IfKeyword,`
			`OtherwiseKeyword,`
Prepared for supporting multiple function arguments. The types now support several arguments, and the only thing left is to parse this. 2020-05-29 00:27:08 +02:00
Initial commit 2020-05-28 23:46:35 +02:00			`Pipe,`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`OpenCeil,`
			`ClosedCeil,`
			`OpenFloor,`
			`ClosedFloor,`
Initial commit 2020-05-28 23:46:35 +02:00			`OpenParenthesis,`
			`ClosedParenthesis,`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`OpenBracket,`
			`ClosedBracket,`
Implemented piecewise 2021-05-31 18:55:37 +02:00			`OpenBrace,`
			`ClosedBrace,`
Prepared for supporting multiple function arguments. The types now support several arguments, and the only thing left is to parse this. 2020-05-29 00:27:08 +02:00			`Comma,`
Added semicolon support to combine several statements in one line. 2020-06-14 22:03:22 +02:00			`Semicolon,`
Prepared for supporting multiple function arguments. The types now support several arguments, and the only thing left is to parse this. 2020-05-29 00:27:08 +02:00
Initial commit 2020-05-28 23:46:35 +02:00			`EOF,`
			`}`

Set up foundation for parser unit testing and added test_var() and test_precedence() tests. 2020-06-05 13:36:11 +02:00			`#[derive(Clone, Debug, PartialEq)]`
Initial commit 2020-05-28 23:46:35 +02:00			`pub struct Token {`
			`pub kind: TokenKind,`
			`pub value: String,`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`pub span: (usize, usize),`
Initial commit 2020-05-28 23:46:35 +02:00			`}`

			`pub struct Lexer<'a> {`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`chars: Peekable<Chars<'a>>,`
Initial commit 2020-05-28 23:46:35 +02:00			`index: usize,`
			`}`

			`impl<'a> Lexer<'a> {`
			`pub fn lex(source: &str) -> Vec<Token> {`
			`let mut lexer = Lexer {`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`chars: source.chars().peekable(),`
Initial commit 2020-05-28 23:46:35 +02:00			`index: 0,`
			`};`
			`let mut tokens = Vec::new();`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`loop {`
			`let next = lexer.next();`
Initial commit 2020-05-28 23:46:35 +02:00
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`if let TokenKind::EOF = next.kind {`
			`tokens.push(next);`
			`break;`
			`} else {`
			`tokens.push(next);`
			`}`
Initial commit 2020-05-28 23:46:35 +02:00			`}`

			`tokens`
			`}`

			`fn next(&mut self) -> Token {`
fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`let eof = build(TokenKind::EOF, "", (self.index, self.index));`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`let mut c = if let Some(c) = self.peek() {`
			`*c`
			`} else {`
fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`return eof;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`};`
Initial commit 2020-05-28 23:46:35 +02:00
			`while c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n' {`
Fixed lexer not ignoring whitespace properly. 2020-06-09 10:34:39 +02:00			`if let None = self.advance() {`
fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`return eof;`
Initial commit 2020-05-28 23:46:35 +02:00			`}`
Fixed lexer not ignoring whitespace properly. 2020-06-09 10:34:39 +02:00
fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`c = if let Some(c) = self.peek() {`
			`*c`
			`} else {`
			`return eof;`
			`}`
Initial commit 2020-05-28 23:46:35 +02:00			`}`

Removed `self.advance()` code duplication in lexer. 2020-06-04 20:09:43 +02:00			`if c.is_digit(10) {`
			`return self.next_number_literal();`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`if is_valid_identifier(Some(&c)) {`
Removed `self.advance()` code duplication in lexer. 2020-06-04 20:09:43 +02:00			`return self.next_identifier();`
			`}`

Added position data to tokens. 2020-06-06 20:15:32 +02:00			`let span = (self.index, self.index + 1);`
Initial commit 2020-05-28 23:46:35 +02:00			`let token = match c {`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`'+' => build(TokenKind::Plus, "", span),`
			`'-' => build(TokenKind::Minus, "", span),`
			`'*' => build(TokenKind::Star, "", span),`
			`'/' => build(TokenKind::Slash, "", span),`
			`'^' => build(TokenKind::Power, "", span),`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`'\|' => build(TokenKind::Pipe, "", span),`
			`'⌈' => build(TokenKind::OpenCeil, "", span),`
			`'⌉' => build(TokenKind::ClosedCeil, "", span),`
			`'⌊' => build(TokenKind::OpenFloor, "", span),`
			`'⌋' => build(TokenKind::ClosedFloor, "", span),`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`'(' => build(TokenKind::OpenParenthesis, "", span),`
			`')' => build(TokenKind::ClosedParenthesis, "", span),`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`'[' => build(TokenKind::OpenBracket, "", span),`
			`']' => build(TokenKind::ClosedBracket, "", span),`
Implemented piecewise 2021-05-31 18:55:37 +02:00			`'{' => build(TokenKind::OpenBrace, "", span),`
			`'}' => build(TokenKind::ClosedBrace, "", span),`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`'!' => build(TokenKind::Exclamation, "", span),`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`'=' => build(TokenKind::Equals, "", span),`
			`'>' => build(TokenKind::GreaterThan, "", span),`
			`'<' => build(TokenKind::LessThan, "", span),`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`',' => build(TokenKind::Comma, "", span),`
Added semicolon support to combine several statements in one line. 2020-06-14 22:03:22 +02:00			`';' => build(TokenKind::Semicolon, "", span),`
added percentage unit and modulo 2020-12-09 22:18:00 +01:00			`'%' => build(TokenKind::Percent, "", span),`
Basics of derivation Derivation implemented for function calls (only). Eg. f'(2). It is not yet possible to do something like f''(2), but this should be implemented in the future. It should also be possible to derive normal expressions, but this is not yet possible. 2021-05-17 20:36:53 +02:00			`'\'' => build(TokenKind::Tick, "", span),`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`'≠' => build(TokenKind::NotEquals, "", span),`
			`'≥' => build(TokenKind::GreaterOrEquals, "", span),`
			`'≤' => build(TokenKind::LessOrEquals, "", span),`
Lex special symbols as one token 2021-05-18 17:49:31 +02:00			`// Some of the special symbols will be lexed here,`
			`// so that they don't merge with other symbols.`
			`'π' => build(TokenKind::Identifier, "π", span),`
			`'√' => build(TokenKind::Identifier, "√", span),`
			`'τ' => build(TokenKind::Identifier, "τ", span),`
			`'ϕ' => build(TokenKind::Identifier, "ϕ", span),`
			`'Γ' => build(TokenKind::Identifier, "Γ", span),`
Added prod function and proper sum symbol 2021-06-01 15:52:41 +02:00			`'∏' => build(TokenKind::Identifier, "Γ", span),`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`_ => build(TokenKind::Unknown, "", span),`
Initial commit 2020-05-28 23:46:35 +02:00			`};`

Removed `self.advance()` code duplication in lexer. 2020-06-04 20:09:43 +02:00			`self.advance();`

Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`// Handle tokens with two characters`
			`match (token.kind, self.peek()) {`
			`(TokenKind::Star, Some('*')) => {`
Lex '**' as power sign 2021-05-17 18:14:48 +02:00			`self.advance();`
			`return build(TokenKind::Power, "", span);`
			`}`
Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`(TokenKind::Exclamation, Some('=')) => {`
			`self.advance();`
			`return build(TokenKind::NotEquals, "", span);`
			`}`
			`(TokenKind::GreaterThan, Some('=')) => {`
			`self.advance();`
			`return build(TokenKind::GreaterOrEquals, "", span);`
			`}`
			`(TokenKind::LessThan, Some('=')) => {`
			`self.advance();`
			`return build(TokenKind::LessOrEquals, "", span);`
			`}`
			`_ => (),`
Lex '**' as power sign 2021-05-17 18:14:48 +02:00			`}`

Initial commit 2020-05-28 23:46:35 +02:00			`token`
			`}`

			`fn next_number_literal(&mut self) -> Token {`
			`let start = self.index;`
			`let mut end = start;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`let mut value = String::new();`

			`loop {`
			`let c = if let Some(c) = self.peek() {`
			`*c`
			`} else {`
			`break;`
			`};`

Handle new lines on windows 2021-09-04 12:14:49 +02:00			`if !c.is_digit(10) && c != '.' && !c.is_whitespace() \|\| c == '\n' \|\| c == '\r' {`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`break;`
			`}`
Initial commit 2020-05-28 23:46:35 +02:00
			`end += 1;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`value.push(c);`
Initial commit 2020-05-28 23:46:35 +02:00			`self.advance();`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`build(TokenKind::Literal, &value, (start, end))`
Initial commit 2020-05-28 23:46:35 +02:00			`}`

			`fn next_identifier(&mut self) -> Token {`
			`let start = self.index;`
			`let mut end = start;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`let mut value = String::new();`
Initial commit 2020-05-28 23:46:35 +02:00
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`while is_valid_identifier(self.peek()) {`
			`let c = *self.peek().unwrap();`
Somewhat fixed UTF-8 support in the lexer. 2020-06-07 19:48:52 +02:00
allow letters after underscore in variables, eg. x_a 2020-12-09 10:47:46 +01:00			`// If the current character is an underscore, allow a number next.`
Made the `x_1` notation possible. 2020-06-09 14:59:29 +02:00			`// This is to allow the notation like the following: x_1`
			`if c == '_' {`
			`self.advance();`
allow letters after underscore in variables, eg. x_a 2020-12-09 10:47:46 +01:00			`let num = self.next().value;`
Made the `x_1` notation possible. 2020-06-09 14:59:29 +02:00			`value.push('_');`
			`value.push_str(&num.trim_end()); // Trim, since the number_literal function allows whitespace, which identifiers should not contain.`
			`break;`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`// Only allow identifiers with a special character to have one character. No more.`
			`// Break the loop if it isn't the first run and the current character is a special character.`
Removed regex dependency from kalk crate and made 'test-case' a dev-dependency 2021-01-03 00:07:30 +01:00			`if end - start > 0 && !(c.is_ascii_alphabetic() \|\| c == '\'' \|\| c == '_') {`
Somewhat fixed UTF-8 support in the lexer. 2020-06-07 19:48:52 +02:00			`break;`
			`}`

Initial commit 2020-05-28 23:46:35 +02:00			`end += 1;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`value.push(c);`
Initial commit 2020-05-28 23:46:35 +02:00			`self.advance();`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`let kind = match value.as_ref() {`
Added the `unit` statement (very basic and experimental). 2020-06-13 16:19:32 +02:00			`"unit" => TokenKind::UnitKeyword,`
Integrated the angle unit system with then new dynamic unit system. 2020-06-15 19:10:55 +02:00			`"to" => TokenKind::ToKeyword,`
Implemented piecewise 2021-05-31 18:55:37 +02:00			`"if" => TokenKind::IfKeyword,`
			`"otherwise" => TokenKind::OtherwiseKeyword,`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`_ => TokenKind::Identifier,`
			`};`
Initial commit 2020-05-28 23:46:35 +02:00
Made lexer lex '°' as 'deg'. 2020-06-18 18:06:17 +02:00			`if &value == "°" {`
			`value = String::from("deg");`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`build(kind, &value, (start, end))`
Initial commit 2020-05-28 23:46:35 +02:00			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`fn peek(&mut self) -> Option<&char> {`
			`self.chars.peek()`
Somewhat fixed UTF-8 support in the lexer. 2020-06-07 19:48:52 +02:00			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`fn advance(&mut self) -> Option<char> {`
Cleaned up redundant code and replaced compare_enums() with PartialEq. 2020-06-05 13:47:39 +02:00			`self.index += 1;`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`self.chars.next()`
Initial commit 2020-05-28 23:46:35 +02:00			`}`
			`}`

Added position data to tokens. 2020-06-06 20:15:32 +02:00			`fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {`
Replaced f64 with rug::Float. 2020-06-04 14:46:45 +02:00			`Token {`
			`kind,`
			`value: value.to_string(),`
Added position data to tokens. 2020-06-06 20:15:32 +02:00			`span,`
Replaced f64 with rug::Float. 2020-06-04 14:46:45 +02:00			`}`
			`}`

Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`fn is_valid_identifier(c: Option<&char>) -> bool {`
			`if let Some(c) = c {`
Removed regex dependency from kalk crate and made 'test-case' a dev-dependency 2021-01-03 00:07:30 +01:00			`match c {`
			`'+' \| '-' \| '/' \| '*' \| '%' \| '^' \| '!' \| '(' \| ')' \| '=' \| '.' \| ',' \| ';' \| '\|'`
Implemented piecewise 2021-05-31 18:55:37 +02:00			`\| '⌊' \| '⌋' \| '⌈' \| '⌉' \| '[' \| ']' \| '{' \| '}' \| 'π' \| '√' \| 'τ' \| 'ϕ' \| 'Γ' \| '<'`
			`\| '>' \| '≠' \| '≥' \| '≤' => false,`
Removed regex dependency from kalk crate and made 'test-case' a dev-dependency 2021-01-03 00:07:30 +01:00			`_ => !c.is_digit(10),`
			`}`
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support. 2020-06-08 21:51:45 +02:00			`} else {`
			`false`
			`}`
Initial commit 2020-05-28 23:46:35 +02:00			`}`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00
			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`
			`use test_case::test_case;`
WebAssembly foundation 2020-12-30 22:50:39 +01:00			`use wasm_bindgen_test::*;`
			`wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00
			`fn match_tokens(tokens: Vec<Token>, expected: Vec<TokenKind>) {`
			`let mut expected_iter = expected.iter();`

			`for token in tokens {`
Cleaned up redundant code and replaced compare_enums() with PartialEq. 2020-06-05 13:47:39 +02:00			`assert_eq!(token.kind, *expected_iter.next().unwrap());`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00			`}`
			`}`

			`#[test]`
WebAssembly foundation 2020-12-30 22:50:39 +01:00			`#[wasm_bindgen_test]`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00			`fn test_token_kinds() {`
added percentage unit and modulo 2020-12-09 22:18:00 +01:00			`let tokens = Lexer::lex("+-*/%^()\|=!,");`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00			`let expected = vec![`
			`TokenKind::Plus,`
			`TokenKind::Minus,`
			`TokenKind::Star,`
			`TokenKind::Slash,`
added percentage unit and modulo 2020-12-09 22:18:00 +01:00			`TokenKind::Percent,`
Added unit tests for lexer. 2020-06-04 21:53:45 +02:00			`TokenKind::Power,`
			`TokenKind::OpenParenthesis,`
			`TokenKind::ClosedParenthesis,`
			`TokenKind::Pipe,`
			`TokenKind::Equals,`
			`TokenKind::Exclamation,`
			`TokenKind::Comma,`
			`TokenKind::EOF,`
			`];`

			`match_tokens(tokens, expected);`
			`}`

Comparison operators and Iverson brackets 2021-05-31 13:46:06 +02:00			`#[test]`
			`#[wasm_bindgen_test]`
			`fn test_brackets() {`
			`let tokens = Lexer::lex("[1 < 2]");`
			`let expected = vec![`
			`TokenKind::OpenBracket,`
			`TokenKind::Literal,`
			`TokenKind::LessThan,`
			`TokenKind::Literal,`
			`TokenKind::ClosedBracket,`
			`TokenKind::EOF,`
			`];`

			`match_tokens(tokens, expected);`
			`}`

fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`#[test]`
WebAssembly foundation 2020-12-30 22:50:39 +01:00			`#[wasm_bindgen_test]`
fixed panic on trailing spaces 2020-12-09 09:36:49 +01:00			`fn test_empty() {`
			`// test_case macro doesn't seem to work with spaces.`
			`let test_cases = vec![" ", " ", "test ", " test "];`

			`for input in test_cases {`
			`let tokens = Lexer::lex(input);`

			`if regex::Regex::new(r"^\s*$").unwrap().is_match(input) {`
			`let expected = vec![TokenKind::EOF];`
			`match_tokens(tokens, expected);`
			`} else {`
			`let expected = vec![TokenKind::Identifier, TokenKind::EOF];`
			`match_tokens(tokens, expected);`
			`}`
			`}`
			`}`

Added unit tests for lexer. 2020-06-04 21:53:45 +02:00			`#[test_case("1")]`
			`#[test_case("24")]`
			`#[test_case("56.4")]`
			`fn test_number_literal(input: &str) {`
			`let tokens = Lexer::lex(input);`
			`let expected = vec![TokenKind::Literal, TokenKind::EOF];`

			`assert_eq!(&tokens[0].value, input);`
			`match_tokens(tokens, expected);`
			`}`

			`#[test_case("x")]`
			`#[test_case("xy")]`
			`fn test_identifier(input: &str) {`
			`let tokens = Lexer::lex(input);`
			`let expected = vec![TokenKind::Identifier, TokenKind::EOF];`

			`assert_eq!(&tokens[0].value, input);`
			`match_tokens(tokens, expected);`
			`}`

			`#[test]`
			`fn test_function_call() {`
			`let tokens = Lexer::lex("f(x)");`
			`let expected = vec![`
			`TokenKind::Identifier,`
			`TokenKind::OpenParenthesis,`
			`TokenKind::Identifier,`
			`TokenKind::ClosedParenthesis,`
			`TokenKind::EOF,`
			`];`

			`match_tokens(tokens, expected);`
			`}`
			`}`