Somewhat fixed UTF-8 support in the lexer.

2025-06-30 22:40:01 +02:00 · 2020-06-07 19:48:52 +02:00
parent ce65eb79a8
commit 42524eefa3
3 changed files with 23 additions and 2 deletions
--- a/kalk/Cargo.toml
+++ b/kalk/Cargo.toml
@ -14,3 +14,4 @@ panic = "abort"
 phf = { version = "0.8", features = ["macros"] }
 rug = "1.9.0"
 test-case = "1.0.0"
 regex = "1"
--- a/kalk/src/lexer.rs
+++ b/kalk/src/lexer.rs
@ -75,7 +75,7 @@ impl<'a> Lexer<'a> {
            return self.next_number_literal();
        }
-        if c.is_alphabetic() {
+        if is_valid_identifier(c) {
            return self.next_identifier();
        }
@ -121,8 +121,21 @@ impl<'a> Lexer<'a> {
    fn next_identifier(&mut self) -> Token {
        let start = self.index;
        let mut end = start;
        let letter_reg = regex::Regex::new(r"[A-z']").unwrap();
        while !self.is_at_end() && is_valid_identifier(self.peek()) {
            let c = self.peek();
            // Separate special characters from normal characters
            // in order to allow eg. x√64
            if end - start > 0 // If this isn't the first run
                && letter_reg.is_match(&(self.previous() as char).to_string()) // and the previous char was a normal one
                && !letter_reg.is_match(&c.to_string())
            // and this one is a special character (why did rustfmt put this on a new line??)
            {
                break;
            }
            end += 1;
            self.advance();
        }
@ -144,6 +157,10 @@ impl<'a> Lexer<'a> {
        self.source[self.index].into()
    }
    /// Returns the element of `self.source` located one position before the
    /// current `self.index`, converted into a `char`.
    ///
    /// The subtraction is unchecked, so this is only valid while
    /// `self.index > 0`.
    fn previous(&self) -> char {
        let behind = self.index - 1;
        self.source[behind].into()
    }
    /// Moves the cursor (`self.index`) forward by one position.
    ///
    /// No bounds check is performed here.
    // NOTE(review): callers presumably guard with `is_at_end()` first, as
    // `next_identifier` does — confirm for the other call sites.
    fn advance(&mut self) {
        self.index += 1;
    }
@ -162,7 +179,9 @@ fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {
 }
 /// Reports whether `c` may appear inside an identifier.
 ///
 /// This is a deny-list: every character is accepted except whitespace, the
 /// ASCII digits `0`-`9`, and the operator/punctuation characters
 /// `+ , - . / * ^ ! ( ) = |`. It replaces the earlier allow-list
 /// (`is_alphabetic` plus `°`, `√`, `'`, `¨`, `Σ`) so that arbitrary
 /// non-ASCII symbols can be used in identifiers.
 ///
 /// The accepted set is the same as matching a single character against the
 /// pattern `[^\s\n\r0-9\+-/\*\^!\(\)=\.,|]`, where `\+-/` is the range
 /// `'+'..='/'`. It is written as a plain `match` because this function runs
 /// once per input character, and compiling a regex and allocating a
 /// `String` on every call is needlessly expensive.
 fn is_valid_identifier(c: char) -> bool {
     match c {
         // Digits start number literals, never identifiers.
         '0'..='9' => false,
         // Operators and punctuation that delimit tokens.
         '+' | ',' | '-' | '.' | '/' | '*' | '^' | '!' | '(' | ')' | '=' | '|' => false,
         // Unicode whitespace (the same set as the regex `\s`) separates tokens.
         _ => !c.is_whitespace(),
     }
 }
 #[cfg(test)]
--- a/kalk/src/prelude.rs
+++ b/kalk/src/prelude.rs
@ -50,6 +50,7 @@ pub const UNARY_FUNCS: phf::Map<&'static str, UnaryFuncInfo> = phf::phf_map! {
    "ln" => UnaryFuncInfo(ln, Other),
    "round" => UnaryFuncInfo(round, Other),
    "sqrt" => UnaryFuncInfo(sqrt, Other),
    "√" => UnaryFuncInfo(sqrt, Other),
    "trunc" => UnaryFuncInfo(trunc, Other),
 };
 pub const BINARY_FUNCS: phf::Map<&'static str, BinaryFuncInfo> = phf::phf_map! {