From 42524eefa3a232d4f653ffe28e662264093c3bf1 Mon Sep 17 00:00:00 2001
From: PaddiM8
Date: Sun, 7 Jun 2020 19:48:52 +0200
Subject: [PATCH] Somewhat fixed UTF-8 support in the lexer.

---
 kalk/Cargo.toml     |  1 +
 kalk/src/lexer.rs   | 23 +++++++++++++++++++++--
 kalk/src/prelude.rs |  1 +
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/kalk/Cargo.toml b/kalk/Cargo.toml
index 26f7984..da57cbb 100644
--- a/kalk/Cargo.toml
+++ b/kalk/Cargo.toml
@@ -14,3 +14,4 @@ panic = "abort"
 phf = { version = "0.8", features = ["macros"] }
 rug = "1.9.0"
 test-case = "1.0.0"
+regex = "1"
diff --git a/kalk/src/lexer.rs b/kalk/src/lexer.rs
index ff9c2c5..1f8dedd 100644
--- a/kalk/src/lexer.rs
+++ b/kalk/src/lexer.rs
@@ -75,7 +75,7 @@ impl<'a> Lexer<'a> {
             return self.next_number_literal();
         }
 
-        if c.is_alphabetic() {
+        if is_valid_identifier(c) {
             return self.next_identifier();
         }
 
@@ -121,8 +121,21 @@ impl<'a> Lexer<'a> {
     fn next_identifier(&mut self) -> Token {
         let start = self.index;
         let mut end = start;
+        let letter_reg = regex::Regex::new(r"[A-z']").unwrap();
 
         while !self.is_at_end() && is_valid_identifier(self.peek()) {
+            let c = self.peek();
+
+            // Separate special characters from normal characters
+            // in order to allow eg. x√64
+            if end - start > 0 // If this isn't the first run
+                && letter_reg.is_match(&(self.previous() as char).to_string()) // and the previous char was a normal one
+                && !letter_reg.is_match(&c.to_string())
+            // and this one is a special character (why did rustfmt put this on a new line??)
+            {
+                break;
+            }
+
             end += 1;
             self.advance();
         }
@@ -144,6 +157,10 @@ impl<'a> Lexer<'a> {
         self.source[self.index].into()
     }
 
+    fn previous(&self) -> char {
+        self.source[self.index - 1].into()
+    }
+
     fn advance(&mut self) {
         self.index += 1;
     }
@@ -162,7 +179,9 @@ fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {
 }
 
 fn is_valid_identifier(c: char) -> bool {
-    c.is_alphabetic() || c == '°' || c == '√' || c == '\'' || c == '¨' || c == 'Σ'
+    regex::Regex::new(r"[^\s\n\r0-9\+-/\*\^!\(\)=\.,|]")
+        .unwrap()
+        .is_match(&c.to_string())
 }
 
 #[cfg(test)]
diff --git a/kalk/src/prelude.rs b/kalk/src/prelude.rs
index b498e5d..977452c 100644
--- a/kalk/src/prelude.rs
+++ b/kalk/src/prelude.rs
@@ -50,6 +50,7 @@ pub const UNARY_FUNCS: phf::Map<&'static str, UnaryFuncInfo> = phf::phf_map! {
     "ln" => UnaryFuncInfo(ln, Other),
     "round" => UnaryFuncInfo(round, Other),
     "sqrt" => UnaryFuncInfo(sqrt, Other),
+    "√" => UnaryFuncInfo(sqrt, Other),
     "trunc" => UnaryFuncInfo(trunc, Other),
 };
 pub const BINARY_FUNCS: phf::Map<&'static str, BinaryFuncInfo> = phf::phf_map! {