Somewhat fixed UTF-8 support in the lexer.

This commit is contained in:
PaddiM8 2020-06-07 19:48:52 +02:00
parent ce65eb79a8
commit 42524eefa3
3 changed files with 23 additions and 2 deletions

View File

@ -14,3 +14,4 @@ panic = "abort"
phf = { version = "0.8", features = ["macros"] }
rug = "1.9.0"
test-case = "1.0.0"
regex = "1"

View File

@ -75,7 +75,7 @@ impl<'a> Lexer<'a> {
return self.next_number_literal();
}
if c.is_alphabetic() {
if is_valid_identifier(c) {
return self.next_identifier();
}
@ -121,8 +121,21 @@ impl<'a> Lexer<'a> {
fn next_identifier(&mut self) -> Token {
let start = self.index;
let mut end = start;
let letter_reg = regex::Regex::new(r"[A-z']").unwrap();
while !self.is_at_end() && is_valid_identifier(self.peek()) {
let c = self.peek();
// Separate special characters from normal characters
// in order to allow eg. x√64
if end - start > 0 // If this isn't the first run
&& letter_reg.is_match(&(self.previous() as char).to_string()) // and the previous char was a normal one
&& !letter_reg.is_match(&c.to_string())
// and this one is a special character (why did rustfmt put this on a new line??)
{
break;
}
end += 1;
self.advance();
}
@ -144,6 +157,10 @@ impl<'a> Lexer<'a> {
self.source[self.index].into()
}
/// Returns the element of `source` that sits directly before the current
/// index, converted to a `char`.
///
/// NOTE(review): assumes `index` is at least 1 when this is called; confirm
/// that every caller guards against calling it at the very start.
fn previous(&self) -> char {
    let before = self.index - 1;
    char::from(self.source[before])
}
/// Moves the lexer's current index forward by exactly one position.
fn advance(&mut self) {
    let next = self.index + 1;
    self.index = next;
}
@ -162,7 +179,9 @@ fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {
}
/// Returns `true` when `c` may be part of an identifier.
///
/// Every character is accepted except:
/// - whitespace (including `\n` and `\r`),
/// - the ASCII digits `0`-`9`,
/// - the punctuation characters `+ , - . / * ^ ! ( ) = |`.
///
/// Non-ASCII symbols such as `√`, `°`, `¨` and `Σ` are therefore accepted,
/// as are letters from any script and the apostrophe.
fn is_valid_identifier(c: char) -> bool {
    // A plain `match` gives the same accept/reject set as the pattern
    // `[^\s\n\r0-9\+-/\*\^!\(\)=\.,|]` without compiling a regex and
    // allocating a `String` for every character that is inspected.
    match c {
        '0'..='9' => false,
        '+' | ',' | '-' | '.' | '/' | '*' | '^' | '!' | '(' | ')' | '=' | '|' => false,
        _ => !c.is_whitespace(),
    }
}
#[cfg(test)]

View File

@ -50,6 +50,7 @@ pub const UNARY_FUNCS: phf::Map<&'static str, UnaryFuncInfo> = phf::phf_map! {
"ln" => UnaryFuncInfo(ln, Other),
"round" => UnaryFuncInfo(round, Other),
"sqrt" => UnaryFuncInfo(sqrt, Other),
"" => UnaryFuncInfo(sqrt, Other),
"trunc" => UnaryFuncInfo(trunc, Other),
};
pub const BINARY_FUNCS: phf::Map<&'static str, BinaryFuncInfo> = phf::phf_map! {