diff --git a/kalk/src/lexer.rs b/kalk/src/lexer.rs index f4ff48e..dfc2674 100644 --- a/kalk/src/lexer.rs +++ b/kalk/src/lexer.rs @@ -66,6 +66,8 @@ pub struct Lexer<'a> { chars: Peekable>, index: usize, other_radix: Option, + buffer: Option, + has_backtracked: bool, } impl<'a> Lexer<'a> { @@ -74,6 +76,8 @@ impl<'a> Lexer<'a> { chars: source.chars().peekable(), index: 0, other_radix: None, + buffer: None, + has_backtracked: false, } } @@ -105,7 +109,7 @@ impl<'a> Lexer<'a> { fn next(&mut self) -> Token { let eof = build(TokenKind::Eof, "", (self.index, self.index)); let mut c = if let Some(c) = self.peek() { - *c + c } else { return eof; }; @@ -116,7 +120,7 @@ impl<'a> Lexer<'a> { } c = if let Some(c) = self.peek() { - *c + c } else { return eof; } @@ -126,7 +130,7 @@ impl<'a> Lexer<'a> { return self.next_number_literal(); } - if is_valid_identifier(Some(&c)) { + if is_valid_identifier(Some(c)) { return self.next_identifier(); } @@ -221,10 +225,12 @@ impl<'a> Lexer<'a> { let mut start = self.index; let mut end = start; let mut value = String::new(); - let mut leading_zero = self.peek().unwrap_or(&'\0') == &'0'; + let mut leading_zero = self.peek().unwrap_or('\0') == '0'; let mut base = 10u8; + let mut is_e_notation = false; while let Some(c) = self.peek() { + let c = c.clone(); // If at the second character and // the first character is a zero, // allow a letter @@ -247,22 +253,46 @@ impl<'a> Lexer<'a> { } } - if !c.is_digit(base as u32) && *c != '.' && *c != '_' && !c.is_whitespace() - || *c == '\n' - || *c == '\r' + if is_e_notation && c == 'E' { + break; + } + + if end != start && c == 'E' { + is_e_notation = true; + end += 1; + value.push(c); + self.advance(); + + if let Some('-') = self.peek() { + end += 1; + value.push('-'); + self.advance(); + } else if !self.peek().unwrap_or('\0').is_ascii_digit() { + end -= 1; + value.pop(); + self.backtrack(); + break; + } + + continue; + } + + if !c.is_digit(base as u32) && c != '.' && c != '_' && !c.is_whitespace() + || c == '\n' + || c == '\r' { break; } end += 1; - value.push(*c); + value.push(c); self.advance(); } // Subscript unicode symbols after the literal, eg. 11₂ let mut base_str = String::new(); - while crate::text_utils::is_subscript(self.peek().unwrap_or(&'\0')) { - base_str.push(*self.peek().unwrap()); + while crate::text_utils::is_subscript(&self.peek().unwrap_or('\0')) { + base_str.push(self.peek().unwrap()); self.advance(); } @@ -295,7 +325,7 @@ impl<'a> Lexer<'a> { let mut subscript = String::new(); while is_valid_identifier(self.peek()) { - let c = *self.peek().unwrap(); + let c = self.peek().unwrap(); // If the current character is an underscore, allow a number next. // This is to allow the notation like the following: x_1 @@ -379,14 +409,30 @@ impl<'a> Lexer<'a> { } } - fn peek(&mut self) -> Option<&char> { - self.chars.peek() + fn peek(&mut self) -> Option { + if self.has_backtracked { + self.buffer + } else { + self.chars.peek().copied() + } } fn advance(&mut self) -> Option { self.index += 1; + if self.has_backtracked { + self.has_backtracked = false; + + return self.buffer; + } + + self.buffer = self.peek(); self.chars.next() } + + fn backtrack(&mut self) { + self.has_backtracked = true; + self.index -= 1; + } } fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token { @@ -397,14 +443,14 @@ fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token { } } -fn is_valid_identifier(c: Option<&char>) -> bool { +fn is_valid_identifier(c: Option) -> bool { if let Some(c) = c { match c { '+' | '-' | '/' | '*' | '%' | '^' | '!' | '(' | ')' | '=' | '.' | ',' | ';' | '|' | '⌊' | '⌋' | '⌈' | '⌉' | '[' | ']' | '{' | '}' | 'π' | '√' | 'τ' | 'ϕ' | 'Γ' | '<' | '>' | '≠' | '≥' | '≤' | '×' | '÷' | '⋅' | '⟦' | '⟧' | '∧' | '∨' | '¬' | ':' | 'ᵀ' | '\n' => false, - _ => !c.is_ascii_digit() || is_superscript(c) || is_subscript(c), + _ => !c.is_ascii_digit() || is_superscript(&c) || is_subscript(&c), } } else { false diff --git a/kalk/src/parser.rs b/kalk/src/parser.rs index 241c092..b8f4b54 100644 --- a/kalk/src/parser.rs +++ b/kalk/src/parser.rs @@ -528,7 +528,7 @@ fn parse_primary(context: &mut Context) -> Result { TokenKind::OpenParenthesis | TokenKind::OpenBracket => parse_vector(context)?, TokenKind::Pipe | TokenKind::OpenCeil | TokenKind::OpenFloor => parse_group_fn(context)?, TokenKind::Identifier => parse_identifier(context)?, - TokenKind::Literal => Expr::Literal(crate::float!(string_to_num(&advance(context).value)?)), + TokenKind::Literal => Expr::Literal(string_to_num(&advance(context).value)?), TokenKind::True => { advance(context); Expr::Boolean(true) @@ -770,10 +770,39 @@ fn skip_newlines(context: &mut Context) { } } -fn string_to_num(value: &str) -> Result { +#[cfg(feature = "rug")] +fn string_to_num(value: &str) -> Result { + use rug::ops::Pow; + + if value.contains('E') { + let parts = value.split('E').collect::>(); + let left = crate::float!(string_to_num(parts[0])?); + let right = crate::float!(string_to_num(parts[1])?); + + return Ok(left * 10.pow(right)); + } + let base = get_base(value)?; if let Some(result) = crate::radix::parse_float_radix(&value.replace(' ', ""), base) { - Ok(result) + Ok(crate::float!(result)) + } else { + Err(KalkError::InvalidNumberLiteral(value.into())) + } +} + +#[cfg(not(feature = "rug"))] +fn string_to_num(value: &str) -> Result { + if value.contains('E') { + let parts = value.split('E').collect::>(); + let left = crate::float!(string_to_num(parts[0])?); + let right = crate::float!(string_to_num(parts[1])?); + + return Ok(left * 10_f64.powf(right)); + } + + let base = get_base(value)?; + if let Some(result) = crate::radix::parse_float_radix(&value.replace(' ', ""), base) { + Ok(crate::float!(result)) } else { Err(KalkError::InvalidNumberLiteral(value.into())) } diff --git a/tests/basics.kalker b/tests/basics.kalker index 24bd943..4c3a466 100644 --- a/tests/basics.kalker +++ b/tests/basics.kalker @@ -5,3 +5,11 @@ g(x) = x^x 2f(f(x) + y) * 2 = 13600 and g(2) = 4 + +E = 3 + +1E-2 = 0.01 and +1.23E2 = 123 and +1E1 = 10 and +2E = 6 and +0E0 = 0 \ No newline at end of file