Mirror of https://github.com/PaddiM8/kalker.git (synced 2025-01-23 21:48:35 +01:00)
Changed lexer source to a peekable iterator, resulting in (hopefully) proper UTF-8 support.
This commit is contained in:
parent 42524eefa3
commit 165df6d3f3
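
Not part of the commit: a minimal standalone sketch (the input "√64" is only an assumed example) of why the change matters. Indexing the byte slice of a &str lands inside multi-byte characters such as '√', while a peekable char iterator always yields whole characters.

fn main() {
    let source = "√64";

    // Old approach: index the raw bytes. '√' is three bytes long,
    // so the first byte (0xE2) is not the character itself.
    let byte_based = source.as_bytes()[0] as char;

    // New approach: a peekable char iterator yields whole Unicode
    // scalar values, so the first item really is '√'.
    let mut chars = source.chars().peekable();
    let char_based = chars.peek().copied();

    println!("{:?}", byte_based); // 'â' — garbage, not '√'
    println!("{:?}", char_based); // Some('√')
}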
@@ -1,4 +1,6 @@
+use std::iter::Peekable;
 use std::str;
+use std::str::Chars;
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum TokenKind {
@@ -18,6 +20,10 @@ pub enum TokenKind {
     Rad,
 
     Pipe,
+    OpenCeil,
+    ClosedCeil,
+    OpenFloor,
+    ClosedFloor,
     OpenParenthesis,
     ClosedParenthesis,
     Comma,
@@ -33,41 +39,44 @@ pub struct Token {
 }
 
 pub struct Lexer<'a> {
-    source: &'a [u8],
+    chars: Peekable<Chars<'a>>,
     index: usize,
 }
 
 impl<'a> Lexer<'a> {
     pub fn lex(source: &str) -> Vec<Token> {
         let mut lexer = Lexer {
-            source: source.as_bytes(),
+            chars: source.chars().peekable(),
             index: 0,
         };
         let mut tokens = Vec::new();
 
-        while !lexer.is_at_end() {
-            tokens.push(lexer.next());
-        }
-
-        // If there isn't already an EOF token, add it.
-        if let TokenKind::EOF = tokens.last().unwrap().kind {
-        } else {
-            tokens.push(build(TokenKind::EOF, "", (source.len(), source.len())));
+        loop {
+            let next = lexer.next();
+
+            if let TokenKind::EOF = next.kind {
+                tokens.push(next);
+                break;
+            } else {
+                tokens.push(next);
+            }
         }
 
         tokens
     }
 
     fn next(&mut self) -> Token {
-        let mut c = self.peek();
+        let mut c = if let Some(c) = self.peek() {
+            *c
+        } else {
+            return build(TokenKind::EOF, "", (self.index, self.index));
+        };
 
         while c == ' ' || c == '\t' || c == '\r' || c == '\n' {
-            self.advance();
-
-            if self.is_at_end() {
-                return build(TokenKind::EOF, "", (self.index, self.index));
+            if let Some(next_c) = self.advance() {
+                c = next_c;
             } else {
-                c = self.peek();
+                return build(TokenKind::EOF, "", (self.index, self.index));
             }
         }
 
@@ -75,7 +84,7 @@ impl<'a> Lexer<'a> {
             return self.next_number_literal();
         }
 
-        if is_valid_identifier(c) {
+        if is_valid_identifier(Some(&c)) {
             return self.next_identifier();
         }
 
@@ -86,9 +95,13 @@ impl<'a> Lexer<'a> {
             '*' => build(TokenKind::Star, "", span),
             '/' => build(TokenKind::Slash, "", span),
             '^' => build(TokenKind::Power, "", span),
+            '|' => build(TokenKind::Pipe, "", span),
+            '⌈' => build(TokenKind::OpenCeil, "", span),
+            '⌉' => build(TokenKind::ClosedCeil, "", span),
+            '⌊' => build(TokenKind::OpenFloor, "", span),
+            '⌋' => build(TokenKind::ClosedFloor, "", span),
             '(' => build(TokenKind::OpenParenthesis, "", span),
             ')' => build(TokenKind::ClosedParenthesis, "", span),
-            '|' => build(TokenKind::Pipe, "", span),
             '=' => build(TokenKind::Equals, "", span),
             '!' => build(TokenKind::Exclamation, "", span),
             ',' => build(TokenKind::Comma, "", span),
@@ -103,70 +116,63 @@ impl<'a> Lexer<'a> {
     fn next_number_literal(&mut self) -> Token {
         let start = self.index;
         let mut end = start;
+        let mut value = String::new();
 
-        while !self.is_at_end()
-            && (self.peek().is_digit(10) || self.peek() == '.' || self.peek().is_whitespace())
-        {
+        loop {
+            let c = if let Some(c) = self.peek() {
+                *c
+            } else {
+                break;
+            };
+
+            if !c.is_digit(10) && c != '.' && !c.is_whitespace() {
+                break;
+            }
+
             end += 1;
+            value.push(c);
             self.advance();
         }
 
-        if let Ok(value) = str::from_utf8(&self.source[start..end]) {
-            build(TokenKind::Literal, value, (start, end))
-        } else {
-            build(TokenKind::Unknown, "", (self.index, self.index))
-        }
+        build(TokenKind::Literal, &value, (start, end))
     }
 
     fn next_identifier(&mut self) -> Token {
         let start = self.index;
         let mut end = start;
         let letter_reg = regex::Regex::new(r"[A-z']").unwrap();
+        let mut value = String::new();
 
-        while !self.is_at_end() && is_valid_identifier(self.peek()) {
-            let c = self.peek();
+        while is_valid_identifier(self.peek()) {
+            let c = *self.peek().unwrap();
 
-            // Separate special characters from normal characters
-            // in order to allow eg. x√64
-            if end - start > 0 // If this isn't the first run
-                && letter_reg.is_match(&(self.previous() as char).to_string()) // and the previous char was a normal one
-                && !letter_reg.is_match(&c.to_string())
-            // and this one is a special character (why did rustfmt put this on a new line??)
-            {
+            // Only allow identifiers with a special character to have *one* character. No more.
+            // Break the loop if it isn't the first run and the current character is a special character.
+            if end - start > 0 && !letter_reg.is_match(&c.to_string()) {
                 break;
             }
 
             end += 1;
+            value.push(c);
             self.advance();
         }
 
-        if let Ok(value) = str::from_utf8(&self.source[start..end]) {
-            let kind = match value {
-                "deg" | "°" => TokenKind::Deg,
-                "rad" => TokenKind::Rad,
-                _ => TokenKind::Identifier,
-            };
-
-            build(kind, value, (start, end))
-        } else {
-            build(TokenKind::Unknown, "", (self.index, self.index))
-        }
+        let kind = match value.as_ref() {
+            "deg" | "°" => TokenKind::Deg,
+            "rad" => TokenKind::Rad,
+            _ => TokenKind::Identifier,
+        };
+
+        build(kind, &value, (start, end))
     }
 
-    fn peek(&self) -> char {
-        self.source[self.index].into()
+    fn peek(&mut self) -> Option<&char> {
+        self.chars.peek()
     }
 
-    fn previous(&self) -> char {
-        self.source[self.index - 1].into()
-    }
-
-    fn advance(&mut self) {
+    fn advance(&mut self) -> Option<char> {
         self.index += 1;
-    }
-
-    fn is_at_end(&self) -> bool {
-        self.index >= self.source.len()
+        self.chars.next()
     }
 }
 
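An illustrative aside, not from the commit: advance() still bumps index once per call, and each call now consumes exactly one char, so token spans count characters rather than bytes. For multi-byte input the two differ.

fn main() {
    // Assumed example input: "√64" is five bytes but only three chars.
    let source = "√64";
    assert_eq!(source.len(), 5);           // byte length of the UTF-8 encoding
    assert_eq!(source.chars().count(), 3); // what a per-char index effectively counts
}
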
@@ -178,10 +184,14 @@ fn build(kind: TokenKind, value: &str, span: (usize, usize)) -> Token {
     }
 }
 
-fn is_valid_identifier(c: char) -> bool {
-    regex::Regex::new(r"[^\s\n\r0-9\+-/\*\^!\(\)=\.,|]")
-        .unwrap()
-        .is_match(&c.to_string())
+fn is_valid_identifier(c: Option<&char>) -> bool {
+    if let Some(c) = c {
+        regex::Regex::new(r"[^\s\n\r0-9\+-/\*\^!\(\)=\.,|⌊⌋⌈⌉]")
+            .unwrap()
+            .is_match(&c.to_string())
+    } else {
+        false
+    }
 }
 
 #[cfg(test)]
@@ -208,7 +208,7 @@ fn parse_factorial(context: &mut Context) -> Result<Expr, CalcError> {
 fn parse_primary(context: &mut Context) -> Result<Expr, CalcError> {
     let expr = match peek(context).kind {
         TokenKind::OpenParenthesis => parse_group(context)?,
-        TokenKind::Pipe => parse_abs(context)?,
+        TokenKind::Pipe | TokenKind::OpenCeil | TokenKind::OpenFloor => parse_group_fn(context)?,
         TokenKind::Identifier => parse_identifier(context)?,
         _ => Expr::Literal(advance(context).value.clone()),
     };
@@ -228,12 +228,18 @@ fn parse_group(context: &mut Context) -> Result<Expr, CalcError> {
     Ok(group_expr)
 }
 
-fn parse_abs(context: &mut Context) -> Result<Expr, CalcError> {
-    advance(context);
-    let group_expr = Expr::Group(Box::new(parse_expr(context)?));
-    consume(context, TokenKind::Pipe)?;
+fn parse_group_fn(context: &mut Context) -> Result<Expr, CalcError> {
+    let name = match &advance(context).kind {
+        TokenKind::Pipe => "abs",
+        TokenKind::OpenCeil => "ceil",
+        TokenKind::OpenFloor => "floor",
+        _ => panic!("Unexpected parsing error."),
+    };
 
-    Ok(Expr::FnCall(String::from("abs"), vec![group_expr]))
+    let expr = parse_expr(context)?;
+    advance(context);
+
+    Ok(Expr::FnCall(name.to_string(), vec![expr]))
 }
 
 fn parse_identifier(context: &mut Context) -> Result<Expr, CalcError> {
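
Illustrative only, with a simplified stand-in for kalk's actual Expr type: the effect of parse_group_fn is that the new grouping delimiters reuse the ordinary function-call machinery instead of getting dedicated AST nodes.

#[derive(Debug, PartialEq)]
enum Expr {
    Literal(String),
    FnCall(String, Vec<Expr>),
}

fn lower_group(opening: char, inner: Expr) -> Expr {
    // Same mapping as parse_group_fn: the opening token picks the function name.
    let name = match opening {
        '|' => "abs",
        '⌈' => "ceil",
        '⌊' => "floor",
        _ => panic!("Unexpected parsing error."),
    };

    Expr::FnCall(name.to_string(), vec![inner])
}

fn main() {
    // "⌊2.9⌋" ends up as the call floor(2.9).
    let expr = lower_group('⌊', Expr::Literal(String::from("2.9")));
    assert_eq!(
        expr,
        Expr::FnCall(String::from("floor"), vec![Expr::Literal(String::from("2.9"))])
    );
}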