Merge pull request #42 from wycats/lexer

Custom lexer
This commit is contained in:
Yehuda Katz 2019-05-29 08:27:56 -07:00 committed by GitHub
commit 8f5d959692
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 1286 additions and 648 deletions

72
Cargo.lock generated
View File

@ -379,6 +379,15 @@ dependencies = [
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ctor"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cursive"
version = "0.12.0"
@ -501,6 +510,11 @@ dependencies = [
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "directories"
version = "1.0.2"
@ -910,6 +924,27 @@ dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "logos"
version = "0.10.0-rc2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "logos-derive"
version = "0.10.0-rc2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "maplit"
version = "1.0.1"
@ -1015,9 +1050,12 @@ dependencies = [
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)",
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty_env_logger 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"prettyprint 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"prettytable-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1129,6 +1167,14 @@ dependencies = [
"num-traits 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "output_vt100"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "owning_ref"
version = "0.4.0"
@ -1214,6 +1260,17 @@ dependencies = [
"xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pretty_assertions"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pretty_env_logger"
version = "0.3.0"
@ -1452,6 +1509,14 @@ name = "rustc-demangle"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rustc-hash"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
@ -1988,6 +2053,7 @@ dependencies = [
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
"checksum csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9044e25afb0924b5a5fc5511689b0918629e85d68ea591e5e87fbf1e85ea1b3b"
"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65"
"checksum ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3b4c17619643c1252b5f690084b82639dd7fac141c57c8e77a00e0148132092c"
"checksum cursive 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7ecc7282b5361471b607c26f44148205607e26d48a2fc65bd16e7619b1ebb78"
"checksum darling 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9158d690bc62a3a57c3e45b85e4d50de2008b39345592c64efd79345c7e24be0"
"checksum darling 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fcfbcb0c5961907597a7d1148e3af036268f2b773886b8bb3eeb1e1281d3d3d6"
@ -1998,6 +2064,7 @@ dependencies = [
"checksum derive-new 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "6ca414e896ae072546f4d789f452daaecf60ddee4c9df5dc6d5936d769e3d87c"
"checksum derive_builder 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a0ca533e6abb78f9108585535ce2ae0b14c8b4504e138a9a28eaf8ba2b270c1d"
"checksum derive_builder_core 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fb484fe06ba1dc5b82f88aff700191dfc127e02b06b35e302c169706168e2528"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "72d337a64190607d4fcca2cb78982c5dd57f4916e19696b48a575fa746b6cb0f"
"checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
"checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f"
@ -2049,6 +2116,8 @@ dependencies = [
"checksum linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ae91b68aebc4ddb91978b11a1b02ddd8602a05ec19002801c5666000e05e0f83"
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)" = "e136962e0902a48fd1d8da8706fac078fdba547bf82f9d9d728cf551d367b41e"
"checksum logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)" = "5f03ecd1d993aacc6c4f3a9540e60a4f3811ddac2276dbb66dad4d42671bd5bf"
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
@ -2070,6 +2139,7 @@ dependencies = [
"checksum onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a646989adad8a19f49be2090374712931c3a59835cb5277b4530f48b417f26e7"
"checksum onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388410bf5fa341f10e58e6db3975f4bea1ac30247dd79d37a9e5ced3cb4cc3b0"
"checksum ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "18869315e81473c951eb56ad5558bbc56978562d3ecfb87abb7a1e944cea4518"
"checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9"
"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13"
"checksum pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d3058bc37c433096b2ac7afef1c5cdfae49ede0a4ffec3dfc1df1df0959d0ff0"
"checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7"
@ -2079,6 +2149,7 @@ dependencies = [
"checksum pin-utils 0.1.0-alpha.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5894c618ce612a3fa23881b152b608bafb8c56cfc22f434a3ba3120b40f7b587"
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
"checksum plist 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f4739851c08dd9a62a78beff2edf1a438517268b2c563c42fc6d9d3139e42d2a"
"checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427"
"checksum pretty_env_logger 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df8b3f4e0475def7d9c2e5de8e5a1306949849761e107b360d03e98eafaffd61"
"checksum prettyprint 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2705417f8aa07cb6308db42e55623479c1c9667942a4d5e4174c684e5da5590d"
"checksum prettytable-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e"
@ -2104,6 +2175,7 @@ dependencies = [
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
"checksum rustc-demangle 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "ccc78bfd5acd7bf3e89cffcf899e5cb1a52d6fafa8dec2739ad70c9577a57288"
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum rustyline 4.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0f47ea1ceb347d2deae482d655dc8eef4bd82363d3329baffa3818bd76fea48b"
"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f"

View File

@ -41,7 +41,12 @@ serde = "1.0.91"
serde_json = "1.0.39"
serde_derive = "1.0.91"
getset = "0.0.7"
logos = "0.10.0-rc2"
logos-derive = "0.10.0-rc2"
[dependencies.pancurses]
version = "0.16"
features = ["win32a"]
[dev-dependencies]
pretty_assertions = "0.6.1"

View File

@ -1,5 +1,6 @@
crate mod ast;
crate mod completer;
crate mod lexer;
crate mod parser;
crate mod registry;

585
src/parser/lexer.rs Normal file
View File

@ -0,0 +1,585 @@
use crate::errors::ShellError;
use derive_new::new;
use logos_derive::Logos;
use std::ops::Range;
/// Tokens lexed while the lexer is in the `Top` state.
///
/// `LexerState` is declared as the extras type, which is how the callbacks
/// below switch the lexer to another state.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Logos)]
#[extras = "LexerState"]
crate enum TopToken {
// Variant designated with `#[error]`.
#[error]
Error,
// Variant designated with `#[end]`.
#[end]
END,
// Optional leading `-` followed by one or more ASCII digits.
#[regex = "-?[0-9]+"]
Num,
// Single-quoted string.
#[regex = r#"'([^']|\\')*'"#]
SQString,
// Double-quoted string.
#[regex = r#""([^"]|\\")*""#]
DQString,
// Integer immediately followed by one or more ASCII letters
// (presumably a quantity with a unit suffix -- TODO confirm).
#[regex = "-?[0-9]+[A-Za-z]+"]
Size,
// `$`; the callback switches the lexer to the `Var` state.
#[regex = r"\$"]
#[callback = "start_variable"]
Dollar,
// Bare word: the first character is not whitespace, a digit, a quote, `$`
// or `-`; it continues until whitespace, a quote or `.`. The callback
// switches the lexer to the `AfterVariableToken` state.
#[regex = r#"[^\s0-9"'$\-][^\s"'\.]*"#]
#[callback = "end_bare_variable"]
Bare,
// Punctuation.
#[token = "|"]
Pipe,
#[token = "."]
Dot,
#[token = "{"]
OpenBrace,
#[token = "}"]
CloseBrace,
#[token = "("]
OpenParen,
#[token = ")"]
CloseParen,
// Comparison operators.
#[token = ">"]
OpGt,
#[token = "<"]
OpLt,
#[token = ">="]
OpGte,
#[token = "<="]
OpLte,
#[token = "=="]
OpEq,
#[token = "!="]
OpNeq,
// One or more whitespace characters.
#[regex = r"\s+"]
Whitespace,
}
/// Callback for `TopToken::Dollar`: switches the lexer to the `Var` state so
/// that the text following `$` is lexed as a `VariableToken`.
fn start_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
    lex.extras.current = LexerStateName::Var;
}
/// Callback for `TopToken::Bare`: switches the lexer to the
/// `AfterVariableToken` state, so a `.` directly after a bare word is lexed
/// as the start of a member access.
fn end_bare_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
/// Tokens lexed while the lexer is in the `Var` state, which is entered
/// right after a `$` has been lexed.
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
#[extras = "LexerState"]
crate enum VariableToken {
// Variant designated with `#[error]`.
#[error]
Error,
// Variant designated with `#[end]`.
#[end]
END,
// Variable name: an ASCII letter followed by letters, digits, `-`, `?`
// or `!`. The callback switches the lexer to the `AfterVariableToken` state.
#[regex = r"[A-Za-z][A-Za-z0-9\-?!]*"]
#[callback = "end_variable"]
Variable,
}
/// Callback for `VariableToken::Variable`: once the variable name has been
/// lexed, switches the lexer to the `AfterVariableToken` state.
fn end_variable<S>(lex: &mut logos::Lexer<VariableToken, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
/// Tokens lexed while the lexer is in the `AfterVariableToken` state, which
/// is entered after a variable name, a member name or a bare word.
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
#[extras = "LexerState"]
crate enum AfterVariableToken {
// Variant designated with `#[error]`.
#[error]
Error,
// Variant designated with `#[end]`.
#[end]
END,
// `.`; the callback switches the lexer to the `AfterMemberDot` state.
#[token = "."]
#[callback = "start_member"]
Dot,
// A single whitespace character; the callback switches the lexer back to
// the `Top` state.
#[regex = r"\s"]
#[callback = "terminate_variable"]
Whitespace,
}
/// Callback for `AfterVariableToken::Dot`: switches the lexer to the
/// `AfterMemberDot` state so that a member name is lexed next.
fn start_member<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
    lex.extras.current = LexerStateName::AfterMemberDot;
}
/// Callback for `AfterVariableToken::Whitespace`: whitespace ends the
/// variable/path being lexed, so the lexer returns to the `Top` state.
fn terminate_variable<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
    lex.extras.current = LexerStateName::Top;
}
/// Tokens lexed while the lexer is in the `AfterMemberDot` state, which is
/// entered right after the `.` of a member access.
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
#[extras = "LexerState"]
crate enum AfterMemberDot {
// Variant designated with `#[error]`.
#[error]
Error,
// Variant designated with `#[end]`.
#[end]
END,
// Member name: an ASCII letter followed by letters, digits, `-`, `?`
// or `!`. The callback switches the lexer to the `AfterVariableToken` state.
#[regex = r"[A-Za-z][A-Za-z0-9\-?!]*"]
#[callback = "finish_member"]
Member,
// A single whitespace character. No callback is attached, so the state is
// left unchanged and a member name may still follow.
#[regex = r"\s"]
Whitespace,
}
/// Callback for `AfterMemberDot::Member`: once the member name has been
/// lexed, switches the lexer to the `AfterVariableToken` state so that a
/// further `.member` can follow.
fn finish_member<S>(lex: &mut logos::Lexer<AfterMemberDot, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
/// Names of the lexer states. The current state decides which token enum
/// the next token is lexed with.
#[derive(Debug, Clone, Copy)]
crate enum LexerStateName {
// Lex with `TopToken`.
Top,
// Lex with `VariableToken` (set after a `$`).
Var,
// Lex with `AfterMemberDot` (set after the `.` of a member access).
AfterMemberDot,
// Lex with `AfterVariableToken` (set after a variable, member or bare word).
AfterVariableToken,
}
/// A fresh lexer state starts out in `Top`.
impl Default for LexerStateName {
    fn default() -> Self {
        LexerStateName::Top
    }
}
/// Extra state carried by the logos lexer (it is named in the
/// `#[extras = "LexerState"]` attribute of every token enum in this file).
#[derive(Debug, Clone, Default)]
crate struct LexerState {
// State that selects the token enum used for the next token.
current: LexerStateName,
}
/// `LexerState` is only changed by the token callbacks, so both hooks of
/// `logos::Extras` are intentionally empty.
impl logos::Extras for LexerState {
fn on_advance(&mut self) {}
fn on_whitespace(&mut self, _byte: u8) {}
}
/// A token value together with its location and text in the source.
///
/// `#[derive(new)]` provides the `SpannedToken::new(span, slice, token)`
/// constructor used throughout this file.
#[derive(new, Debug, Clone, Eq, PartialEq)]
crate struct SpannedToken<'source, T> {
// Range of the token within the source string.
span: std::ops::Range<usize>,
// Source text of the token.
slice: &'source str,
// Token payload: a token kind, or the text itself for dots, members and whitespace.
token: T,
}
/// A token as yielded by `Lexer`, unifying the per-state token enums.
#[derive(Debug, Clone, Eq, PartialEq)]
crate enum Token<'source> {
// Token lexed in the `Top` state (whitespace excepted, see `Whitespace`).
Top(SpannedToken<'source, TopToken>),
// Token lexed in the `Var` state.
Var(SpannedToken<'source, VariableToken>),
// `.` lexed in the `AfterVariableToken` state.
Dot(SpannedToken<'source, &'source str>),
// Member name lexed in the `AfterMemberDot` state.
Member(SpannedToken<'source, &'source str>),
// Whitespace, whichever state it was lexed in.
Whitespace(SpannedToken<'source, &'source str>),
}
impl Token<'source> {
crate fn range(&self) -> &Range<usize> {
match self {
Token::Top(spanned) => &spanned.span,
Token::Var(spanned) => &spanned.span,
Token::Dot(spanned) => &spanned.span,
Token::Member(spanned) => &spanned.span,
Token::Whitespace(spanned) => &spanned.span,
}
}
crate fn slice(&self) -> &str {
match self {
Token::Top(spanned) => spanned.slice,
Token::Var(spanned) => spanned.slice,
Token::Dot(spanned) => spanned.slice,
Token::Member(spanned) => spanned.slice,
Token::Whitespace(spanned) => spanned.slice,
}
}
}
/// Stateful lexer over a source string; iterate over it to obtain `Token`s.
crate struct Lexer<'source> {
// Underlying logos lexer. It is stored as a `TopToken` lexer between calls;
// the active state lives in its `extras`.
lexer: logos::Lexer<TopToken, &'source str>,
// True until the first call to `next`, which reads the lexer's current
// token without advancing.
first: bool,
}
impl Lexer<'source> {
    /// Creates a lexer over `source`, positioned on its first token.
    crate fn new(source: &str) -> Lexer<'_> {
        let lexer: logos::Lexer<TopToken, &str> = logos::Logos::lexer(source);

        Lexer { lexer, first: true }
    }
}
/// Drives the lexer state machine: every call lexes one token with the token
/// enum selected by `extras.current` and wraps it in the matching `Token`
/// variant.
///
/// * `END` in any state ends the iteration (`None`).
/// * `Error` in the `AfterMemberDot` and `AfterVariableToken` states is
///   reported as `Err(ShellError)`.
/// * NOTE(review): `Error` in the `Top` and `Var` states is yielded as an
///   ordinary `Ok` token instead -- confirm that consumers expect this.
impl Iterator for Lexer<'source> {
    type Item = Result<Token<'source>, ShellError>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.first {
            // The first token is read from the lexer as it was constructed,
            // without advancing it.
            self.first = false;

            return match self.lexer.token {
                TopToken::END => None,
                TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
                    self.lexer.range(),
                    self.lexer.slice(),
                    self.lexer.slice(),
                )))),
                other => Some(Ok(Token::Top(SpannedToken::new(
                    self.lexer.range(),
                    self.lexer.slice(),
                    other,
                )))),
            };
        }

        // `advance` consumes the lexer, so it is handed a clone and the
        // advanced lexer is stored back afterwards.
        match self.lexer.extras.current {
            LexerStateName::Top => {
                let (lexer, range, slice, token) = advance::<TopToken>(self.lexer.clone());
                self.lexer = lexer;

                match token {
                    TopToken::END => None,
                    TopToken::Whitespace => {
                        Some(Ok(Token::Whitespace(SpannedToken::new(range, slice, slice))))
                    }
                    other => Some(Ok(Token::Top(SpannedToken::new(range, slice, other)))),
                }
            }

            LexerStateName::AfterMemberDot => {
                let (lexer, range, slice, token) = advance::<AfterMemberDot>(self.lexer.clone());
                self.lexer = lexer;

                match token {
                    AfterMemberDot::END => None,
                    AfterMemberDot::Error => {
                        Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
                    }
                    AfterMemberDot::Whitespace => {
                        Some(Ok(Token::Whitespace(SpannedToken::new(range, slice, slice))))
                    }
                    AfterMemberDot::Member => {
                        Some(Ok(Token::Member(SpannedToken::new(range, slice, slice))))
                    }
                }
            }

            LexerStateName::AfterVariableToken => {
                let (lexer, range, slice, token) =
                    advance::<AfterVariableToken>(self.lexer.clone());
                self.lexer = lexer;

                match token {
                    AfterVariableToken::END => None,
                    AfterVariableToken::Error => {
                        Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
                    }
                    AfterVariableToken::Whitespace => {
                        Some(Ok(Token::Whitespace(SpannedToken::new(range, slice, slice))))
                    }
                    AfterVariableToken::Dot => {
                        Some(Ok(Token::Dot(SpannedToken::new(range, slice, slice))))
                    }
                }
            }

            LexerStateName::Var => {
                let (lexer, range, slice, token) = advance::<VariableToken>(self.lexer.clone());
                self.lexer = lexer;

                match token {
                    VariableToken::END => None,
                    other => Some(Ok(Token::Var(SpannedToken::new(range, slice, other)))),
                }
            }
        }
    }
}
/// Lexes the next token as a `T` token and hands the lexer back as a
/// `TopToken` lexer.
///
/// Returns the converted lexer together with the range, source text and
/// kind of the token that was just lexed. These are captured before the
/// conversion back to `TopToken`, which is why they are returned separately
/// instead of being read from the returned lexer.
fn advance<T>(
    lexer: logos::Lexer<TopToken, &'source str>,
) -> (
    logos::Lexer<TopToken, &'source str>,
    Range<usize>,
    &'source str,
    T,
)
where
    T: logos::Logos<Extras = LexerState> + logos::source::WithSource<&'source str> + Copy,
{
    let lexer = lexer.advance_as::<T>();

    // `T: Copy`, so the token is copied out instead of borrowed; the lexer
    // can then be moved into `morph` without cloning it first.
    let token = lexer.token;
    let range = lexer.range();
    let slice = lexer.slice();

    (lexer.morph::<TopToken>(), range, slice, token)
}
#[cfg(test)]
mod tests {
use super::*;
use logos::Logos;
use pretty_assertions::assert_eq;
/// Lexes `source` and asserts that the result equals `tokens`.
///
/// The expected spans are derived from the lengths of the expected token
/// texts, laid out back to back from offset 0. Panics if the lexer yields
/// an error.
fn assert_lex(source: &str, tokens: &[TestToken<'_>]) {
    let lex = Lexer::new(source);

    let mut offset = 0;
    let mut expected_tokens: Vec<Token> = Vec::new();
    for token_desc in tokens {
        println!("{:?}", token_desc);
        let end = offset + token_desc.source.len();
        expected_tokens.push(token_desc.to_token(offset..end));
        offset = end;
    }

    let actual_tokens = lex
        .inspect(|item| println!("{:?}", item))
        .collect::<Result<Vec<Token>, _>>()
        .unwrap();

    assert_eq!(actual_tokens, expected_tokens);
}
/// Kind of an expected token in a test, without its span.
#[derive(Debug)]
enum TokenDesc {
// Whitespace.
Ws,
// Member name after a `.`.
Member,
// Token of the `Top` state (`TopToken::Dot` is turned into `Token::Dot`).
Top(TopToken),
// Token of the `Var` state.
Var(VariableToken),
}
/// An expected token in a test: its kind plus the source text it covers.
#[derive(Debug, new)]
struct TestToken<'source> {
// Kind of the expected token.
desc: TokenDesc,
// Expected source text; its length determines the expected span.
source: &'source str,
}
impl TestToken<'source> {
    /// Builds the `Token` the lexer is expected to yield for this
    /// description at the given `span`.
    fn to_token(&self, span: std::ops::Range<usize>) -> Token {
        let text = self.source;

        match self.desc {
            TokenDesc::Ws => Token::Whitespace(SpannedToken::new(span, text, text)),
            TokenDesc::Member => Token::Member(SpannedToken::new(span, text, text)),
            TokenDesc::Var(kind) => Token::Var(SpannedToken::new(span, text, kind)),
            // A dot is expected as `Token::Dot`, not as a `Top` token.
            TokenDesc::Top(TopToken::Dot) => Token::Dot(SpannedToken::new(span, text, ".")),
            TokenDesc::Top(kind) => Token::Top(SpannedToken::new(span, text, kind)),
        }
    }
}
// Recursive helper behind `tokens!`. It consumes the token description in
// `rest` from the left, appends one `{ expression }` group per described
// token to `accum`, and finally expands to a slice reference of the
// accumulated expressions.
macro_rules! chomp_tokens {
// `SP`: the single-space whitespace constant.
{ rest = { SP $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { SP } ] }
};
// `ws(expr)`: whitespace with explicit text; must come before the generic
// `ident(expr)` rule below, which would otherwise capture it.
{ rest = { ws($expr:expr) $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { ws($expr) } ] }
};
// `Name(expr)`: named token kind, resolved at runtime by `tok`.
{ rest = { $id:ident ( $expr:expr ) $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { tok(stringify!($id), $expr) } ] }
};
// Any other single token tree (e.g. a string literal such as "."): resolved by `tk`.
{ rest = { $token:tt $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { tk($token) } ] }
};
// Nothing left: emit the accumulated expressions as a slice.
{ rest = { }, accum = [ $({ $($tokens:tt)* })* ] } => {
&[ $($($tokens)*),* ]
}
}
// Entry point of the test DSL: `tokens![ "$" Var("it") SP Num("10") ]`
// expands to a slice of `TestToken`s via `chomp_tokens!`.
macro_rules! tokens {
($($tokens:tt)*) => {
chomp_tokens! { rest = { $($tokens)* }, accum = [] }
};
}
/// Integers, including negative ones, lex as `Num` tokens separated by
/// whitespace tokens.
#[test]
fn test_tokenize_number() {
assert_lex("123", tokens![Num("123")]);
// Equivalent without the macro: assert_lex("123", &[tok("Num", "123")]);
assert_lex(
"123 456 789",
tokens![Num("123") SP Num("456") SP Num("789")],
);
assert_lex("-123", tokens![Num("-123")]);
assert_lex(
"123 -456 789",
tokens![
Num("123")
ws(" ")
Num("-456")
ws(" ")
Num("789")
],
)
}
/// `$name` lexes as a `$` token followed by a variable token.
#[test]
fn test_tokenize_variable() {
assert_lex("$var", tokens![ "$" Var("var")]);
}
/// Double- and single-quoted strings lex as one token each, quotes included
/// in the token text.
#[test]
fn test_tokenize_string() {
assert_lex(
r#" "hello world" "#,
tokens![ SP DQString(r#""hello world""#) SP ],
);
assert_lex(
r#" 'hello world' "#,
tokens![ SP SQString(r#"'hello world'"#) SP ],
);
}
/// `$var.member` lexes as `$`, variable, `.`, member; whitespace between the
/// dot and the member is accepted and yielded as a whitespace token.
#[test]
fn test_tokenize_path() {
assert_lex("$var.bar", tokens![ "$" Var("var") "." Member("bar") ]);
assert_lex("$it.bar", tokens![ "$" Var("it") "." Member("bar") ]);
assert_lex("$var. bar", tokens![ "$" Var("var") "." SP Member("bar") ]);
assert_lex("$it. bar", tokens![ "$" Var("it") "." SP Member("bar") ]);
}
/// Each comparison operator lexes as its own token between a path and a number.
#[test]
fn test_tokenize_operator() {
assert_lex(
"$it.cpu > 10",
tokens![ "$" Var("it") "." Member("cpu") SP ">" SP Num("10") ],
);
assert_lex(
"$it.cpu < 10",
tokens![ "$" Var("it") "." Member("cpu") SP "<" SP Num("10") ],
);
assert_lex(
"$it.cpu >= 10",
tokens![ "$" Var("it") "." Member("cpu") SP ">=" SP Num("10") ],
);
assert_lex(
"$it.cpu <= 10",
tokens![ "$" Var("it") "." Member("cpu") SP "<=" SP Num("10") ],
);
assert_lex(
"$it.cpu == 10",
tokens![ "$" Var("it") "." Member("cpu") SP "==" SP Num("10") ],
);
assert_lex(
"$it.cpu != 10",
tokens![ "$" Var("it") "." Member("cpu") SP "!=" SP Num("10") ],
);
}
/// Whole pipelines: bare words, pipes, a braced block with a path
/// comparison, and a bare word containing a dot (`input2.json`), which is
/// expected as bare word, `.`, member.
#[test]
fn test_tokenize_smoke() {
assert_lex(
"ls | where cpu > 10",
tokens![ Bare("ls") SP "|" SP Bare("where") SP Bare("cpu") SP ">" SP Num("10") ],
);
assert_lex(
"ls | where { $it.cpu > 10 }",
tokens![ Bare("ls") SP "|" SP Bare("where") SP "{" SP "$" Var("it") "." Member("cpu") SP ">" SP Num("10") SP "}" ],
);
assert_lex(
"open input2.json | from-json | select glossary",
tokens![ Bare("open") SP Bare("input2") "." Member("json") SP "|" SP Bare("from-json") SP "|" SP Bare("select") SP Bare("glossary") ],
);
}
/// Builds the expected token for a `Name(value)` entry of the `tokens!` DSL.
///
/// Panics on a name that is not part of the DSL.
fn tok(name: &str, value: &'source str) -> TestToken<'source> {
    let desc = match name {
        "Num" => TokenDesc::Top(TopToken::Num),
        "Var" => TokenDesc::Var(VariableToken::Variable),
        "Member" => TokenDesc::Member,
        "Bare" => TokenDesc::Top(TopToken::Bare),
        "DQString" => TokenDesc::Top(TopToken::DQString),
        "SQString" => TokenDesc::Top(TopToken::SQString),
        other => panic!("Unexpected token name in test: {}", other),
    };

    TestToken::new(desc, value)
}
fn tk(name: &'source str) -> TestToken<'source> {
let token = match name {
"." => TopToken::Dot,
"$" => TopToken::Dollar,
"|" => TopToken::Pipe,
"{" => TopToken::OpenBrace,
"}" => TopToken::CloseBrace,
">" => TopToken::OpGt,
"<" => TopToken::OpLt,
">=" => TopToken::OpGte,
"<=" => TopToken::OpLte,
"==" => TopToken::OpEq,
"!=" => TopToken::OpNeq,
other => panic!("Unexpected token name in test: {}", other),
};
TestToken::new(TokenDesc::Top(token), name)
}
/// Expected token for a single space; written as `SP` in the `tokens!` DSL.
const SP: TestToken<'static> = TestToken {
desc: TokenDesc::Ws,
source: " ",
};
fn ws(string: &'static str) -> TestToken<'source> {
TestToken::new(TokenDesc::Ws, string)
}
}

View File

@ -10,8 +10,8 @@ pub Pipeline: Pipeline = {
}
Command: ParsedCommand = {
<command:RawBareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
<command:RawBareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
<command:BareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
<command:BareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
}
Leaf: Expression = {
@ -46,11 +46,12 @@ WholeExpression: Expression = {
}
PathExpression: Expression = {
<head:WholeExpression> <tail: ( "." <Member> )*> => Expression::Path(Box::new(Path::new(head, tail)))
<head:WholeExpression> <tail: ( "." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
}
Expr: Expression = {
<RawBareWord> => Expression::Leaf(Leaf::Bare(<>)),
<BareWord> => Expression::Leaf(Leaf::Bare(<>)),
<WholeExpression> => <>,
<PathExpression>
}
@ -58,11 +59,11 @@ Variable: Variable = {
"$true" => Variable::True,
"$false" => Variable::False,
"$it" => Variable::It,
"$" <RawBareWord> => Variable::Other(<>.to_string()),
<VariableBody> => Variable::Other(<>[1..].to_string()),
}
Member: String = {
<RawBareWord>,
<BareWord>,
<String>
}
@ -76,8 +77,8 @@ Operator: Operator = {
}
Flag: Flag = {
"-" <RawBareWord> => Flag::Shorthand(<>.to_string()),
"--" <RawBareWord> => Flag::Longhand(<>.to_string()),
"-" <BareWord> => Flag::Shorthand(<>.to_string()),
"--" <BareWord> => Flag::Longhand(<>.to_string()),
}
String: String = {
@ -85,7 +86,12 @@ String: String = {
DQString,
}
RawBareWord: String = <s:r#"[^0-9"'\-][^\s"']*"#> => <>.to_string();
BareWord = {
<RawBareWord>,
}
VariableBody: &'input str = <s:r"\$\p{XID_Start}(\p{XID_Continue}|[\-?!])*"> => <>;
RawBareWord: String = <s:r#"[^0-9"'$\-\.][^\s"']*"#> => <>.to_string();
DQString: String = <s:r#""([^"]|\\")*""#> => s[1..s.len() - 1].to_string();
SQString: String = <s:r#"'([^']|\\')*'"#> => s[1..s.len() - 1].to_string();
Num: i64 = <s:r"-?[0-9]+"> => i64::from_str(s).unwrap();

File diff suppressed because one or more lines are too long