mirror of
https://github.com/nushell/nushell.git
synced 2024-12-22 07:02:19 +01:00
Custom lexer
This commit is contained in:
parent
89b0bf1926
commit
e4013a008c
72
Cargo.lock
generated
72
Cargo.lock
generated
@ -379,6 +379,15 @@ dependencies = [
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cursive"
|
||||
version = "0.12.0"
|
||||
@ -501,6 +510,11 @@ dependencies = [
|
||||
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "difference"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "directories"
|
||||
version = "1.0.2"
|
||||
@ -910,6 +924,27 @@ dependencies = [
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.10.0-rc2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.10.0-rc2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.1"
|
||||
@ -1015,9 +1050,12 @@ dependencies = [
|
||||
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lalrpop-util 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pretty_env_logger 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"prettyprint 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"prettytable-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -1129,6 +1167,14 @@ dependencies = [
|
||||
"num-traits 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "output_vt100"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "owning_ref"
|
||||
version = "0.4.0"
|
||||
@ -1214,6 +1260,17 @@ dependencies = [
|
||||
"xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_env_logger"
|
||||
version = "0.3.0"
|
||||
@ -1452,6 +1509,14 @@ name = "rustc-demangle"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
@ -1988,6 +2053,7 @@ dependencies = [
|
||||
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
|
||||
"checksum csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9044e25afb0924b5a5fc5511689b0918629e85d68ea591e5e87fbf1e85ea1b3b"
|
||||
"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65"
|
||||
"checksum ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3b4c17619643c1252b5f690084b82639dd7fac141c57c8e77a00e0148132092c"
|
||||
"checksum cursive 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7ecc7282b5361471b607c26f44148205607e26d48a2fc65bd16e7619b1ebb78"
|
||||
"checksum darling 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9158d690bc62a3a57c3e45b85e4d50de2008b39345592c64efd79345c7e24be0"
|
||||
"checksum darling 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fcfbcb0c5961907597a7d1148e3af036268f2b773886b8bb3eeb1e1281d3d3d6"
|
||||
@ -1998,6 +2064,7 @@ dependencies = [
|
||||
"checksum derive-new 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "6ca414e896ae072546f4d789f452daaecf60ddee4c9df5dc6d5936d769e3d87c"
|
||||
"checksum derive_builder 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a0ca533e6abb78f9108585535ce2ae0b14c8b4504e138a9a28eaf8ba2b270c1d"
|
||||
"checksum derive_builder_core 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fb484fe06ba1dc5b82f88aff700191dfc127e02b06b35e302c169706168e2528"
|
||||
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
|
||||
"checksum directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "72d337a64190607d4fcca2cb78982c5dd57f4916e19696b48a575fa746b6cb0f"
|
||||
"checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
|
||||
"checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f"
|
||||
@ -2049,6 +2116,8 @@ dependencies = [
|
||||
"checksum linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ae91b68aebc4ddb91978b11a1b02ddd8602a05ec19002801c5666000e05e0f83"
|
||||
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
|
||||
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
|
||||
"checksum logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)" = "e136962e0902a48fd1d8da8706fac078fdba547bf82f9d9d728cf551d367b41e"
|
||||
"checksum logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)" = "5f03ecd1d993aacc6c4f3a9540e60a4f3811ddac2276dbb66dad4d42671bd5bf"
|
||||
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
|
||||
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
|
||||
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
|
||||
@ -2070,6 +2139,7 @@ dependencies = [
|
||||
"checksum onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a646989adad8a19f49be2090374712931c3a59835cb5277b4530f48b417f26e7"
|
||||
"checksum onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388410bf5fa341f10e58e6db3975f4bea1ac30247dd79d37a9e5ced3cb4cc3b0"
|
||||
"checksum ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "18869315e81473c951eb56ad5558bbc56978562d3ecfb87abb7a1e944cea4518"
|
||||
"checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9"
|
||||
"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13"
|
||||
"checksum pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d3058bc37c433096b2ac7afef1c5cdfae49ede0a4ffec3dfc1df1df0959d0ff0"
|
||||
"checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7"
|
||||
@ -2079,6 +2149,7 @@ dependencies = [
|
||||
"checksum pin-utils 0.1.0-alpha.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5894c618ce612a3fa23881b152b608bafb8c56cfc22f434a3ba3120b40f7b587"
|
||||
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
|
||||
"checksum plist 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f4739851c08dd9a62a78beff2edf1a438517268b2c563c42fc6d9d3139e42d2a"
|
||||
"checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427"
|
||||
"checksum pretty_env_logger 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df8b3f4e0475def7d9c2e5de8e5a1306949849761e107b360d03e98eafaffd61"
|
||||
"checksum prettyprint 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2705417f8aa07cb6308db42e55623479c1c9667942a4d5e4174c684e5da5590d"
|
||||
"checksum prettytable-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e"
|
||||
@ -2104,6 +2175,7 @@ dependencies = [
|
||||
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
|
||||
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
|
||||
"checksum rustc-demangle 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "ccc78bfd5acd7bf3e89cffcf899e5cb1a52d6fafa8dec2739ad70c9577a57288"
|
||||
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
|
||||
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
"checksum rustyline 4.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0f47ea1ceb347d2deae482d655dc8eef4bd82363d3329baffa3818bd76fea48b"
|
||||
"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f"
|
||||
|
@ -41,7 +41,12 @@ serde = "1.0.91"
|
||||
serde_json = "1.0.39"
|
||||
serde_derive = "1.0.91"
|
||||
getset = "0.0.7"
|
||||
logos = "0.10.0-rc2"
|
||||
logos-derive = "0.10.0-rc2"
|
||||
|
||||
[dependencies.pancurses]
|
||||
version = "0.16"
|
||||
features = ["win32a"]
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "0.6.1"
|
||||
|
@ -1,5 +1,6 @@
|
||||
crate mod ast;
|
||||
crate mod completer;
|
||||
crate mod lexer;
|
||||
crate mod parser;
|
||||
crate mod registry;
|
||||
|
||||
|
585
src/parser/lexer.rs
Normal file
585
src/parser/lexer.rs
Normal file
@ -0,0 +1,585 @@
|
||||
use crate::errors::ShellError;
|
||||
use derive_new::new;
|
||||
use logos_derive::Logos;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Logos)]
|
||||
#[extras = "LexerState"]
|
||||
crate enum TopToken {
|
||||
#[error]
|
||||
Error,
|
||||
|
||||
#[end]
|
||||
END,
|
||||
|
||||
#[regex = "-?[0-9]+"]
|
||||
Num,
|
||||
|
||||
#[regex = r#"'([^']|\\')*'"#]
|
||||
SQString,
|
||||
|
||||
#[regex = r#""([^"]|\\")*""#]
|
||||
DQString,
|
||||
|
||||
#[regex = "-?[0-9]+[A-Za-z]+"]
|
||||
Size,
|
||||
|
||||
#[regex = r"\$"]
|
||||
#[callback = "start_variable"]
|
||||
Dollar,
|
||||
|
||||
#[regex = r#"[^\s0-9"'$\-][^\s"'\.]*"#]
|
||||
#[callback = "end_bare_variable"]
|
||||
Bare,
|
||||
|
||||
#[token = "|"]
|
||||
Pipe,
|
||||
|
||||
#[token = "."]
|
||||
Dot,
|
||||
|
||||
#[token = "{"]
|
||||
OpenBrace,
|
||||
|
||||
#[token = "}"]
|
||||
CloseBrace,
|
||||
|
||||
#[token = "("]
|
||||
OpenParen,
|
||||
|
||||
#[token = ")"]
|
||||
CloseParen,
|
||||
|
||||
#[token = ">"]
|
||||
OpGt,
|
||||
|
||||
#[token = "<"]
|
||||
OpLt,
|
||||
|
||||
#[token = ">="]
|
||||
OpGte,
|
||||
|
||||
#[token = "<="]
|
||||
OpLte,
|
||||
|
||||
#[token = "=="]
|
||||
OpEq,
|
||||
|
||||
#[token = "!="]
|
||||
OpNeq,
|
||||
|
||||
#[regex = r"\s+"]
|
||||
Whitespace,
|
||||
}
|
||||
|
||||
/// Callback for `TopToken::Dollar`: the next token is lexed in the `Var` state.
fn start_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
    lex.extras.current = LexerStateName::Var;
}
|
||||
|
||||
/// Callback for `TopToken::Bare`: the next token is lexed in the
/// `AfterVariableToken` state.
fn end_bare_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
|
||||
|
||||
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
|
||||
#[extras = "LexerState"]
|
||||
crate enum VariableToken {
|
||||
#[error]
|
||||
Error,
|
||||
|
||||
#[end]
|
||||
END,
|
||||
|
||||
#[regex = r"[A-Za-z][A-Za-z0-9\-?!]*"]
|
||||
#[callback = "end_variable"]
|
||||
Variable,
|
||||
}
|
||||
|
||||
/// Callback for `VariableToken::Variable`: the next token is lexed in the
/// `AfterVariableToken` state.
fn end_variable<S>(lex: &mut logos::Lexer<VariableToken, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
|
||||
|
||||
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
|
||||
#[extras = "LexerState"]
|
||||
crate enum AfterVariableToken {
|
||||
#[error]
|
||||
Error,
|
||||
|
||||
#[end]
|
||||
END,
|
||||
|
||||
#[token = "."]
|
||||
#[callback = "start_member"]
|
||||
Dot,
|
||||
|
||||
#[regex = r"\s"]
|
||||
#[callback = "terminate_variable"]
|
||||
Whitespace,
|
||||
}
|
||||
|
||||
/// Callback for `AfterVariableToken::Dot`: the next token is lexed in the
/// `AfterMemberDot` state.
fn start_member<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
    lex.extras.current = LexerStateName::AfterMemberDot;
}
|
||||
|
||||
/// Callback for `AfterVariableToken::Whitespace`: whitespace ends the path,
/// so the next token is lexed in the `Top` state.
fn terminate_variable<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
    lex.extras.current = LexerStateName::Top;
}
|
||||
|
||||
#[derive(Logos, Debug, Clone, Copy, Eq, PartialEq)]
|
||||
#[extras = "LexerState"]
|
||||
crate enum AfterMemberDot {
|
||||
#[error]
|
||||
Error,
|
||||
|
||||
#[end]
|
||||
END,
|
||||
|
||||
#[regex = r"[A-Za-z][A-Za-z0-9\-?!]*"]
|
||||
#[callback = "finish_member"]
|
||||
Member,
|
||||
|
||||
#[regex = r"\s"]
|
||||
Whitespace,
|
||||
}
|
||||
|
||||
/// Callback for `AfterMemberDot::Member`: the next token is lexed in the
/// `AfterVariableToken` state, so a further `.member` can follow.
fn finish_member<S>(lex: &mut logos::Lexer<AfterMemberDot, S>) {
    lex.extras.current = LexerStateName::AfterVariableToken;
}
|
||||
|
||||
/// Names the token set the lexer will use for the next token.
#[derive(Debug, Clone, Copy)]
pub(crate) enum LexerStateName {
    /// Lex the next token as a `TopToken`.
    Top,
    /// Lex the next token as a `VariableToken`.
    Var,
    /// Lex the next token as an `AfterMemberDot` token.
    AfterMemberDot,
    /// Lex the next token as an `AfterVariableToken` token.
    AfterVariableToken,
}
|
||||
|
||||
impl Default for LexerStateName {
|
||||
fn default() -> LexerStateName {
|
||||
LexerStateName::Top
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
crate struct LexerState {
|
||||
current: LexerStateName,
|
||||
}
|
||||
|
||||
/// Both logos hooks are deliberately empty: state changes happen only in the
/// token callbacks.
impl logos::Extras for LexerState {
    /// No bookkeeping is done when the lexer advances.
    fn on_advance(&mut self) {}

    /// Whitespace bytes are ignored here.
    fn on_whitespace(&mut self, _: u8) {}
}
|
||||
|
||||
#[derive(new, Debug, Clone, Eq, PartialEq)]
|
||||
crate struct SpannedToken<'source, T> {
|
||||
span: std::ops::Range<usize>,
|
||||
slice: &'source str,
|
||||
token: T,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
crate enum Token<'source> {
|
||||
Top(SpannedToken<'source, TopToken>),
|
||||
Var(SpannedToken<'source, VariableToken>),
|
||||
Dot(SpannedToken<'source, &'source str>),
|
||||
Member(SpannedToken<'source, &'source str>),
|
||||
Whitespace(SpannedToken<'source, &'source str>),
|
||||
}
|
||||
|
||||
impl Token<'source> {
|
||||
crate fn range(&self) -> &Range<usize> {
|
||||
match self {
|
||||
Token::Top(spanned) => &spanned.span,
|
||||
Token::Var(spanned) => &spanned.span,
|
||||
Token::Dot(spanned) => &spanned.span,
|
||||
Token::Member(spanned) => &spanned.span,
|
||||
Token::Whitespace(spanned) => &spanned.span,
|
||||
}
|
||||
}
|
||||
|
||||
crate fn slice(&self) -> &str {
|
||||
match self {
|
||||
Token::Top(spanned) => spanned.slice,
|
||||
Token::Var(spanned) => spanned.slice,
|
||||
Token::Dot(spanned) => spanned.slice,
|
||||
Token::Member(spanned) => spanned.slice,
|
||||
Token::Whitespace(spanned) => spanned.slice,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
crate struct Lexer<'source> {
|
||||
lexer: logos::Lexer<TopToken, &'source str>,
|
||||
first: bool,
|
||||
// state: LexerState,
|
||||
}
|
||||
|
||||
impl Lexer<'source> {
|
||||
crate fn new(source: &str) -> Lexer<'_> {
|
||||
Lexer {
|
||||
first: true,
|
||||
lexer: logos::Logos::lexer(source),
|
||||
// state: LexerState::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Lexer<'source> {
|
||||
type Item = Result<Token<'source>, ShellError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.first {
|
||||
self.first = false;
|
||||
|
||||
match self.lexer.token {
|
||||
TopToken::END => None,
|
||||
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
|
||||
self.lexer.range(),
|
||||
self.lexer.slice(),
|
||||
self.lexer.slice(),
|
||||
)))),
|
||||
_ => {
|
||||
let token = Token::Top(SpannedToken::new(
|
||||
self.lexer.range(),
|
||||
self.lexer.slice(),
|
||||
self.lexer.token,
|
||||
));
|
||||
Some(Ok(token))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("STATE={:?}", self.lexer.extras);
|
||||
|
||||
match self.lexer.extras.current {
|
||||
LexerStateName::Top => {
|
||||
let (lexer, range, slice, token) = advance::<TopToken>(self.lexer.clone());
|
||||
self.lexer = lexer;
|
||||
|
||||
match token {
|
||||
TopToken::END => None,
|
||||
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
|
||||
range, slice, slice,
|
||||
)))),
|
||||
other => {
|
||||
let token = Token::Top(SpannedToken::new(range, slice, other));
|
||||
Some(Ok(token))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LexerStateName::AfterMemberDot => {
|
||||
let (lexer, range, slice, token) =
|
||||
advance::<AfterMemberDot>(self.lexer.clone());
|
||||
self.lexer = lexer;
|
||||
|
||||
match token {
|
||||
AfterMemberDot::END => None,
|
||||
AfterMemberDot::Error => {
|
||||
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
|
||||
}
|
||||
AfterMemberDot::Whitespace => Some(Ok(Token::Whitespace(
|
||||
SpannedToken::new(range, slice, slice),
|
||||
))),
|
||||
AfterMemberDot::Member => {
|
||||
Some(Ok(Token::Member(SpannedToken::new(range, slice, slice))))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LexerStateName::AfterVariableToken => {
|
||||
let (lexer, range, slice, token) =
|
||||
advance::<AfterVariableToken>(self.lexer.clone());
|
||||
self.lexer = lexer;
|
||||
|
||||
match token {
|
||||
AfterVariableToken::END => None,
|
||||
AfterVariableToken::Error => {
|
||||
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
|
||||
}
|
||||
AfterVariableToken::Whitespace => Some(Ok(Token::Whitespace(
|
||||
SpannedToken::new(range, slice, slice),
|
||||
))),
|
||||
AfterVariableToken::Dot => {
|
||||
Some(Ok(Token::Dot(SpannedToken::new(range, slice, slice))))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LexerStateName::Var => {
|
||||
let (lexer, range, slice, token) = advance::<VariableToken>(self.lexer.clone());
|
||||
self.lexer = lexer;
|
||||
|
||||
match token {
|
||||
VariableToken::END => None,
|
||||
other => {
|
||||
let token = Token::Var(SpannedToken::new(range, slice, other));
|
||||
Some(Ok(token))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn advance<T>(
|
||||
lexer: logos::Lexer<TopToken, &'source str>,
|
||||
) -> (
|
||||
logos::Lexer<TopToken, &'source str>,
|
||||
Range<usize>,
|
||||
&'source str,
|
||||
T,
|
||||
)
|
||||
where
|
||||
T: logos::Logos<Extras = LexerState> + logos::source::WithSource<&'source str> + Copy,
|
||||
{
|
||||
let lexer = lexer.advance_as::<T>();
|
||||
let token = &lexer.token;
|
||||
let range = lexer.range();
|
||||
let slice = lexer.slice();
|
||||
(lexer.clone().morph::<TopToken>(), range, slice, *token)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use logos::Logos;
    use pretty_assertions::assert_eq;

    /// Lexes `source` and asserts that the result equals `tokens`.
    ///
    /// Expected spans are computed by laying the descriptions end to end,
    /// so `tokens` must cover `source` with no gaps.
    fn assert_lex(source: &str, tokens: &[TestToken<'_>]) {
        let lex = Lexer::new(source);
        let mut current = 0;

        let expected_tokens: Vec<Token> = tokens
            .iter()
            .map(|token_desc| {
                println!("{:?}", token_desc);

                let len = token_desc.source.len();
                let range = current..(current + len);
                current += len;

                token_desc.to_token(range)
            })
            .collect();

        let actual_tokens: Result<Vec<Token>, _> =
            lex.inspect(|item| println!("{:?}", item)).collect();

        let actual_tokens = actual_tokens.unwrap();

        assert_eq!(actual_tokens, expected_tokens);
    }

    /// The kind of token a `TestToken` is expected to become.
    #[derive(Debug)]
    enum TokenDesc {
        Ws,
        Member,
        Top(TopToken),
        Var(VariableToken),
    }

    /// An expected token: its kind plus the exact source text it covers.
    #[derive(Debug, new)]
    struct TestToken<'source> {
        desc: TokenDesc,
        source: &'source str,
    }

    impl<'source> TestToken<'source> {
        /// Builds the `Token` the lexer is expected to yield for `span`.
        fn to_token(&self, span: Range<usize>) -> Token<'_> {
            match self.desc {
                // A "." is described as a top-level token but is expected as
                // `Token::Dot`.
                TokenDesc::Top(TopToken::Dot) => {
                    Token::Dot(SpannedToken::new(span, self.source, "."))
                }
                TokenDesc::Top(tok) => Token::Top(SpannedToken::new(span, self.source, tok)),
                TokenDesc::Var(tok) => Token::Var(SpannedToken::new(span, self.source, tok)),
                TokenDesc::Member => {
                    Token::Member(SpannedToken::new(span, self.source, self.source))
                }
                TokenDesc::Ws => {
                    Token::Whitespace(SpannedToken::new(span, self.source, self.source))
                }
            }
        }
    }

    // Turns the compact token notation into `TestToken` expressions:
    // `SP` is one space, `ws(" ")` is explicit whitespace, `Name("text")`
    // becomes `tok("Name", "text")`, and a bare literal becomes `tk(literal)`.
    macro_rules! chomp_tokens {
        { rest = { SP $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
            chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { SP } ] }
        };

        { rest = { ws($expr:expr) $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
            chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { ws($expr) } ] }
        };

        { rest = { $id:ident ( $expr:expr ) $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
            chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { tok(stringify!($id), $expr) } ] }
        };

        { rest = { $token:tt $($rest:tt)* }, accum = [ $($accum:tt)* ] } => {
            chomp_tokens! { rest = { $($rest)* }, accum = [ $($accum)* { tk($token) } ] }
        };

        { rest = { }, accum = [ $({ $($tokens:tt)* })* ] } => {
            &[ $($($tokens)*),* ]
        }
    }

    macro_rules! tokens {
        ($($tokens:tt)*) => {
            chomp_tokens! { rest = { $($tokens)* }, accum = [] }
        };
    }

    #[test]
    fn test_tokenize_number() {
        assert_lex("123", tokens![Num("123")]);
        assert_lex(
            "123 456 789",
            tokens![Num("123") SP Num("456") SP Num("789")],
        );

        assert_lex("-123", tokens![Num("-123")]);

        assert_lex(
            "123 -456 789",
            tokens![
                Num("123")
                ws(" ")
                Num("-456")
                ws(" ")
                Num("789")
            ],
        )
    }

    #[test]
    fn test_tokenize_variable() {
        assert_lex("$var", tokens![ "$" Var("var")]);
    }

    #[test]
    fn test_tokenize_string() {
        assert_lex(
            r#" "hello world" "#,
            tokens![ SP DQString(r#""hello world""#) SP ],
        );

        assert_lex(
            r#" 'hello world' "#,
            tokens![ SP SQString(r#"'hello world'"#) SP ],
        );
    }

    #[test]
    fn test_tokenize_path() {
        assert_lex("$var.bar", tokens![ "$" Var("var") "." Member("bar") ]);
        assert_lex("$it.bar", tokens![ "$" Var("it") "." Member("bar") ]);
        assert_lex("$var. bar", tokens![ "$" Var("var") "." SP Member("bar") ]);
        assert_lex("$it. bar", tokens![ "$" Var("it") "." SP Member("bar") ]);
    }

    #[test]
    fn test_tokenize_operator() {
        assert_lex(
            "$it.cpu > 10",
            tokens![ "$" Var("it") "." Member("cpu") SP ">" SP Num("10") ],
        );

        assert_lex(
            "$it.cpu < 10",
            tokens![ "$" Var("it") "." Member("cpu") SP "<" SP Num("10") ],
        );

        assert_lex(
            "$it.cpu >= 10",
            tokens![ "$" Var("it") "." Member("cpu") SP ">=" SP Num("10") ],
        );

        assert_lex(
            "$it.cpu <= 10",
            tokens![ "$" Var("it") "." Member("cpu") SP "<=" SP Num("10") ],
        );

        assert_lex(
            "$it.cpu == 10",
            tokens![ "$" Var("it") "." Member("cpu") SP "==" SP Num("10") ],
        );

        assert_lex(
            "$it.cpu != 10",
            tokens![ "$" Var("it") "." Member("cpu") SP "!=" SP Num("10") ],
        );
    }

    #[test]
    fn test_tokenize_smoke() {
        assert_lex(
            "ls | where cpu > 10",
            tokens![ Bare("ls") SP "|" SP Bare("where") SP Bare("cpu") SP ">" SP Num("10") ],
        );

        assert_lex(
            "ls | where { $it.cpu > 10 }",
            tokens![ Bare("ls") SP "|" SP Bare("where") SP "{" SP "$" Var("it") "." Member("cpu") SP ">" SP Num("10") SP "}" ],
        );

        assert_lex(
            "open input2.json | from-json | select glossary",
            tokens![ Bare("open") SP Bare("input2") "." Member("json") SP "|" SP Bare("from-json") SP "|" SP Bare("select") SP Bare("glossary") ],
        );
    }

    /// Builds the expected token for the `Name("text")` notation.
    ///
    /// Panics on a name this helper does not know.
    fn tok<'source>(name: &str, value: &'source str) -> TestToken<'source> {
        match name {
            "Num" => TestToken::new(TokenDesc::Top(TopToken::Num), value),
            "Var" => TestToken::new(TokenDesc::Var(VariableToken::Variable), value),
            "Member" => TestToken::new(TokenDesc::Member, value),
            "Bare" => TestToken::new(TokenDesc::Top(TopToken::Bare), value),
            "DQString" => TestToken::new(TokenDesc::Top(TopToken::DQString), value),
            "SQString" => TestToken::new(TokenDesc::Top(TopToken::SQString), value),
            other => panic!("Unexpected token name in test: {}", other),
        }
    }

    /// Builds the expected token for a punctuation literal such as `"|"`.
    ///
    /// Panics on a literal this helper does not know.
    fn tk<'source>(name: &'source str) -> TestToken<'source> {
        let token = match name {
            "." => TopToken::Dot,
            "$" => TopToken::Dollar,
            "|" => TopToken::Pipe,
            "{" => TopToken::OpenBrace,
            "}" => TopToken::CloseBrace,
            ">" => TopToken::OpGt,
            "<" => TopToken::OpLt,
            ">=" => TopToken::OpGte,
            "<=" => TopToken::OpLte,
            "==" => TopToken::OpEq,
            "!=" => TopToken::OpNeq,
            other => panic!("Unexpected token name in test: {}", other),
        };

        TestToken::new(TokenDesc::Top(token), name)
    }

    /// A single space of expected whitespace.
    const SP: TestToken<'static> = TestToken {
        desc: TokenDesc::Ws,
        source: " ",
    };

    /// Expected whitespace with explicit text.
    fn ws(string: &'static str) -> TestToken<'static> {
        TestToken::new(TokenDesc::Ws, string)
    }
}
|
@ -10,8 +10,8 @@ pub Pipeline: Pipeline = {
|
||||
}
|
||||
|
||||
Command: ParsedCommand = {
|
||||
<command:RawBareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
|
||||
<command:RawBareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
|
||||
<command:BareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
|
||||
<command:BareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
|
||||
}
|
||||
|
||||
Leaf: Expression = {
|
||||
@ -46,11 +46,12 @@ WholeExpression: Expression = {
|
||||
}
|
||||
|
||||
PathExpression: Expression = {
|
||||
<head:WholeExpression> <tail: ( "." <Member> )*> => Expression::Path(Box::new(Path::new(head, tail)))
|
||||
<head:WholeExpression> <tail: ( "." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
|
||||
}
|
||||
|
||||
Expr: Expression = {
|
||||
<RawBareWord> => Expression::Leaf(Leaf::Bare(<>)),
|
||||
<BareWord> => Expression::Leaf(Leaf::Bare(<>)),
|
||||
<WholeExpression> => <>,
|
||||
<PathExpression>
|
||||
}
|
||||
|
||||
@ -58,11 +59,11 @@ Variable: Variable = {
|
||||
"$true" => Variable::True,
|
||||
"$false" => Variable::False,
|
||||
"$it" => Variable::It,
|
||||
"$" <RawBareWord> => Variable::Other(<>.to_string()),
|
||||
<VariableBody> => Variable::Other(<>[1..].to_string()),
|
||||
}
|
||||
|
||||
Member: String = {
|
||||
<RawBareWord>,
|
||||
<BareWord>,
|
||||
<String>
|
||||
}
|
||||
|
||||
@ -76,8 +77,8 @@ Operator: Operator = {
|
||||
}
|
||||
|
||||
Flag: Flag = {
|
||||
"-" <RawBareWord> => Flag::Shorthand(<>.to_string()),
|
||||
"--" <RawBareWord> => Flag::Longhand(<>.to_string()),
|
||||
"-" <BareWord> => Flag::Shorthand(<>.to_string()),
|
||||
"--" <BareWord> => Flag::Longhand(<>.to_string()),
|
||||
}
|
||||
|
||||
String: String = {
|
||||
@ -85,7 +86,12 @@ String: String = {
|
||||
DQString,
|
||||
}
|
||||
|
||||
RawBareWord: String = <s:r#"[^0-9"'\-][^\s"']*"#> => <>.to_string();
|
||||
BareWord = {
|
||||
<RawBareWord>,
|
||||
}
|
||||
|
||||
VariableBody: &'input str = <s:r"\$\p{XID_Start}(\p{XID_Continue}|[\-?!])*"> => <>;
|
||||
RawBareWord: String = <s:r#"[^0-9"'$\-\.][^\s"']*"#> => <>.to_string();
|
||||
DQString: String = <s:r#""([^"]|\\")*""#> => s[1..s.len() - 1].to_string();
|
||||
SQString: String = <s:r#"'([^']|\\')*'"#> => s[1..s.len() - 1].to_string();
|
||||
Num: i64 = <s:r"-?[0-9]+"> => i64::from_str(s).unwrap();
|
||||
|
1247
src/parser/parser.rs
1247
src/parser/parser.rs
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user