Small parser refactors (#7568)

This commit is contained in:
Jakub Žádník 2022-12-22 13:41:44 +02:00 committed by GitHub
parent 046e46b962
commit d8a2e0e9a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 376 additions and 370 deletions

View File

@ -4,6 +4,7 @@ mod eval;
mod flatten;
mod known_external;
mod lex;
mod lite_parser;
mod parse_keywords;
mod parser;
mod type_check;
@ -15,12 +16,12 @@ pub use flatten::{
};
pub use known_external::KnownExternal;
pub use lex::{lex, Token, TokenContents};
pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
pub use parse_keywords::*;
pub use parser::{
is_math_expression_like, lite_parse, parse, parse_block, parse_duration_bytes,
parse_expression, parse_external_call, trim_quotes, trim_quotes_str, unescape_unquote_string,
Import, LiteBlock, LiteElement,
is_math_expression_like, parse, parse_block, parse_duration_bytes, parse_expression,
parse_external_call, trim_quotes, trim_quotes_str, unescape_unquote_string,
};
#[cfg(feature = "plugin")]

View File

@ -0,0 +1,365 @@
/// Lite parsing converts a flat stream of tokens from the lexer to a syntax element structure that
/// can be parsed.
use crate::{ParseError, Token, TokenContents};
use nu_protocol::{ast::Redirection, Span};
#[derive(Debug)]
pub struct LiteCommand {
pub comments: Vec<Span>,
pub parts: Vec<Span>,
}
impl Default for LiteCommand {
fn default() -> Self {
Self::new()
}
}
impl LiteCommand {
pub fn new() -> Self {
Self {
comments: vec![],
parts: vec![],
}
}
pub fn push(&mut self, span: Span) {
self.parts.push(span);
}
pub fn is_empty(&self) -> bool {
self.parts.is_empty()
}
}
// Note: the Span is the span of the connector not the whole element
#[derive(Debug)]
pub enum LiteElement {
Command(Option<Span>, LiteCommand),
Redirection(Span, Redirection, LiteCommand),
}
#[derive(Debug)]
pub struct LitePipeline {
pub commands: Vec<LiteElement>,
}
impl Default for LitePipeline {
fn default() -> Self {
Self::new()
}
}
impl LitePipeline {
pub fn new() -> Self {
Self { commands: vec![] }
}
pub fn push(&mut self, element: LiteElement) {
self.commands.push(element);
}
pub fn is_empty(&self) -> bool {
self.commands.is_empty()
}
}
#[derive(Debug)]
pub struct LiteBlock {
pub block: Vec<LitePipeline>,
}
impl Default for LiteBlock {
fn default() -> Self {
Self::new()
}
}
impl LiteBlock {
pub fn new() -> Self {
Self { block: vec![] }
}
pub fn push(&mut self, pipeline: LitePipeline) {
self.block.push(pipeline);
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut block = LiteBlock::new();
let mut curr_pipeline = LitePipeline::new();
let mut curr_command = LiteCommand::new();
let mut last_token = TokenContents::Eol;
let mut last_connector = TokenContents::Pipe;
let mut last_connector_span: Option<Span> = None;
if tokens.is_empty() {
return (LiteBlock::new(), None);
}
let mut curr_comment: Option<Vec<Span>> = None;
let mut error = None;
for token in tokens.iter() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::Item => {
// If we have a comment, go ahead and attach it
if let Some(curr_comment) = curr_comment.take() {
curr_command.comments = curr_comment;
}
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::OutGreaterThan
| TokenContents::ErrGreaterThan
| TokenContents::OutErrGreaterThan => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = token.contents;
last_connector = token.contents;
last_connector_span = Some(token.span);
}
TokenContents::Pipe => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = TokenContents::Pipe;
last_connector = TokenContents::Pipe;
last_connector_span = Some(token.span);
}
TokenContents::Eol => {
if last_token != TokenContents::Pipe && last_token != TokenContents::OutGreaterThan
{
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
}
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
last_token = TokenContents::Eol;
}
TokenContents::Semicolon => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
last_connector = TokenContents::Pipe;
last_connector_span = None;
}
last_token = TokenContents::Semicolon;
}
TokenContents::Comment => {
// Comment is beside something
if last_token != TokenContents::Eol {
curr_command.comments.push(token.span);
curr_comment = None;
} else {
// Comment precedes something
if let Some(curr_comment) = &mut curr_comment {
curr_comment.push(token.span);
} else {
curr_comment = Some(vec![token.span]);
}
}
last_token = TokenContents::Comment;
}
}
}
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline.push(LiteElement::Command(last_connector_span, curr_command));
}
}
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
if last_token == TokenContents::Pipe {
(
block,
Some(ParseError::UnexpectedEof(
"pipeline missing end".into(),
tokens[tokens.len() - 1].span,
)),
)
} else {
(block, error)
}
}

View File

@ -19,10 +19,11 @@ static PLUGIN_DIRS_ENV: &str = "NU_PLUGIN_DIRS";
use crate::{
known_external::KnownExternal,
lex,
lite_parser::{lite_parse, LiteCommand, LiteElement},
parser::{
check_call, check_name, garbage, garbage_pipeline, lite_parse, parse, parse_internal_call,
check_call, check_name, garbage, garbage_pipeline, parse, parse_internal_call,
parse_multispan_value, parse_signature, parse_string, parse_value, parse_var_with_opt_type,
trim_quotes, LiteCommand, LiteElement, ParsedInternalCall,
trim_quotes, ParsedInternalCall,
},
unescape_unquote_string, ParseError,
};

View File

@ -1,6 +1,8 @@
use crate::{
eval::{eval_constant, value_as_string},
lex, parse_mut,
lex,
lite_parser::{lite_parse, LiteCommand, LiteElement},
parse_mut,
type_check::{math_result_type, type_compatible},
ParseError, Token, TokenContents,
};
@ -9,7 +11,7 @@ use nu_protocol::{
ast::{
Argument, Assignment, Bits, Block, Boolean, Call, CellPath, Comparison, Expr, Expression,
FullCellPath, ImportPattern, ImportPatternHead, ImportPatternMember, Math, Operator,
PathMember, Pipeline, PipelineElement, RangeInclusion, RangeOperator, Redirection,
PathMember, Pipeline, PipelineElement, RangeInclusion, RangeOperator,
},
engine::StateWorkingSet,
span, BlockId, Flag, PositionalArg, Signature, Span, Spanned, SyntaxShape, Type, Unit, VarId,
@ -32,9 +34,6 @@ use std::{
#[cfg(feature = "plugin")]
use crate::parse_keywords::parse_register;
#[derive(Debug, Clone)]
pub enum Import {}
pub fn garbage(span: Span) -> Expression {
Expression::garbage(span)
}
@ -5915,366 +5914,6 @@ fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: &Expression)
}
}
#[derive(Debug)]
pub struct LiteCommand {
pub comments: Vec<Span>,
pub parts: Vec<Span>,
}
impl Default for LiteCommand {
fn default() -> Self {
Self::new()
}
}
impl LiteCommand {
pub fn new() -> Self {
Self {
comments: vec![],
parts: vec![],
}
}
pub fn push(&mut self, span: Span) {
self.parts.push(span);
}
pub fn is_empty(&self) -> bool {
self.parts.is_empty()
}
}
// Note: the Span is the span of the connector not the whole element
#[derive(Debug)]
pub enum LiteElement {
Command(Option<Span>, LiteCommand),
Redirection(Span, Redirection, LiteCommand),
}
#[derive(Debug)]
pub struct LitePipeline {
pub commands: Vec<LiteElement>,
}
impl Default for LitePipeline {
fn default() -> Self {
Self::new()
}
}
impl LitePipeline {
pub fn new() -> Self {
Self { commands: vec![] }
}
pub fn push(&mut self, element: LiteElement) {
self.commands.push(element);
}
pub fn is_empty(&self) -> bool {
self.commands.is_empty()
}
}
#[derive(Debug)]
pub struct LiteBlock {
pub block: Vec<LitePipeline>,
}
impl Default for LiteBlock {
fn default() -> Self {
Self::new()
}
}
impl LiteBlock {
pub fn new() -> Self {
Self { block: vec![] }
}
pub fn push(&mut self, pipeline: LitePipeline) {
self.block.push(pipeline);
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut block = LiteBlock::new();
let mut curr_pipeline = LitePipeline::new();
let mut curr_command = LiteCommand::new();
let mut last_token = TokenContents::Eol;
let mut last_connector = TokenContents::Pipe;
let mut last_connector_span: Option<Span> = None;
if tokens.is_empty() {
return (LiteBlock::new(), None);
}
let mut curr_comment: Option<Vec<Span>> = None;
let mut error = None;
for token in tokens.iter() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::Item => {
// If we have a comment, go ahead and attach it
if let Some(curr_comment) = curr_comment.take() {
curr_command.comments = curr_comment;
}
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::OutGreaterThan
| TokenContents::ErrGreaterThan
| TokenContents::OutErrGreaterThan => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = token.contents;
last_connector = token.contents;
last_connector_span = Some(token.span);
}
TokenContents::Pipe => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = TokenContents::Pipe;
last_connector = TokenContents::Pipe;
last_connector_span = Some(token.span);
}
TokenContents::Eol => {
if last_token != TokenContents::Pipe && last_token != TokenContents::OutGreaterThan
{
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
}
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
last_token = TokenContents::Eol;
}
TokenContents::Semicolon => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
last_connector = TokenContents::Pipe;
last_connector_span = None;
}
last_token = TokenContents::Semicolon;
}
TokenContents::Comment => {
// Comment is beside something
if last_token != TokenContents::Eol {
curr_command.comments.push(token.span);
curr_comment = None;
} else {
// Comment precedes something
if let Some(curr_comment) = &mut curr_comment {
curr_comment.push(token.span);
} else {
curr_comment = Some(vec![token.span]);
}
}
last_token = TokenContents::Comment;
}
}
}
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline.push(LiteElement::Command(last_connector_span, curr_command));
}
}
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
if last_token == TokenContents::Pipe {
(
block,
Some(ParseError::UnexpectedEof(
"pipeline missing end".into(),
tokens[tokens.len() - 1].span,
)),
)
} else {
(block, error)
}
}
// Parses a vector of u8 to create an AST Block. If a file name is given, then
// the name is stored in the working set. When parsing a source without a file
// name, the source of bytes is stored as "source"