From 308ab91aff888db0c1d8300fc565ff2bdd6acfca Mon Sep 17 00:00:00 2001 From: JT <547158+jntrnr@users.noreply.github.com> Date: Thu, 24 Feb 2022 07:58:53 -0500 Subject: [PATCH] Speed up the parser and nuon parser a bit more (#4626) --- crates/nu-command/src/formats/from/nuon.rs | 26 ++--- crates/nu-parser/src/parser.rs | 112 +++++++++++++++------ 2 files changed, 96 insertions(+), 42 deletions(-) diff --git a/crates/nu-command/src/formats/from/nuon.rs b/crates/nu-command/src/formats/from/nuon.rs index 2f9b1d273d..299a039412 100644 --- a/crates/nu-command/src/formats/from/nuon.rs +++ b/crates/nu-command/src/formats/from/nuon.rs @@ -89,7 +89,7 @@ impl Command for FromNuon { let (lite_block, err) = nu_parser::lite_parse(&lexed); error = error.or(err); - let (block, err) = nu_parser::parse_block(&mut working_set, &lite_block, true); + let (mut block, err) = nu_parser::parse_block(&mut working_set, &lite_block, true); error = error.or(err); if let Some(pipeline) = block.pipelines.get(1) { @@ -119,7 +119,16 @@ impl Command for FromNuon { } } - let expr = if let Some(pipeline) = block.pipelines.get(0) { + let expr = if block.pipelines.is_empty() { + Expression { + expr: Expr::Nothing, + span: head, + custom_completion: None, + ty: Type::Nothing, + } + } else { + let mut pipeline = block.pipelines.remove(0); + if let Some(expr) = pipeline.expressions.get(1) { return Err(ShellError::SpannedLabeledErrorRelated( "error when loading nuon text".into(), @@ -134,22 +143,15 @@ impl Command for FromNuon { )); } - if let Some(expr) = pipeline.expressions.get(0) { - expr.clone() - } else { + if pipeline.expressions.is_empty() { Expression { expr: Expr::Nothing, span: head, custom_completion: None, ty: Type::Nothing, } - } - } else { - Expression { - expr: Expr::Nothing, - span: head, - custom_completion: None, - ty: Type::Nothing, + } else { + pipeline.expressions.remove(0) } }; diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 93d8d19677..09fc38ab64 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -1091,12 +1091,23 @@ pub fn parse_range( // looks like parent directory) let contents = working_set.get_span_contents(span); + let token = if let Ok(s) = String::from_utf8(contents.into()) { s } else { return (garbage(span), Some(ParseError::NonUtf8(span))); }; + if !token.contains("..") { + return ( + garbage(span), + Some(ParseError::Expected( + "at least one range bound set".into(), + span, + )), + ); + } + // First, figure out what exact operators are used and determine their positions let dotdot_pos: Vec<_> = token.match_indices("..").map(|(pos, _)| pos).collect(); @@ -1224,6 +1235,7 @@ pub(crate) fn parse_dollar_expr( working_set: &mut StateWorkingSet, span: Span, ) -> (Expression, Option) { + trace!("parsing: dollar expression"); let contents = working_set.get_span_contents(span); if contents.starts_with(b"$\"") || contents.starts_with(b"$'") { @@ -1712,6 +1724,18 @@ pub fn parse_datetime( trace!("parsing: datetime"); let bytes = working_set.get_span_contents(span); + + if bytes.is_empty() || !bytes[0].is_ascii_digit() { + return ( + garbage(span), + Some(ParseError::Mismatch( + "datetime".into(), + "non-datetime".into(), + span, + )), + ); + } + let token = String::from_utf8_lossy(bytes).to_string(); if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&token) { @@ -1727,8 +1751,8 @@ pub fn parse_datetime( } // Just the date - if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&format!("{}T00:00:00+00:00", token)) - { + let just_date = token.clone() + "T00:00:00+00:00"; + if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&just_date) { return ( Expression { expr: Expr::DateTime(datetime), @@ -1741,7 +1765,8 @@ pub fn parse_datetime( } // Date and time, assume UTC - if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&format!("{}+00:00", token)) { + let datetime = token + "+00:00"; + if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&datetime) { return ( Expression { expr: Expr::DateTime(datetime), @@ -1779,6 +1804,18 @@ pub fn parse_duration( } let bytes = working_set.get_span_contents(span); + + if bytes.is_empty() || (!bytes[0].is_ascii_digit() && bytes[0] != b'-') { + return ( + garbage(span), + Some(ParseError::Mismatch( + "duration".into(), + "non-duration unit".into(), + span, + )), + ); + } + let token = String::from_utf8_lossy(bytes).to_string(); let upper = token.to_uppercase(); @@ -1874,6 +1911,18 @@ pub fn parse_filesize( } let bytes = working_set.get_span_contents(span); + + if bytes.is_empty() || (!bytes[0].is_ascii_digit() && bytes[0] != b'-') { + return ( + garbage(span), + Some(ParseError::Mismatch( + "filesize".into(), + "non-filesize unit".into(), + span, + )), + ); + } + let token = String::from_utf8_lossy(bytes).to_string(); let upper = token.to_uppercase(); @@ -3111,6 +3160,10 @@ pub fn parse_value( ) -> (Expression, Option) { let bytes = working_set.get_span_contents(span); + if bytes.is_empty() { + return (garbage(span), Some(ParseError::IncompleteParser(span))); + } + // First, check the special-cases. These will likely represent specific values as expressions // and may fit a variety of shapes. // @@ -3121,37 +3174,35 @@ pub fn parse_value( trace!("parsing: variable"); return parse_variable_expr(working_set, span); - } else if bytes.starts_with(b"$") { - trace!("parsing: dollar expression"); + } - return parse_dollar_expr(working_set, span); - } else if bytes.starts_with(b"(") { - trace!("parsing: range or full path"); - - if let (expr, None) = parse_range(working_set, span) { - return (expr, None); - } else { - return parse_full_cell_path(working_set, None, span); - } - } else if bytes.starts_with(b"{") { - trace!("parsing: block or full path"); - if !matches!(shape, SyntaxShape::Block(..)) { - if let (expr, None) = parse_full_cell_path(working_set, None, span) { + match bytes[0] { + b'$' => return parse_dollar_expr(working_set, span), + b'(' => { + if let (expr, None) = parse_range(working_set, span) { return (expr, None); + } else { + return parse_full_cell_path(working_set, None, span); } } - if matches!(shape, SyntaxShape::Block(_)) || matches!(shape, SyntaxShape::Any) { - return parse_block_expression(working_set, shape, span); - } else if matches!(shape, SyntaxShape::Record) { - return parse_record(working_set, span); - } else { - return ( - Expression::garbage(span), - Some(ParseError::Expected("non-block value".into(), span)), - ); + b'{' => { + if !matches!(shape, SyntaxShape::Block(..)) { + if let (expr, None) = parse_full_cell_path(working_set, None, span) { + return (expr, None); + } + } + if matches!(shape, SyntaxShape::Block(_)) || matches!(shape, SyntaxShape::Any) { + return parse_block_expression(working_set, shape, span); + } else if matches!(shape, SyntaxShape::Record) { + return parse_record(working_set, span); + } else { + return ( + Expression::garbage(span), + Some(ParseError::Expected("non-block value".into(), span)), + ); + } } - } else if bytes.starts_with(b"[") { - match shape { + b'[' => match shape { SyntaxShape::Any | SyntaxShape::List(_) | SyntaxShape::Table @@ -3162,7 +3213,8 @@ pub fn parse_value( Some(ParseError::Expected("non-[] value".into(), span)), ); } - } + }, + _ => {} } match shape {