From 8e2917b9ae31cf7ae2bef9ddf1e4747c9ad42880 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Tue, 30 Jul 2024 16:55:22 -0700 Subject: [PATCH] Make assignment and `const` consistent with `let`/`mut` (#13385) # Description This makes assignment operations and `const` behave the same way `let` and `mut` do, absorbing the rest of the pipeline. Changes the lexer to be able to recognize assignment operators as a separate token, and then makes the lite parser continue to push spans into the same command regardless of any redirections or pipes if an assignment operator is encountered. Because the pipeline is no longer split up by the lite parser at this point, it's trivial to just parse the right hand side as if it were a subexpression not contained within parentheses. # User-Facing Changes Big breaking change. These are all now possible: ```nushell const path = 'a' | path join 'b' mut x = 2 $x = random int $x = [1 2 3] | math sum $env.FOO = random chars ``` In the past, these would have led to (an attempt at) bare word string parsing. So while `$env.FOO = bar` would have previously set the environment variable `FOO` to the string `"bar"`, it now tries to run the command named `bar`, hence the major breaking change. However, this is desirable because it is very consistent - if you see the `=`, you can just assume it absorbs everything else to the right of it. # Tests + Formatting Added tests for the new behaviour. Adjusted some existing tests that depended on the right hand side of assignments being parsed as barewords. # After Submitting - [ ] release notes (breaking change!) --- crates/nu-command/tests/commands/complete.rs | 4 +- crates/nu-command/tests/commands/let_.rs | 14 ++ crates/nu-command/tests/commands/rm.rs | 2 +- crates/nu-command/tests/commands/table.rs | 2 +- crates/nu-parser/src/lex.rs | 11 ++ crates/nu-parser/src/lite_parser.rs | 49 ++++- crates/nu-parser/src/parse_keywords.rs | 38 ++-- crates/nu-parser/src/parser.rs | 186 ++++++++++++------- crates/nu-parser/tests/test_parser.rs | 2 +- tests/const_/mod.rs | 6 + tests/plugins/env.rs | 6 +- tests/repl/test_config.rs | 2 +- tests/repl/test_parser.rs | 16 ++ 13 files changed, 247 insertions(+), 91 deletions(-) diff --git a/crates/nu-command/tests/commands/complete.rs b/crates/nu-command/tests/commands/complete.rs index 329e95ad71..5426d4e8bb 100644 --- a/crates/nu-command/tests/commands/complete.rs +++ b/crates/nu-command/tests/commands/complete.rs @@ -95,13 +95,13 @@ fn capture_error_with_both_stdout_stderr_messages_not_hang_nushell() { #[test] fn combined_pipe_redirection() { - let actual = nu!("$env.FOO = hello; $env.BAR = world; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout"); + let actual = nu!("$env.FOO = 'hello'; $env.BAR = 'world'; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout"); assert_eq!(actual.out, "helloworld"); } #[test] fn err_pipe_redirection() { let actual = - nu!("$env.FOO = hello; nu --testbin echo_env_stderr FOO e>| complete | get stdout"); + nu!("$env.FOO = 'hello'; nu --testbin echo_env_stderr FOO e>| complete | get stdout"); assert_eq!(actual.out, "hello"); } diff --git a/crates/nu-command/tests/commands/let_.rs b/crates/nu-command/tests/commands/let_.rs index 4bedf31104..e05d34d258 100644 --- a/crates/nu-command/tests/commands/let_.rs +++ b/crates/nu-command/tests/commands/let_.rs @@ -23,6 +23,13 @@ fn let_takes_pipeline() { assert_eq!(actual.out, "11"); } +#[test] +fn let_takes_pipeline_with_declared_type() { + let actual = nu!(r#"let x: list = [] | append "hello world"; print $x.0"#); + + assert_eq!(actual.out, "hello world"); +} + #[test] fn let_pipeline_allows_in() { let actual = @@ -38,6 +45,13 @@ fn mut_takes_pipeline() { assert_eq!(actual.out, "11"); } +#[test] +fn mut_takes_pipeline_with_declared_type() { + let actual = nu!(r#"mut x: list = [] | append "hello world"; print $x.0"#); + + assert_eq!(actual.out, "hello world"); +} + #[test] fn mut_pipeline_allows_in() { let actual = diff --git a/crates/nu-command/tests/commands/rm.rs b/crates/nu-command/tests/commands/rm.rs index e6cbf88cda..6baefd5420 100644 --- a/crates/nu-command/tests/commands/rm.rs +++ b/crates/nu-command/tests/commands/rm.rs @@ -144,7 +144,7 @@ fn errors_if_attempting_to_delete_home() { Playground::setup("rm_test_8", |dirs, _| { let actual = nu!( cwd: dirs.root(), - "$env.HOME = myhome ; rm -rf ~" + "$env.HOME = 'myhome' ; rm -rf ~" ); assert!(actual.err.contains("please use -I or -i")); diff --git a/crates/nu-command/tests/commands/table.rs b/crates/nu-command/tests/commands/table.rs index 272369a1c9..f73cce6540 100644 --- a/crates/nu-command/tests/commands/table.rs +++ b/crates/nu-command/tests/commands/table.rs @@ -2567,7 +2567,7 @@ fn theme_cmd(theme: &str, footer: bool, then: &str) -> String { with_footer = "$env.config.footer_mode = \"always\"".to_string(); } - format!("$env.config.table.mode = {theme}; $env.config.table.header_on_separator = true; {with_footer}; {then}") + format!("$env.config.table.mode = \"{theme}\"; $env.config.table.header_on_separator = true; {with_footer}; {then}") } #[test] diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 3290a774f4..bad9e94a10 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -6,6 +6,7 @@ pub enum TokenContents { Comment, Pipe, PipePipe, + AssignmentOperator, ErrGreaterPipe, OutErrGreaterPipe, Semicolon, @@ -69,6 +70,12 @@ fn is_item_terminator( || special_tokens.contains(&c)) } +/// Assignment operators have special handling distinct from math expressions, as they cause the +/// rest of the pipeline to be consumed. +pub fn is_assignment_operator(bytes: &[u8]) -> bool { + matches!(bytes, b"=" | b"+=" | b"++=" | b"-=" | b"*=" | b"/=") +} + // A special token is one that is a byte that stands alone as its own token. For example // when parsing a signature you may want to have `:` be able to separate tokens and also // to be handled as its own token to notify you you're about to parse a type in the example @@ -297,6 +304,10 @@ pub fn lex_item( let mut err = None; let output = match &input[(span.start - span_offset)..(span.end - span_offset)] { + bytes if is_assignment_operator(bytes) => Token { + contents: TokenContents::AssignmentOperator, + span, + }, b"out>" | b"o>" => Token { contents: TokenContents::OutGreaterThan, span, diff --git a/crates/nu-parser/src/lite_parser.rs b/crates/nu-parser/src/lite_parser.rs index c9e9578698..0be57a7c9a 100644 --- a/crates/nu-parser/src/lite_parser.rs +++ b/crates/nu-parser/src/lite_parser.rs @@ -196,10 +196,43 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option) { let mut last_token = TokenContents::Eol; let mut file_redirection = None; let mut curr_comment: Option> = None; + let mut is_assignment = false; let mut error = None; for (idx, token) in tokens.iter().enumerate() { - if let Some((source, append, span)) = file_redirection.take() { + if is_assignment { + match &token.contents { + // Consume until semicolon or terminating EOL. Assignments absorb pipelines and + // redirections. + TokenContents::Eol => { + // Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]` + // + // `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline + // and so `[Comment] | [Eol]` should be ignore to make it work + let actual_token = last_non_comment_token(tokens, idx); + if actual_token != Some(TokenContents::Pipe) { + is_assignment = false; + pipeline.push(&mut command); + block.push(&mut pipeline); + } + + if last_token == TokenContents::Eol { + // Clear out the comment as we're entering a new comment + curr_comment = None; + } + } + TokenContents::Semicolon => { + is_assignment = false; + pipeline.push(&mut command); + block.push(&mut pipeline); + } + TokenContents::Comment => { + command.comments.push(token.span); + curr_comment = None; + } + _ => command.push(token.span), + } + } else if let Some((source, append, span)) = file_redirection.take() { match &token.contents { TokenContents::PipePipe => { error = error.or(Some(ParseError::ShellOrOr(token.span))); @@ -218,6 +251,11 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option) { command.push(token.span) } } + TokenContents::AssignmentOperator => { + error = error.or(Some(ParseError::Expected("redirection target", token.span))); + command.push(span); + command.push(token.span); + } TokenContents::OutGreaterThan | TokenContents::OutGreaterGreaterThan | TokenContents::ErrGreaterThan @@ -280,6 +318,15 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option) { } command.push(token.span); } + TokenContents::AssignmentOperator => { + // When in assignment mode, we'll just consume pipes or redirections as part of + // the command. + is_assignment = true; + if let Some(curr_comment) = curr_comment.take() { + command.comments = curr_comment; + } + command.push(token.span); + } TokenContents::OutGreaterThan => { error = error.or(command.check_accepts_redirection(token.span)); file_redirection = Some((RedirectionSource::Stdout, false, token.span)); diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 7c41f8a78d..3e2b1827b2 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -35,8 +35,8 @@ use crate::{ lite_parser::{lite_parse, LiteCommand}, parser::{ check_call, garbage, garbage_pipeline, parse, parse_call, parse_expression, - parse_full_signature, parse_import_pattern, parse_internal_call, parse_multispan_value, - parse_string, parse_value, parse_var_with_opt_type, trim_quotes, ParsedInternalCall, + parse_full_signature, parse_import_pattern, parse_internal_call, parse_string, parse_value, + parse_var_with_opt_type, trim_quotes, ParsedInternalCall, }, unescape_unquote_string, Token, TokenContents, }; @@ -3171,9 +3171,6 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin // } if let Some(decl_id) = working_set.find_decl(b"const") { - let cmd = working_set.get_decl(decl_id); - let call_signature = cmd.signature().call_signature(); - if spans.len() >= 4 { // This is a bit of by-hand parsing to get around the issue where we want to parse in the reverse order // so that the var-id created by the variable isn't visible in the expression that init it @@ -3181,18 +3178,29 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin let item = working_set.get_span_contents(*span.1); // const x = 'f', = at least start from index 2 if item == b"=" && spans.len() > (span.0 + 1) && span.0 > 1 { - let mut idx = span.0; + // Parse the rvalue as a subexpression + let rvalue_span = Span::concat(&spans[(span.0 + 1)..]); - let rvalue = parse_multispan_value( - working_set, - spans, - &mut idx, - &SyntaxShape::Keyword(b"=".to_vec(), Box::new(SyntaxShape::MathExpression)), + let (rvalue_tokens, rvalue_error) = lex( + working_set.get_span_contents(rvalue_span), + rvalue_span.start, + &[], + &[], + false, + ); + working_set.parse_errors.extend(rvalue_error); + + trace!("parsing: const right-hand side subexpression"); + let rvalue_block = + parse_block(working_set, &rvalue_tokens, rvalue_span, false, true); + let rvalue_ty = rvalue_block.output_type(); + let rvalue_block_id = working_set.add_block(Arc::new(rvalue_block)); + let rvalue = Expression::new( + working_set, + Expr::Subexpression(rvalue_block_id), + rvalue_span, + rvalue_ty, ); - if idx < (spans.len() - 1) { - working_set - .error(ParseError::ExtraPositional(call_signature, spans[idx + 1])); - } let mut idx = 0; diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 1fb0aee01d..a5559f5c52 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -1,5 +1,5 @@ use crate::{ - lex::{lex, lex_signature}, + lex::{is_assignment_operator, lex, lex_signature}, lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget}, parse_keywords::*, parse_patterns::parse_pattern, @@ -1458,7 +1458,8 @@ fn parse_binary_with_base( | TokenContents::ErrGreaterThan | TokenContents::ErrGreaterGreaterThan | TokenContents::OutErrGreaterThan - | TokenContents::OutErrGreaterGreaterThan => { + | TokenContents::OutErrGreaterGreaterThan + | TokenContents::AssignmentOperator => { working_set.error(ParseError::Expected("binary", span)); return garbage(working_set, span); } @@ -3409,7 +3410,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) -> for token in &output { match token { Token { - contents: crate::TokenContents::Item, + contents: crate::TokenContents::Item | crate::TokenContents::AssignmentOperator, span, } => { let span = *span; @@ -4829,7 +4830,7 @@ pub fn parse_value( } } -pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression { +pub fn parse_assignment_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression { let contents = working_set.get_span_contents(span); let operator = match contents { @@ -4839,6 +4840,95 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi b"-=" => Operator::Assignment(Assignment::MinusAssign), b"*=" => Operator::Assignment(Assignment::MultiplyAssign), b"/=" => Operator::Assignment(Assignment::DivideAssign), + _ => { + working_set.error(ParseError::Expected("assignment operator", span)); + return garbage(working_set, span); + } + }; + + Expression::new(working_set, Expr::Operator(operator), span, Type::Any) +} + +pub fn parse_assignment_expression( + working_set: &mut StateWorkingSet, + spans: &[Span], +) -> Expression { + trace!("parsing: assignment expression"); + let expr_span = Span::concat(spans); + + // Assignment always has the most precedence, and its right-hand side can be a pipeline + let Some(op_index) = spans + .iter() + .position(|span| is_assignment_operator(working_set.get_span_contents(*span))) + else { + working_set.error(ParseError::Expected("assignment expression", expr_span)); + return garbage(working_set, expr_span); + }; + + let lhs_spans = &spans[0..op_index]; + let op_span = spans[op_index]; + let rhs_spans = &spans[(op_index + 1)..]; + + if lhs_spans.is_empty() { + working_set.error(ParseError::Expected( + "left hand side of assignment", + op_span, + )); + return garbage(working_set, expr_span); + } + + if rhs_spans.is_empty() { + working_set.error(ParseError::Expected( + "right hand side of assignment", + op_span, + )); + return garbage(working_set, expr_span); + } + + // Parse the lhs and operator as usual for a math expression + let mut lhs = parse_expression(working_set, lhs_spans); + let mut operator = parse_assignment_operator(working_set, op_span); + + // Re-parse the right-hand side as a subexpression + let rhs_span = Span::concat(rhs_spans); + + let (rhs_tokens, rhs_error) = lex( + working_set.get_span_contents(rhs_span), + rhs_span.start, + &[], + &[], + true, + ); + working_set.parse_errors.extend(rhs_error); + + trace!("parsing: assignment right-hand side subexpression"); + let rhs_block = parse_block(working_set, &rhs_tokens, rhs_span, false, true); + let rhs_ty = rhs_block.output_type(); + let rhs_block_id = working_set.add_block(Arc::new(rhs_block)); + let mut rhs = Expression::new( + working_set, + Expr::Subexpression(rhs_block_id), + rhs_span, + rhs_ty, + ); + + let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut operator, &mut rhs); + if let Some(err) = err { + working_set.parse_errors.push(err); + } + + Expression::new( + working_set, + Expr::BinaryOp(Box::new(lhs), Box::new(operator), Box::new(rhs)), + expr_span, + result_ty, + ) +} + +pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression { + let contents = working_set.get_span_contents(span); + + let operator = match contents { b"==" => Operator::Comparison(Comparison::Equal), b"!=" => Operator::Comparison(Comparison::NotEqual), b"<" => Operator::Comparison(Comparison::LessThan), @@ -4954,6 +5044,10 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi )); return garbage(working_set, span); } + op if is_assignment_operator(op) => { + working_set.error(ParseError::Expected("a non-assignment operator", span)); + return garbage(working_set, span); + } _ => { working_set.error(ParseError::Expected("operator", span)); return garbage(working_set, span); @@ -5258,7 +5352,12 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex return garbage(working_set, Span::concat(spans)); } - let output = if is_math_expression_like(working_set, spans[pos]) { + let output = if spans[pos..] + .iter() + .any(|span| is_assignment_operator(working_set.get_span_contents(*span))) + { + parse_assignment_expression(working_set, &spans[pos..]) + } else if is_math_expression_like(working_set, spans[pos]) { parse_math_expression(working_set, &spans[pos..], None) } else { let bytes = working_set.get_span_contents(spans[pos]).to_vec(); @@ -5690,69 +5789,24 @@ pub(crate) fn redirecting_builtin_error( } pub fn parse_pipeline(working_set: &mut StateWorkingSet, pipeline: &LitePipeline) -> Pipeline { - let first_command = pipeline.commands.first(); - let first_command_name = first_command - .and_then(|command| command.parts.first()) - .map(|span| working_set.get_span_contents(*span)); - if pipeline.commands.len() > 1 { - // Special case: allow "let" or "mut" to consume the whole pipeline, if this is a pipeline - // with multiple commands - if matches!(first_command_name, Some(b"let" | b"mut")) { - // Merge the pipeline into one command - let first_command = first_command.expect("must be Some"); + // Parse a normal multi command pipeline + let elements: Vec<_> = pipeline + .commands + .iter() + .enumerate() + .map(|(index, element)| { + let element = parse_pipeline_element(working_set, element); + // Handle $in for pipeline elements beyond the first one + if index > 0 && element.has_in_variable(working_set) { + wrap_element_with_collect(working_set, element.clone()) + } else { + element + } + }) + .collect(); - let remainder_span = first_command - .parts_including_redirection() - .skip(3) - .chain( - pipeline.commands[1..] - .iter() - .flat_map(|command| command.parts_including_redirection()), - ) - .reduce(Span::append); - - let parts = first_command - .parts - .iter() - .take(3) // the let/mut start itself - .copied() - .chain(remainder_span) // everything else - .collect(); - - let comments = pipeline - .commands - .iter() - .flat_map(|command| command.comments.iter()) - .copied() - .collect(); - - let new_command = LiteCommand { - pipe: None, - comments, - parts, - redirection: None, - }; - parse_builtin_commands(working_set, &new_command) - } else { - // Parse a normal multi command pipeline - let elements: Vec<_> = pipeline - .commands - .iter() - .enumerate() - .map(|(index, element)| { - let element = parse_pipeline_element(working_set, element); - // Handle $in for pipeline elements beyond the first one - if index > 0 && element.has_in_variable(working_set) { - wrap_element_with_collect(working_set, element.clone()) - } else { - element - } - }) - .collect(); - - Pipeline { elements } - } + Pipeline { elements } } else { // If there's only one command in the pipeline, this could be a builtin command parse_builtin_commands(working_set, &pipeline.commands[0]) diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index 4d6778d201..44cd79a04a 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -1177,9 +1177,9 @@ fn test_nothing_comparison_eq() { #[rstest] #[case(b"let a = 1 err> /dev/null")] #[case(b"let a = 1 out> /dev/null")] +#[case(b"let a = 1 out+err> /dev/null")] #[case(b"mut a = 1 err> /dev/null")] #[case(b"mut a = 1 out> /dev/null")] -#[case(b"let a = 1 out+err> /dev/null")] #[case(b"mut a = 1 out+err> /dev/null")] fn test_redirection_with_letmut(#[case] phase: &[u8]) { let engine_state = EngineState::new(); diff --git a/tests/const_/mod.rs b/tests/const_/mod.rs index 04db29d3dd..f736b785ca 100644 --- a/tests/const_/mod.rs +++ b/tests/const_/mod.rs @@ -415,3 +415,9 @@ fn const_raw_string() { let actual = nu!(r#"const x = r#'abc'#; $x"#); assert_eq!(actual.out, "abc"); } + +#[test] +fn const_takes_pipeline() { + let actual = nu!(r#"const list = 'bar_baz_quux' | split row '_'; $list | length"#); + assert_eq!(actual.out, "3"); +} diff --git a/tests/plugins/env.rs b/tests/plugins/env.rs index c5a2ccc7af..8b307999b3 100644 --- a/tests/plugins/env.rs +++ b/tests/plugins/env.rs @@ -6,9 +6,9 @@ fn get_env_by_name() { cwd: ".", plugin: ("nu_plugin_example"), r#" - $env.FOO = bar + $env.FOO = 'bar' example env FOO | print - $env.FOO = baz + $env.FOO = 'baz' example env FOO | print "# ); @@ -21,7 +21,7 @@ fn get_envs() { let result = nu_with_plugins!( cwd: ".", plugin: ("nu_plugin_example"), - "$env.BAZ = foo; example env | get BAZ" + "$env.BAZ = 'foo'; example env | get BAZ" ); assert!(result.status.success()); assert_eq!("foo", result.out); diff --git a/tests/repl/test_config.rs b/tests/repl/test_config.rs index 005ae63171..6c0ba6180f 100644 --- a/tests/repl/test_config.rs +++ b/tests/repl/test_config.rs @@ -20,7 +20,7 @@ fn mutate_nu_config_nested_ls() -> TestResult { fn mutate_nu_config_nested_table() -> TestResult { run_test_std( r#" - $env.config.table.trim.methodology = wrapping + $env.config.table.trim.methodology = 'wrapping' $env.config.table.trim.wrapping_try_keep_words = false $env.config.table.trim.wrapping_try_keep_words "#, diff --git a/tests/repl/test_parser.rs b/tests/repl/test_parser.rs index a7a25e1c67..ea983ff82c 100644 --- a/tests/repl/test_parser.rs +++ b/tests/repl/test_parser.rs @@ -295,6 +295,22 @@ fn assign_expressions() -> TestResult { ) } +#[test] +fn assign_takes_pipeline() -> TestResult { + run_test( + r#"mut foo = 'bar'; $foo = $foo | str upcase | str reverse; $foo"#, + "RAB", + ) +} + +#[test] +fn append_assign_takes_pipeline() -> TestResult { + run_test( + r#"mut foo = 'bar'; $foo ++= $foo | str upcase; $foo"#, + "barBAR", + ) +} + #[test] fn string_interpolation_paren_test() -> TestResult { run_test(r#"$"('(')(')')""#, "()")