From b1da50774a381583883f40b47938220883b404a9 Mon Sep 17 00:00:00 2001 From: RobbingDaHood <16130319+RobbingDaHood@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:31:51 +0100 Subject: [PATCH] 14523 all comments should be prefixed with space tab or be beginning of token (#14616) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR should close 1. #10327 1. #13667 1. #13810 1. #14129 # Description This got reverted https://github.com/nushell/nushell/pull/14606 because the previous changes only considered space a whitespace and forgot about tabs. I now added a check for any whitespace, even if it is only those two that would be relevant. The added test failed before the changes. For `#` to start a comment, then it either need to be the first character of the token or prefixed with ` ` (space). So now you can do this: ``` ~/Projects/nushell> 1..10 | each {echo test#testing } 12/05/2024 05:37:19 PM ╭───┬──────────────╮ │ 0 │ test#testing │ │ 1 │ test#testing │ │ 2 │ test#testing │ │ 3 │ test#testing │ │ 4 │ test#testing │ │ 5 │ test#testing │ │ 6 │ test#testing │ │ 7 │ test#testing │ │ 8 │ test#testing │ │ 9 │ test#testing │ ╰───┴──────────────╯ ``` # User-Facing Changes It is a breaking change if anyone expected comments to start in the middle of a string without any prefixing ` ` (space). # Tests + Formatting Did all: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library # After Submitting I cant see that I need to update anything in [the documentation](https://github.com/nushell/nushell.github.io) but please point me in the direction if there is anything. --- crates/nu-parser/src/lex.rs | 15 ++++++----- crates/nu-parser/tests/test_lex.rs | 37 +++++++++++++++++++++++++++ tests/repl/test_parser.rs | 40 ++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index f0802fcd7a..b808f6eacc 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -51,7 +51,7 @@ impl BlockKind { } // A baseline token is terminated if it's not nested inside of a paired -// delimiter and the next character is one of: `|`, `;`, `#` or any +// delimiter and the next character is one of: `|`, `;` or any // whitespace. fn is_item_terminator( block_level: &[BlockKind], @@ -115,6 +115,7 @@ pub fn lex_item( // character (whitespace, `|`, `;` or `#`) is encountered, the baseline // token is done. // - Otherwise, accumulate the character into the current baseline token. + let mut previous_char = None; while let Some(c) = input.get(*curr_offset) { let c = *c; @@ -147,11 +148,12 @@ pub fn lex_item( // Also need to check to make sure we aren't escaped quote_start = None; } - } else if c == b'#' { - if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { - break; - } - in_comment = true; + } else if c == b'#' && !in_comment { + // To start a comment, It either need to be the first character of the token or prefixed with whitespace. + in_comment = previous_char + .map(char::from) + .map(char::is_whitespace) + .unwrap_or(true); } else if c == b'\n' || c == b'\r' { in_comment = false; if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { @@ -254,6 +256,7 @@ pub fn lex_item( } *curr_offset += 1; + previous_char = Some(c); } let span = Span::new(span_offset + token_start, span_offset + *curr_offset); diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs index a14843f3f0..2c1cf85bd8 100644 --- a/crates/nu-parser/tests/test_lex.rs +++ b/crates/nu-parser/tests/test_lex.rs @@ -159,6 +159,43 @@ fn lex_comment() { ); } +#[test] +fn lex_not_comment_needs_space_in_front_of_hashtag() { + let file = b"1..10 | each {echo test#testing }"; + + let output = lex(file, 0, &[], &[], false); + + assert!(output.1.is_none()); +} + +#[test] +fn lex_comment_with_space_in_front_of_hashtag() { + let file = b"1..10 | each {echo test #testing }"; + + let output = lex(file, 0, &[], &[], false); + + assert!(output.1.is_some()); + assert!(matches!( + output.1.unwrap(), + ParseError::UnexpectedEof(missing_token, span) if missing_token == "}" + && span == Span::new(33, 34) + )); +} + +#[test] +fn lex_comment_with_tab_in_front_of_hashtag() { + let file = b"1..10 | each {echo test\t#testing }"; + + let output = lex(file, 0, &[], &[], false); + + assert!(output.1.is_some()); + assert!(matches!( + output.1.unwrap(), + ParseError::UnexpectedEof(missing_token, span) if missing_token == "}" + && span == Span::new(33, 34) + )); +} + #[test] fn lex_is_incomplete() { let file = b"let x = 300 | ;"; diff --git a/tests/repl/test_parser.rs b/tests/repl/test_parser.rs index f77d431110..ddc790324a 100644 --- a/tests/repl/test_parser.rs +++ b/tests/repl/test_parser.rs @@ -169,6 +169,41 @@ fn comment_skipping_in_pipeline_3() -> TestResult { ) } +#[test] +fn still_string_if_hashtag_is_middle_of_string() -> TestResult { + run_test(r#"echo test#testing"#, "test#testing") +} + +#[test] +fn non_comment_hashtag_in_comment_does_not_stop_comment() -> TestResult { + run_test(r#"# command_bar_text: { fg: '#C4C9C6' },"#, "") +} + +#[test] +fn non_comment_hashtag_in_comment_does_not_stop_comment_in_block() -> TestResult { + run_test( + r#"{ + explore: { + # command_bar_text: { fg: '#C4C9C6' }, + } + } | get explore | is-empty"#, + "true", + ) +} + +#[test] +fn still_string_if_hashtag_is_middle_of_string_inside_each() -> TestResult { + run_test( + r#"1..1 | each {echo test#testing } | get 0"#, + "test#testing", + ) +} + +#[test] +fn still_string_if_hashtag_is_middle_of_string_inside_each_also_with_dot() -> TestResult { + run_test(r#"1..1 | each {echo '.#testing' } | get 0"#, ".#testing") +} + #[test] fn bad_var_name() -> TestResult { fail_test(r#"let $"foo bar" = 4"#, "can't contain") @@ -282,6 +317,11 @@ fn raw_string_with_equals() -> TestResult { ) } +#[test] +fn raw_string_with_hashtag() -> TestResult { + run_test(r#"r##' one # two '##"#, "one # two") +} + #[test] fn list_quotes_with_equals() -> TestResult { run_test(