allow raw string to be used inside subexpression, list, and closure (#12776)

# Description

Fixes: #12744

This PR moves the raw-string lexing logic into the `lex_item` function, so that raw strings can be used inside subexpressions, lists, and closures.

```nushell
> [r#'abc'#]
╭───┬─────╮
│ 0 │ abc │
╰───┴─────╯
> (r#'abc'#)
abc
> do {r#'aa'#}
aa
```

# Tests + Formatting

Done

# After Submitting

NaN
2024-10-05 01:43:35 +02:00 · 2024-05-07 04:53:58 +08:00 · 2024-05-07 04:53:58 +08:00 · 97fc190cc5
commit 97fc190cc5
parent f9d4fa2c40
2 changed files with 138 additions and 73 deletions
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@ -225,6 +225,19 @@ pub fn lex_item(
                    )),
                );
            }
+        } else if c == b'r' && input.get(*curr_offset + 1) == Some(b'#').as_ref() {
+            // already checked `r#` pattern, so it's a raw string.
+            let lex_result = lex_raw_string(input, curr_offset, span_offset);
+            let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
+            if let Err(e) = lex_result {
+                return (
+                    Token {
+                        contents: TokenContents::Item,
+                        span,
+                    },
+                    Some(e),
+                );
+            }
        } else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
            break;
        }
@ -331,6 +344,65 @@ pub fn lex_item(
    (output, err)
 }

+fn lex_raw_string(
+    input: &[u8],
+    curr_offset: &mut usize,
+    span_offset: usize,
+) -> Result<(), ParseError> {
+    // Scans one raw string literal such as `r#'Look, I can use 'single quotes'!'#`.
+    // On entry `*curr_offset` indexes the leading `r`. On success it is left on the
+    // last `#` of the closing delimiter (it is not stepped past it). Error spans are
+    // positions in `input` shifted by `span_offset`. The text between the quotes can
+    // hold any byte, including newlines and double quotes, with no escaping.
+    //
+    // The run of `#` after `r` may be any length and must be repeated after the
+    // closing quote, in case there is a `'#` or `#'` in the string itself.
+    // E.g: r##'I can use '#' in a raw string'##
+    let mut prefix_sharp_cnt = 0;
+    let start = *curr_offset;
+    while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
+        prefix_sharp_cnt += 1;
+    }
+
+    // curr_offset is on the character `r`; step over it and over every prefix `#`
+    // so that it lands on the byte that has to be the opening quote.
+    //
+    // e.g: r###'<body>
+    //      ^   ^
+    //      |   curr_offset after the jump
+    //   curr_offset before
+    *curr_offset += prefix_sharp_cnt + 1;
+    // the byte right after the `#` run must be the opening single quote.
+    if input.get(*curr_offset) != Some(&b'\'') {
+        return Err(ParseError::Expected(
+            "'",
+            Span::new(span_offset + *curr_offset, span_offset + *curr_offset + 1),
+        ));
+    }
+
+    *curr_offset += 1;
+    let mut matches = false; // becomes true once a closing quote plus full `#` run is seen
+    while let Some(ch) = input.get(*curr_offset) {
+        // check for postfix '### - NOTE(review): the look-back can hit the opening quote (r#'#)
+        if *ch == b'#' {
+            let start_ch = input[*curr_offset - prefix_sharp_cnt];
+            let postfix = &input[*curr_offset - prefix_sharp_cnt + 1..=*curr_offset];
+            if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
+                matches = true;
+                break; // curr_offset stays on the final `#` of the delimiter
+            }
+        }
+        *curr_offset += 1
+    }
+    if !matches {
+        return Err(ParseError::UnexpectedEof(
+            "#".to_string(),
+            Span::new(span_offset + *curr_offset, span_offset + *curr_offset),
+        ));
+    }
+    Ok(())
+}
+
 pub fn lex_signature(
    input: &[u8],
    span_offset: usize,
@ -503,79 +575,6 @@ fn lex_internal(
        } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
            // If the next character is non-newline whitespace, skip it.
            curr_offset += 1;
-        } else if c == b'r' {
-            // A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
-            // If the next character is `#` we're probably looking at a raw string literal
-            // so we need to read all the text until we find a closing `#`. This raw string
-            // can contain any character, including newlines and double quotes without needing
-            // to escape them.
-            //
-            // A raw string can contain many `#` as prefix,
-            // incase if there is a `'#` or `#'` in the string itself.
-            // E.g: r##'I can use '#' in a raw string'##
-            let mut prefix_sharp_cnt = 0;
-            let start = curr_offset;
-            while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
-                prefix_sharp_cnt += 1;
-            }
-
-            if prefix_sharp_cnt != 0 {
-                // curr_offset is the character `r`, we need to move forward and skip all `#`
-                // characters.
-                //
-                // e.g: r###'<body>
-                //      ^
-                //      ^
-                //   curr_offset
-                curr_offset += prefix_sharp_cnt + 1;
-                // the next one should be a single quote.
-                if input.get(curr_offset) != Some(&b'\'') {
-                    error = Some(ParseError::Expected(
-                        "'",
-                        Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
-                    ));
-                }
-
-                curr_offset += 1;
-                let mut matches = false;
-                while let Some(ch) = input.get(curr_offset) {
-                    // check for postfix '###
-                    if *ch == b'#' {
-                        let start_ch = input[curr_offset - prefix_sharp_cnt];
-                        let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset];
-                        if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
-                            matches = true;
-                            curr_offset += 1;
-                            break;
-                        }
-                    }
-                    curr_offset += 1
-                }
-                if matches {
-                    output.push(Token::new(
-                        TokenContents::Item,
-                        Span::new(span_offset + start, span_offset + curr_offset),
-                    ));
-                } else if error.is_none() {
-                    error = Some(ParseError::UnexpectedEof(
-                        "#".to_string(),
-                        Span::new(span_offset + curr_offset, span_offset + curr_offset),
-                    ))
-                }
-            } else {
-                let (token, err) = lex_item(
-                    input,
-                    &mut curr_offset,
-                    span_offset,
-                    additional_whitespace,
-                    special_tokens,
-                    in_signature,
-                );
-                if error.is_none() {
-                    error = err;
-                }
-                output.push(token);
-            }
        } else {
            let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset);
            if let Some(token) = token {
--- a/src/tests/test_strings.rs
+++ b/src/tests/test_strings.rs
@ -87,6 +87,72 @@ fn raw_string() -> TestResult {
    )
 }

+#[test]
+fn raw_string_inside_parentheses() -> TestResult { // raw strings written inside a subexpression
+    let (left, right) = ('(', ')');
+    run_test( // one-sharp delimiter; body mixes double and single quotes
+        &format!(r#"{left}r#'abcde""fghi"''''jkl'#{right}"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test( // body contains a quote followed by one sharp, so two sharps delimit it
+        &format!(r#"{left}r##'abcde""fghi"''''#jkl'##{right}"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test( // body contains a quote followed by two sharps, so three sharps delimit it
+        &format!(r#"{left}r###'abcde""fghi"'''##'#jkl'###{right}"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("{left}r#''#{right}"), "")?;
+    run_test( // sharps in the middle of the body and right before the closing quote
+        &format!(r#"{left}r#'a string with sharp inside # and ends with #'#{right}"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
+#[test]
+fn raw_string_inside_list() -> TestResult { // raw strings as the only element of a list literal
+    let (left, right) = ('[', ']');
+    run_test( // one-sharp delimiter; body mixes double and single quotes; `get 0` picks the element
+        &format!(r#"{left}r#'abcde""fghi"''''jkl'#{right} | get 0"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test( // body contains a quote followed by one sharp, so two sharps delimit it
+        &format!(r#"{left}r##'abcde""fghi"''''#jkl'##{right} | get 0"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test( // body contains a quote followed by two sharps, so three sharps delimit it
+        &format!(r#"{left}r###'abcde""fghi"'''##'#jkl'###{right} | get 0"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("{left}r#''#{right} | get 0"), "")?;
+    run_test( // sharps in the middle of the body and right before the closing quote
+        &format!(r#"{left}r#'a string with sharp inside # and ends with #'#{right} | get 0"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
+#[test]
+fn raw_string_inside_closure() -> TestResult { // raw strings as the body of a braced block run with `do`
+    let (left, right) = ('{', '}');
+    run_test( // one-sharp delimiter; body mixes double and single quotes
+        &format!(r#"do {left}r#'abcde""fghi"''''jkl'#{right}"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test( // body contains a quote followed by one sharp, so two sharps delimit it
+        &format!(r#"do {left}r##'abcde""fghi"''''#jkl'##{right}"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test( // body contains a quote followed by two sharps, so three sharps delimit it
+        &format!(r#"do {left}r###'abcde""fghi"'''##'#jkl'###{right}"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("do {left}r#''#{right}"), "")?;
+    run_test( // sharps in the middle of the body and right before the closing quote
+        &format!(r#"do {left}r#'a string with sharp inside # and ends with #'#{right}"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
 #[test]
 fn incomplete_raw_string() -> TestResult {
    fail_test("r#abc", "expected '")