add raw-string literal support (#9956)

# Description This PR adds raw string support by using `r#` at the beginning of single-quoted strings and `#` at the end. Notice that escapes are not processed, even within single quotes, parentheses don't mean anything, $variables don't mean anything. It's just a string. ```nushell ❯ echo r#'one\ntwo (blah) ($var)'# one\ntwo (blah) ($var) ``` Notice how they work without `echo` or `print` and how they work without carriage returns. ```nushell ❯ r#'adsfa'# adsfa ❯ r##'asdfa'@qpejq'## asdfa'@qpejq ❯ r#'asdfasdfasf ∙ foqwejfqo@'23rfjqf'# ``` They also have a special configurable color in the repl. (use single quotes though) ![image](https://github.com/nushell/nushell/assets/343840/8780e21d-de4c-45b3-9880-2425f5fe10ef) They should work like Rust raw literals and allow `r##`, `r###`, `r####`, etc., to help with having one or many `#`'s in the middle of your raw-string. They should work with `let` as well. ```nushell r#'some\nraw\nstring'# | str upcase ``` closes https://github.com/nushell/nushell/issues/5091 # User-Facing Changes  # Tests + Formatting  # After Submitting  --------- Co-authored-by: WindSoilder <WindSoilder@outlook.com> Co-authored-by: Ian Manske <ian.manske@pm.me>
2025-06-30 22:50:14 +02:00 · 2024-05-02 09:36:37 -04:00
parent b5741ef14b
commit 8ed0d84d6a
17 changed files with 241 additions and 3 deletions
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -66,6 +66,11 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) ->

    let b = bytes[0];

+    // check for raw string
+    if bytes.starts_with(b"r#") {
+        return true;
+    }
+
    if b == b'(' || b == b'{' || b == b'[' || b == b'$' || b == b'"' || b == b'\'' || b == b'-' {
        return true;
    }
@ -578,6 +583,7 @@ pub fn parse_multispan_value(
    spans_idx: &mut usize,
    shape: &SyntaxShape,
 ) -> Expression {
+    trace!("parse multispan value");
    match shape {
        SyntaxShape::VarWithOptType => {
            trace!("parsing: var with opt type");
@ -1565,6 +1571,66 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -
    }
 }

+/// Parse a raw-string literal of the form `r#'...'#`, `r##'...'##`, and so on.
+///
+/// The span must start with `r` followed by one or more `#`, then a single
+/// quote, the string body, a closing single quote, and the same number of `#`
+/// as in the prefix. The body is taken verbatim (no escape processing happens
+/// here) and returned as an `Expr::RawString` expression of type `Type::String`.
+///
+/// On malformed input a `ParseError` is recorded on `working_set` and a
+/// garbage expression covering `span` is returned.
+pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
+    trace!("parsing: raw-string, with required delimiters");
+
+    let contents = working_set.get_span_contents(span);
+
+    if !contents.starts_with(b"r#") {
+        working_set.error(ParseError::Expected("r#", span));
+        return garbage(span);
+    }
+
+    // Count the run of `#` that directly follows the leading `r`.
+    // The `starts_with` guard above guarantees this is at least 1.
+    let hashes = contents[1..].iter().take_while(|b| **b == b'#').count();
+    let total = contents.len();
+
+    // The shortest well-formed literal is
+    // `r` + hashes + `'` + `'` + hashes, i.e. `2 * hashes + 3` bytes.
+    // Anything shorter cannot hold both quotes and a full postfix.
+    if total < 2 * hashes + 3 {
+        working_set.error(ParseError::Unclosed('\''.into(), span));
+        return garbage(span);
+    }
+
+    // Positions of the quotes that delimit the body; both are in bounds
+    // (and distinct) thanks to the length check above.
+    let open_quote = hashes + 1;
+    let close_quote = total - hashes - 1;
+
+    // The trailing `hashes` bytes must all be `#` to balance the prefix.
+    if !contents[close_quote + 1..].iter().all(|b| *b == b'#') {
+        working_set.error(ParseError::Unbalanced(
+            "prefix #".to_string(),
+            "postfix #".to_string(),
+            span,
+        ));
+        return garbage(span);
+    }
+
+    // The body must be wrapped in single quotes on both sides.
+    if !(contents[open_quote] == b'\'' && contents[close_quote] == b'\'') {
+        working_set.error(ParseError::Unclosed('\''.into(), span));
+        return garbage(span);
+    }
+
+    // Everything strictly between the two quotes is the string value.
+    match String::from_utf8(contents[open_quote + 1..close_quote].to_vec()) {
+        Ok(text) => Expression {
+            expr: Expr::RawString(text),
+            span,
+            ty: Type::String,
+            custom_completion: None,
+        },
+        Err(_) => {
+            working_set.error(ParseError::Expected("utf8 raw-string", span));
+            garbage(span)
+        }
+    }
+}
+
 pub fn parse_paren_expr(
    working_set: &mut StateWorkingSet,
    span: Span,
@ -4553,6 +4619,9 @@ pub fn parse_value(
                return Expression::garbage(span);
            }
        },
+        b'r' if bytes.len() > 1 && bytes[1] == b'#' => {
+            return parse_raw_string(working_set, span);
+        }
        _ => {}
    }

@ -6075,6 +6144,7 @@ pub fn discover_captures_in_expr(
            }
        }
        Expr::String(_) => {}
+        Expr::RawString(_) => {}
        Expr::StringInterpolation(exprs) => {
            for expr in exprs {
                discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
@ -6236,6 +6306,7 @@ pub fn parse(
    contents: &[u8],
    scoped: bool,
 ) -> Arc<Block> {
+    trace!("parse");
    let name = match fname {
        Some(fname) => {
            // use the canonical name for this filename
@ -6253,9 +6324,13 @@ pub fn parse(

    let mut output = {
        if let Some(block) = previously_parsed_block {
+            // dbg!("previous block");
            return block;
        } else {
+            // dbg!("starting lex");
            let (output, err) = lex(contents, new_span.start, &[], &[], false);
+            // dbg!("finished lex");
+            // dbg!(&output);
            if let Some(err) = err {
                working_set.error(err)
            }