Add support for escape characters, make nuon a JSON superset (#4706)

* WIP * Finish adding escape support in strings * Try to fix windows
2025-08-12 13:17:36 +02:00 · 2022-03-03 13:14:03 -05:00
parent 13f2048ffb
commit 7d0531d270
10 changed files with 271 additions and 78 deletions
--- a/crates/nu-cli/src/completions.rs
+++ b/crates/nu-cli/src/completions.rs
@ -563,7 +563,7 @@ fn file_path_completion(
 ) -> Vec<(nu_protocol::Span, String)> {
    use std::path::{is_separator, Path};
-    let partial = partial.replace('\"', "");
+    let partial = partial.replace('\'', "");
    let (base_dir_name, partial) = {
        // If partial is only a word we want to search in the current dir
@ -596,7 +596,7 @@ fn file_path_completion(
                        }
                        if path.contains(' ') {
-                            path = format!("\"{}\"", path);
+                            path = format!("\'{}\'", path);
                        }
                        Some((span, path))
--- a/crates/nu-command/src/conversions/into/duration.rs
+++ b/crates/nu-command/src/conversions/into/duration.rs
@ -226,7 +226,7 @@ mod test {
        let span = Span::test_data();
        let word = Value::test_string("1sec");
        let expected = Value::Duration {
-            val: 1 * 1000 * 1000 * 1000,
+            val: 1000 * 1000 * 1000,
            span,
        };
--- a/crates/nu-command/src/strings/parse.rs
+++ b/crates/nu-command/src/strings/parse.rs
@ -48,7 +48,7 @@ impl Command for Parse {
            },
            Example {
                description: "Parse a string using regex pattern",
-                example: "echo \"hi there\" | parse -r \"(?P<foo>\\w+) (?P<bar>\\w+)\"",
+                example: "echo \"hi there\" | parse -r '(?P<foo>\\w+) (?P<bar>\\w+)'",
                result: Some(result),
            },
        ]
--- a/crates/nu-command/tests/commands/parse.rs
+++ b/crates/nu-command/tests/commands/parse.rs
@ -123,7 +123,7 @@ mod regex {
                cwd: dirs.test(), pipeline(
                r#"
                    open nushell_git_log_oneline.txt
-                    | parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
+                    | parse --regex "(?P<Hash>\\w+) (?P<Message>.+) \\(#(?P<PR>\\d+)\\)"
                    | get 1
                    | get PR
                "#
@ -142,7 +142,7 @@ mod regex {
                cwd: dirs.test(), pipeline(
                r#"
                    open nushell_git_log_oneline.txt
-                    | parse --regex "(\w+) (.+) \(#(\d+)\)"
+                    | parse --regex "(\\w+) (.+) \\(#(\\d+)\\)"
                    | get 1
                    | get Capture1
                "#
@ -161,7 +161,7 @@ mod regex {
                cwd: dirs.test(), pipeline(
                r#"
                    open nushell_git_log_oneline.txt
-                    | parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
+                    | parse --regex "(?P<Hash>\\w+) (.+) \\(#(?P<PR>\\d+)\\)"
                    | get 1
                    | get Capture2
                "#
@ -180,7 +180,7 @@ mod regex {
                cwd: dirs.test(), pipeline(
                r#"
                    open nushell_git_log_oneline.txt
-                    | parse --regex "(?P<Hash>\w+ unfinished capture group"
+                    | parse --regex "(?P<Hash>\\w+ unfinished capture group"
                "#
            ));
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@ -89,7 +89,7 @@ pub fn lex_item(
    // The process of slurping up a baseline token repeats:
    //
-    // - String literal, which begins with `'`, `"` or `\``, and continues until
+    // - String literal, which begins with `'` or `"`, and continues until
    //   the same character is encountered again.
    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
    //   the matching closing delimiter is found, skipping comments and string
@ -101,10 +101,33 @@ pub fn lex_item(
    while let Some(c) = input.get(*curr_offset) {
        let c = *c;
-        if quote_start.is_some() {
+        if let Some(start) = quote_start {
            // Check if we're in an escape sequence
            if c == b'\\' && start == b'"' {
                // Go ahead and consume the escape character if possible
                if input.get(*curr_offset + 1).is_some() {
                    // Successfully escaped the character
                    *curr_offset += 2;
                    continue;
                } else {
                    let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
                    return (
                        span,
                        Some(ParseError::UnexpectedEof(
                            (start as char).to_string(),
                            Span {
                                start: span.end,
                                end: span.end,
                            },
                        )),
                    );
                }
            }
            // If we encountered the closing quote character for the current
            // string, we're done with the current string.
-            if Some(c) == quote_start {
+            if c == start {
                // Also need to check to make sure we aren't escaped
                quote_start = None;
            }
        } else if c == b'#' {
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -49,7 +49,7 @@ pub fn is_math_expression_like(bytes: &[u8]) -> bool {
        return false;
    }
-    if bytes == b"true" || bytes == b"false" {
+    if bytes == b"true" || bytes == b"false" || bytes == b"null" {
        return true;
    }
@ -1346,7 +1346,10 @@ pub fn parse_string_interpolation(
    let contents = working_set.get_span_contents(span);
    let mut double_quote = false;
    let (start, end) = if contents.starts_with(b"$\"") {
        double_quote = true;
        let end = if contents.ends_with(b"\"") && contents.len() > 2 {
            span.end - 1
        } else {
@ -1384,8 +1387,18 @@ pub fn parse_string_interpolation(
                    end: b,
                };
                let str_contents = working_set.get_span_contents(span);
                let str_contents = if double_quote {
                    let (str_contents, err) = unescape_string(str_contents, span);
                    error = error.or(err);
                    str_contents
                } else {
                    str_contents.to_vec()
                };
                output.push(Expression {
-                    expr: Expr::String(String::from_utf8_lossy(str_contents).to_string()),
+                    expr: Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
                    span,
                    ty: Type::String,
                    custom_completion: None,
@ -2116,6 +2129,151 @@ pub fn parse_glob_pattern(
    }
 }
 /// Decodes the backslash escapes found in `bytes` and returns the resulting
 /// byte sequence together with the last error encountered, if any.
 ///
 /// Recognized escapes are `\"`, `\'`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`
 /// and `\uXXXX`, where `XXXX` is exactly four ASCII hexadecimal digits naming
 /// a Unicode scalar value (emitted as UTF-8).
 ///
 /// Decoding never stops early: on a malformed escape an error is recorded and
 /// scanning continues, so the returned bytes are best-effort whenever the
 /// error slot is `Some`. Only the most recent error is kept.
 ///
 /// * `bytes` - the text to decode.
 /// * `span` - source span used to position errors; an error starts at
 ///   `span.start` plus the index of the offending escape selector in `bytes`
 ///   and extends to `span.end`.
 pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>) {
    // The decoded form is never longer than the escaped form.
    let mut output = Vec::with_capacity(bytes.len());
    let mut idx = 0;
    let mut err = None;

    while idx < bytes.len() {
        if bytes[idx] != b'\\' {
            output.push(bytes[idx]);
            idx += 1;
            continue;
        }

        // Step over the backslash; `idx` now points at the escape selector.
        idx += 1;
        match bytes.get(idx) {
            Some(b'u') => {
                // `u32::from_str_radix` tolerates a leading `+`, so the digits
                // are validated explicitly: `\u+041` must not decode to `A`.
                // `char::from_u32` additionally rejects surrogate code points.
                let decoded = bytes
                    .get(idx + 1..idx + 5)
                    .filter(|digits| digits.iter().all(u8::is_ascii_hexdigit))
                    .and_then(|digits| std::str::from_utf8(digits).ok())
                    .and_then(|digits| u32::from_str_radix(digits, 16).ok())
                    .and_then(char::from_u32);

                match decoded {
                    Some(ch) => {
                        let mut buffer = [0u8; 4];
                        output.extend_from_slice(ch.encode_utf8(&mut buffer).as_bytes());
                    }
                    None => {
                        err = Some(ParseError::Expected(
                            "unicode hex value".into(),
                            Span {
                                start: (span.start + idx),
                                end: span.end,
                            },
                        ));
                    }
                }
                // Skip the `u` and its four digit positions whether or not the
                // escape was valid; running past the end simply ends the loop.
                idx += 5;
            }
            selector => {
                let literal = selector.and_then(|&byte| match byte {
                    b'"' | b'\'' | b'\\' | b'/' => Some(byte),
                    b'b' => Some(0x8),
                    b'f' => Some(0xc),
                    b'n' => Some(b'\n'),
                    b'r' => Some(b'\r'),
                    b't' => Some(b'\t'),
                    _ => None,
                });

                if let Some(byte) = literal {
                    output.push(byte);
                    idx += 1;
                } else {
                    // Unsupported selector or a trailing backslash. `idx` is
                    // deliberately left alone: an unsupported selector byte is
                    // copied through verbatim by the next iteration.
                    err = Some(ParseError::Expected(
                        "supported escape character".into(),
                        Span {
                            start: (span.start + idx),
                            end: span.end,
                        },
                    ));
                }
            }
        }
    }

    (output, err)
 }
 /// Strips the surrounding quotes from a string token and returns its text.
 ///
 /// Tokens that begin with a double quote additionally have their backslash
 /// escapes decoded via `unescape_string`, and any error from that step is
 /// passed along. All other tokens are taken literally after `trim_quotes`.
 ///
 /// If the resulting bytes are not valid UTF-8, an empty string is returned
 /// together with an `Expected("string")` error covering `span`.
 pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
    // Decide on the original token, before the quotes are trimmed away.
    let needs_unescaping = bytes.starts_with(b"\"");
    let inner = trim_quotes(bytes);

    let (raw, unescape_err) = if needs_unescaping {
        unescape_string(inner, span)
    } else {
        (inner.to_vec(), None)
    };

    match String::from_utf8(raw) {
        Ok(token) => (token, unescape_err),
        Err(_) => (
            String::new(),
            Some(ParseError::Expected("string".into(), span)),
        ),
    }
 }
 pub fn parse_string(
    working_set: &mut StateWorkingSet,
    span: Span,
@ -2124,26 +2282,17 @@ pub fn parse_string(
    let bytes = working_set.get_span_contents(span);
-    let bytes = trim_quotes(bytes);
+    let (s, err) = unescape_unquote_string(bytes, span);
    if let Ok(token) = String::from_utf8(bytes.into()) {
        trace!("-- found {}", token);
    (
        Expression {
-                expr: Expr::String(token),
+            expr: Expr::String(s),
            span,
            ty: Type::String,
            custom_completion: None,
        },
-            None,
+        err,
    )
    } else {
        (
            garbage(span),
            Some(ParseError::Expected("string".into(), span)),
        )
    }
 }
 pub fn parse_string_strict(
@ -3259,7 +3408,9 @@ pub fn parse_value(
        return parse_variable_expr(working_set, span);
    }
-    if bytes == b"true" {
+    // Check for reserved keyword values
    match bytes {
        b"true" => {
            if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
                return (
                    Expression {
@ -3276,7 +3427,8 @@ pub fn parse_value(
                    Some(ParseError::Expected("non-boolean value".into(), span)),
                );
            }
-    } else if bytes == b"false" {
+        }
        b"false" => {
            if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
                return (
                    Expression {
@ -3294,6 +3446,21 @@ pub fn parse_value(
                );
            }
        }
        b"null" => {
            return (
                Expression {
                    expr: Expr::Nothing,
                    span,
                    ty: Type::Nothing,
                    custom_completion: None,
                },
                None,
            );
        }
        _ => {}
    }
    match bytes[0] {
        b'$' => return parse_dollar_expr(working_set, span),
        b'(' => {
@ -3351,18 +3518,6 @@ pub fn parse_value(
        SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
        SyntaxShape::String => parse_string(working_set, span),
        SyntaxShape::Binary => parse_binary(working_set, span),
        SyntaxShape::Block(_) => {
            if bytes.starts_with(b"{") {
                trace!("parsing value as a block expression");
                parse_block_expression(working_set, shape, span)
            } else {
                (
                    Expression::garbage(span),
                    Some(ParseError::Expected("block".into(), span)),
                )
            }
        }
        SyntaxShape::Signature => {
            if bytes.starts_with(b"[") {
                parse_signature(working_set, span)
@ -3447,6 +3602,7 @@ pub fn parse_value(
                    SyntaxShape::DateTime,
                    SyntaxShape::Filesize,
                    SyntaxShape::Duration,
                    SyntaxShape::Record,
                    SyntaxShape::Block(None),
                    SyntaxShape::String,
                ];
--- a/docs/sample_config/default_config.nu
+++ b/docs/sample_config/default_config.nu
@ -45,7 +45,7 @@ let-env ENV_CONVERSIONS = {
 #
 # This is a simplified version of completions for git branches and git remotes
 def "nu-complete git branches" [] {
-  ^git branch | lines | each { |line| $line | str find-replace "\* " "" | str trim }
+  ^git branch | lines | each { |line| $line | str find-replace '\* ' '' | str trim }
 }
 def "nu-complete git remotes" [] {
--- a/src/eval_file.rs
+++ b/src/eval_file.rs
@ -55,22 +55,26 @@ pub(crate) fn evaluate(
    if working_set.find_decl(b"main").is_some() {
        let args = format!("main {}", args.join(" "));
-        eval_source(
+        if !eval_source(
            engine_state,
            &mut stack,
            &file,
            &path,
            PipelineData::new(Span::new(0, 0)),
-        );
+        ) {
-        eval_source(
+            std::process::exit(1);
        }
        if !eval_source(
            engine_state,
            &mut stack,
            args.as_bytes(),
            "<commandline>",
            input,
-        );
+        ) {
-    } else {
+            std::process::exit(1);
-        eval_source(engine_state, &mut stack, &file, &path, input);
+        }
    } else if !eval_source(engine_state, &mut stack, &file, &path, input) {
        std::process::exit(1);
    }
    if is_perf_true() {
--- a/src/tests/test_parser.rs
+++ b/src/tests/test_parser.rs
@ -329,3 +329,13 @@ fn block_arity_check2() -> TestResult {
 fn block_arity_check3() -> TestResult {
    fail_test(r#"ls | each { |x, y| 1}"#, "expected 1 block parameter")
 }
 #[test]
 fn string_escape() -> TestResult {
    run_test(r#""\u015B""#, "ś")
 }
 #[test]
 fn string_escape_interpolation() -> TestResult {
    run_test(r#"$"\u015B(char hamburger)abc""#, "ś≡abc")
 }
--- a/src/utils.rs
+++ b/src/utils.rs
@ -23,7 +23,7 @@ pub(crate) fn gather_parent_env_vars(engine_state: &mut EngineState) {
                Some('\'')
            }
        } else {
-            Some('"')
+            Some('\'')
        }
    }