From 7d0531d27054cb32262549e4befb3b5a406d37c7 Mon Sep 17 00:00:00 2001 From: JT <547158+jntrnr@users.noreply.github.com> Date: Thu, 3 Mar 2022 13:14:03 -0500 Subject: [PATCH] Add support for escape characters, make nuon a JSON superset (#4706) * WIP * Finish adding escape support in strings * Try to fix windows --- crates/nu-cli/src/completions.rs | 4 +- .../src/conversions/into/duration.rs | 2 +- crates/nu-command/src/strings/parse.rs | 2 +- crates/nu-command/tests/commands/parse.rs | 8 +- crates/nu-parser/src/lex.rs | 29 +- crates/nu-parser/src/parser.rs | 274 ++++++++++++++---- docs/sample_config/default_config.nu | 2 +- src/eval_file.rs | 16 +- src/tests/test_parser.rs | 10 + src/utils.rs | 2 +- 10 files changed, 271 insertions(+), 78 deletions(-) diff --git a/crates/nu-cli/src/completions.rs b/crates/nu-cli/src/completions.rs index a5044c0a2..47816b6df 100644 --- a/crates/nu-cli/src/completions.rs +++ b/crates/nu-cli/src/completions.rs @@ -563,7 +563,7 @@ fn file_path_completion( ) -> Vec<(nu_protocol::Span, String)> { use std::path::{is_separator, Path}; - let partial = partial.replace('\"', ""); + let partial = partial.replace('\'', ""); let (base_dir_name, partial) = { // If partial is only a word we want to search in the current dir @@ -596,7 +596,7 @@ fn file_path_completion( } if path.contains(' ') { - path = format!("\"{}\"", path); + path = format!("\'{}\'", path); } Some((span, path)) diff --git a/crates/nu-command/src/conversions/into/duration.rs b/crates/nu-command/src/conversions/into/duration.rs index 5b6cdb211..ebf175a7b 100644 --- a/crates/nu-command/src/conversions/into/duration.rs +++ b/crates/nu-command/src/conversions/into/duration.rs @@ -226,7 +226,7 @@ mod test { let span = Span::test_data(); let word = Value::test_string("1sec"); let expected = Value::Duration { - val: 1 * 1000 * 1000 * 1000, + val: 1000 * 1000 * 1000, span, }; diff --git a/crates/nu-command/src/strings/parse.rs b/crates/nu-command/src/strings/parse.rs index 874f8b06c..528504f37 100644 --- a/crates/nu-command/src/strings/parse.rs +++ b/crates/nu-command/src/strings/parse.rs @@ -48,7 +48,7 @@ impl Command for Parse { }, Example { description: "Parse a string using regex pattern", - example: "echo \"hi there\" | parse -r \"(?P\\w+) (?P\\w+)\"", + example: "echo \"hi there\" | parse -r '(?P\\w+) (?P\\w+)'", result: Some(result), }, ] diff --git a/crates/nu-command/tests/commands/parse.rs b/crates/nu-command/tests/commands/parse.rs index b783b97bd..081391fcf 100644 --- a/crates/nu-command/tests/commands/parse.rs +++ b/crates/nu-command/tests/commands/parse.rs @@ -123,7 +123,7 @@ mod regex { cwd: dirs.test(), pipeline( r#" open nushell_git_log_oneline.txt - | parse --regex "(?P\w+) (?P.+) \(#(?P\d+)\)" + | parse --regex "(?P\\w+) (?P.+) \\(#(?P\\d+)\\)" | get 1 | get PR "# @@ -142,7 +142,7 @@ mod regex { cwd: dirs.test(), pipeline( r#" open nushell_git_log_oneline.txt - | parse --regex "(\w+) (.+) \(#(\d+)\)" + | parse --regex "(\\w+) (.+) \\(#(\\d+)\\)" | get 1 | get Capture1 "# @@ -161,7 +161,7 @@ mod regex { cwd: dirs.test(), pipeline( r#" open nushell_git_log_oneline.txt - | parse --regex "(?P\w+) (.+) \(#(?P\d+)\)" + | parse --regex "(?P\\w+) (.+) \\(#(?P\\d+)\\)" | get 1 | get Capture2 "# @@ -180,7 +180,7 @@ mod regex { cwd: dirs.test(), pipeline( r#" open nushell_git_log_oneline.txt - | parse --regex "(?P\w+ unfinished capture group" + | parse --regex "(?P\\w+ unfinished capture group" "# )); diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index cac85e404..5d0916326 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -89,7 +89,7 @@ pub fn lex_item( // The process of slurping up a baseline token repeats: // - // - String literal, which begins with `'`, `"` or `\``, and continues until + // - String literal, which begins with `'` or `"`, and continues until // the same character is encountered again. // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until // the matching closing delimiter is found, skipping comments and string @@ -101,10 +101,33 @@ pub fn lex_item( while let Some(c) = input.get(*curr_offset) { let c = *c; - if quote_start.is_some() { + if let Some(start) = quote_start { + // Check if we're in an escape sequence + if c == b'\\' && start == b'"' { + // Go ahead and consume the escape character if possible + if input.get(*curr_offset + 1).is_some() { + // Successfully escaped the character + *curr_offset += 2; + continue; + } else { + let span = Span::new(span_offset + token_start, span_offset + *curr_offset); + + return ( + span, + Some(ParseError::UnexpectedEof( + (start as char).to_string(), + Span { + start: span.end, + end: span.end, + }, + )), + ); + } + } // If we encountered the closing quote character for the current // string, we're done with the current string. - if Some(c) == quote_start { + if c == start { + // Also need to check to make sure we aren't escaped quote_start = None; } } else if c == b'#' { diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 27265569e..90eb16994 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -49,7 +49,7 @@ pub fn is_math_expression_like(bytes: &[u8]) -> bool { return false; } - if bytes == b"true" || bytes == b"false" { + if bytes == b"true" || bytes == b"false" || bytes == b"null" { return true; } @@ -1346,7 +1346,10 @@ pub fn parse_string_interpolation( let contents = working_set.get_span_contents(span); + let mut double_quote = false; + let (start, end) = if contents.starts_with(b"$\"") { + double_quote = true; let end = if contents.ends_with(b"\"") && contents.len() > 2 { span.end - 1 } else { @@ -1384,8 +1387,18 @@ pub fn parse_string_interpolation( end: b, }; let str_contents = working_set.get_span_contents(span); + + let str_contents = if double_quote { + let (str_contents, err) = unescape_string(str_contents, span); + error = error.or(err); + + str_contents + } else { + str_contents.to_vec() + }; + output.push(Expression { - expr: Expr::String(String::from_utf8_lossy(str_contents).to_string()), + expr: Expr::String(String::from_utf8_lossy(&str_contents).to_string()), span, ty: Type::String, custom_completion: None, @@ -2116,6 +2129,151 @@ pub fn parse_glob_pattern( } } +pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec, Option) { + let mut output = Vec::new(); + + let mut idx = 0; + let mut err = None; + + while idx < bytes.len() { + if bytes[idx] == b'\\' { + // We're in an escape + idx += 1; + + match bytes.get(idx) { + Some(b'"') => { + output.push(b'"'); + idx += 1; + } + Some(b'\'') => { + output.push(b'\''); + idx += 1; + } + Some(b'\\') => { + output.push(b'\\'); + idx += 1; + } + Some(b'/') => { + output.push(b'/'); + idx += 1; + } + Some(b'b') => { + output.push(0x8); + idx += 1; + } + Some(b'f') => { + output.push(0xc); + idx += 1; + } + Some(b'n') => { + output.push(b'\n'); + idx += 1; + } + Some(b'r') => { + output.push(b'\r'); + idx += 1; + } + Some(b't') => { + output.push(b'\t'); + idx += 1; + } + Some(b'u') => { + match ( + bytes.get(idx + 1), + bytes.get(idx + 2), + bytes.get(idx + 3), + bytes.get(idx + 4), + ) { + (Some(h1), Some(h2), Some(h3), Some(h4)) => { + let s = String::from_utf8(vec![*h1, *h2, *h3, *h4]); + + if let Ok(s) = s { + let int = u32::from_str_radix(&s, 16); + + if let Ok(int) = int { + let result = char::from_u32(int); + + if let Some(result) = result { + let mut buffer = vec![0; 4]; + let result = result.encode_utf8(&mut buffer); + + for elem in result.bytes() { + output.push(elem); + } + + idx += 5; + continue; + } + } + } + err = Some(ParseError::Expected( + "unicode hex value".into(), + Span { + start: (span.start + idx), + end: span.end, + }, + )); + } + _ => { + err = Some(ParseError::Expected( + "unicode hex value".into(), + Span { + start: (span.start + idx), + end: span.end, + }, + )); + } + } + idx += 5; + } + _ => { + err = Some(ParseError::Expected( + "supported escape character".into(), + Span { + start: (span.start + idx), + end: span.end, + }, + )); + } + } + } else { + output.push(bytes[idx]); + idx += 1; + } + } + + (output, err) +} + +pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option) { + if bytes.starts_with(b"\"") { + // Needs unescaping + let bytes = trim_quotes(bytes); + + let (bytes, err) = unescape_string(bytes, span); + + if let Ok(token) = String::from_utf8(bytes) { + (token, err) + } else { + ( + String::new(), + Some(ParseError::Expected("string".into(), span)), + ) + } + } else { + let bytes = trim_quotes(bytes); + + if let Ok(token) = String::from_utf8(bytes.into()) { + (token, None) + } else { + ( + String::new(), + Some(ParseError::Expected("string".into(), span)), + ) + } + } +} + pub fn parse_string( working_set: &mut StateWorkingSet, span: Span, @@ -2124,26 +2282,17 @@ pub fn parse_string( let bytes = working_set.get_span_contents(span); - let bytes = trim_quotes(bytes); + let (s, err) = unescape_unquote_string(bytes, span); - if let Ok(token) = String::from_utf8(bytes.into()) { - trace!("-- found {}", token); - - ( - Expression { - expr: Expr::String(token), - span, - ty: Type::String, - custom_completion: None, - }, - None, - ) - } else { - ( - garbage(span), - Some(ParseError::Expected("string".into(), span)), - ) - } + ( + Expression { + expr: Expr::String(s), + span, + ty: Type::String, + custom_completion: None, + }, + err, + ) } pub fn parse_string_strict( @@ -3259,41 +3408,59 @@ pub fn parse_value( return parse_variable_expr(working_set, span); } - if bytes == b"true" { - if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) { + // Check for reserved keyword values + match bytes { + b"true" => { + if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) { + return ( + Expression { + expr: Expr::Bool(true), + span, + ty: Type::Bool, + custom_completion: None, + }, + None, + ); + } else { + return ( + Expression::garbage(span), + Some(ParseError::Expected("non-boolean value".into(), span)), + ); + } + } + b"false" => { + if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) { + return ( + Expression { + expr: Expr::Bool(false), + span, + ty: Type::Bool, + custom_completion: None, + }, + None, + ); + } else { + return ( + Expression::garbage(span), + Some(ParseError::Expected("non-boolean value".into(), span)), + ); + } + } + b"null" => { return ( Expression { - expr: Expr::Bool(true), + expr: Expr::Nothing, span, - ty: Type::Bool, + ty: Type::Nothing, custom_completion: None, }, None, ); - } else { - return ( - Expression::garbage(span), - Some(ParseError::Expected("non-boolean value".into(), span)), - ); - } - } else if bytes == b"false" { - if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) { - return ( - Expression { - expr: Expr::Bool(false), - span, - ty: Type::Bool, - custom_completion: None, - }, - None, - ); - } else { - return ( - Expression::garbage(span), - Some(ParseError::Expected("non-boolean value".into(), span)), - ); } + + _ => {} } + match bytes[0] { b'$' => return parse_dollar_expr(working_set, span), b'(' => { @@ -3351,18 +3518,6 @@ pub fn parse_value( SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span), SyntaxShape::String => parse_string(working_set, span), SyntaxShape::Binary => parse_binary(working_set, span), - SyntaxShape::Block(_) => { - if bytes.starts_with(b"{") { - trace!("parsing value as a block expression"); - - parse_block_expression(working_set, shape, span) - } else { - ( - Expression::garbage(span), - Some(ParseError::Expected("block".into(), span)), - ) - } - } SyntaxShape::Signature => { if bytes.starts_with(b"[") { parse_signature(working_set, span) @@ -3447,6 +3602,7 @@ pub fn parse_value( SyntaxShape::DateTime, SyntaxShape::Filesize, SyntaxShape::Duration, + SyntaxShape::Record, SyntaxShape::Block(None), SyntaxShape::String, ]; diff --git a/docs/sample_config/default_config.nu b/docs/sample_config/default_config.nu index b2c6f0db1..bd93297f6 100644 --- a/docs/sample_config/default_config.nu +++ b/docs/sample_config/default_config.nu @@ -45,7 +45,7 @@ let-env ENV_CONVERSIONS = { # # This is a simplified version of completions for git branches and git remotes def "nu-complete git branches" [] { - ^git branch | lines | each { |line| $line | str find-replace "\* " "" | str trim } + ^git branch | lines | each { |line| $line | str find-replace '\* ' '' | str trim } } def "nu-complete git remotes" [] { diff --git a/src/eval_file.rs b/src/eval_file.rs index d606520f8..d96f95f40 100644 --- a/src/eval_file.rs +++ b/src/eval_file.rs @@ -55,22 +55,26 @@ pub(crate) fn evaluate( if working_set.find_decl(b"main").is_some() { let args = format!("main {}", args.join(" ")); - eval_source( + if !eval_source( engine_state, &mut stack, &file, &path, PipelineData::new(Span::new(0, 0)), - ); - eval_source( + ) { + std::process::exit(1); + } + if !eval_source( engine_state, &mut stack, args.as_bytes(), "", input, - ); - } else { - eval_source(engine_state, &mut stack, &file, &path, input); + ) { + std::process::exit(1); + } + } else if !eval_source(engine_state, &mut stack, &file, &path, input) { + std::process::exit(1); } if is_perf_true() { diff --git a/src/tests/test_parser.rs b/src/tests/test_parser.rs index ec04eeaba..ac498985d 100644 --- a/src/tests/test_parser.rs +++ b/src/tests/test_parser.rs @@ -329,3 +329,13 @@ fn block_arity_check2() -> TestResult { fn block_arity_check3() -> TestResult { fail_test(r#"ls | each { |x, y| 1}"#, "expected 1 block parameter") } + +#[test] +fn string_escape() -> TestResult { + run_test(r#""\u015B""#, "ś") +} + +#[test] +fn string_escape_interpolation() -> TestResult { + run_test(r#"$"\u015B(char hamburger)abc""#, "ś≡abc") +} diff --git a/src/utils.rs b/src/utils.rs index 1cf088bd0..bf0019030 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -23,7 +23,7 @@ pub(crate) fn gather_parent_env_vars(engine_state: &mut EngineState) { Some('\'') } } else { - Some('"') + Some('\'') } }