mirror of
https://github.com/nushell/nushell.git
synced 2025-04-17 09:48:19 +02:00
Add support for escape characters, make nuon a JSON superset (#4706)
* WIP * Finish adding escape support in strings * Try to fix windows
This commit is contained in:
parent
13f2048ffb
commit
7d0531d270
@ -563,7 +563,7 @@ fn file_path_completion(
|
|||||||
) -> Vec<(nu_protocol::Span, String)> {
|
) -> Vec<(nu_protocol::Span, String)> {
|
||||||
use std::path::{is_separator, Path};
|
use std::path::{is_separator, Path};
|
||||||
|
|
||||||
let partial = partial.replace('\"', "");
|
let partial = partial.replace('\'', "");
|
||||||
|
|
||||||
let (base_dir_name, partial) = {
|
let (base_dir_name, partial) = {
|
||||||
// If partial is only a word we want to search in the current dir
|
// If partial is only a word we want to search in the current dir
|
||||||
@ -596,7 +596,7 @@ fn file_path_completion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if path.contains(' ') {
|
if path.contains(' ') {
|
||||||
path = format!("\"{}\"", path);
|
path = format!("\'{}\'", path);
|
||||||
}
|
}
|
||||||
|
|
||||||
Some((span, path))
|
Some((span, path))
|
||||||
|
@ -226,7 +226,7 @@ mod test {
|
|||||||
let span = Span::test_data();
|
let span = Span::test_data();
|
||||||
let word = Value::test_string("1sec");
|
let word = Value::test_string("1sec");
|
||||||
let expected = Value::Duration {
|
let expected = Value::Duration {
|
||||||
val: 1 * 1000 * 1000 * 1000,
|
val: 1000 * 1000 * 1000,
|
||||||
span,
|
span,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ impl Command for Parse {
|
|||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Parse a string using regex pattern",
|
description: "Parse a string using regex pattern",
|
||||||
example: "echo \"hi there\" | parse -r \"(?P<foo>\\w+) (?P<bar>\\w+)\"",
|
example: "echo \"hi there\" | parse -r '(?P<foo>\\w+) (?P<bar>\\w+)'",
|
||||||
result: Some(result),
|
result: Some(result),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -123,7 +123,7 @@ mod regex {
|
|||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
|
| parse --regex "(?P<Hash>\\w+) (?P<Message>.+) \\(#(?P<PR>\\d+)\\)"
|
||||||
| get 1
|
| get 1
|
||||||
| get PR
|
| get PR
|
||||||
"#
|
"#
|
||||||
@ -142,7 +142,7 @@ mod regex {
|
|||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(\w+) (.+) \(#(\d+)\)"
|
| parse --regex "(\\w+) (.+) \\(#(\\d+)\\)"
|
||||||
| get 1
|
| get 1
|
||||||
| get Capture1
|
| get Capture1
|
||||||
"#
|
"#
|
||||||
@ -161,7 +161,7 @@ mod regex {
|
|||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
|
| parse --regex "(?P<Hash>\\w+) (.+) \\(#(?P<PR>\\d+)\\)"
|
||||||
| get 1
|
| get 1
|
||||||
| get Capture2
|
| get Capture2
|
||||||
"#
|
"#
|
||||||
@ -180,7 +180,7 @@ mod regex {
|
|||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(?P<Hash>\w+ unfinished capture group"
|
| parse --regex "(?P<Hash>\\w+ unfinished capture group"
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ pub fn lex_item(
|
|||||||
|
|
||||||
// The process of slurping up a baseline token repeats:
|
// The process of slurping up a baseline token repeats:
|
||||||
//
|
//
|
||||||
// - String literal, which begins with `'`, `"` or `\``, and continues until
|
// - String literal, which begins with `'` or `"`, and continues until
|
||||||
// the same character is encountered again.
|
// the same character is encountered again.
|
||||||
// - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
|
// - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
|
||||||
// the matching closing delimiter is found, skipping comments and string
|
// the matching closing delimiter is found, skipping comments and string
|
||||||
@ -101,10 +101,33 @@ pub fn lex_item(
|
|||||||
while let Some(c) = input.get(*curr_offset) {
|
while let Some(c) = input.get(*curr_offset) {
|
||||||
let c = *c;
|
let c = *c;
|
||||||
|
|
||||||
if quote_start.is_some() {
|
if let Some(start) = quote_start {
|
||||||
|
// Check if we're in an escape sequence
|
||||||
|
if c == b'\\' && start == b'"' {
|
||||||
|
// Go ahead and consume the escape character if possible
|
||||||
|
if input.get(*curr_offset + 1).is_some() {
|
||||||
|
// Successfully escaped the character
|
||||||
|
*curr_offset += 2;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
|
||||||
|
|
||||||
|
return (
|
||||||
|
span,
|
||||||
|
Some(ParseError::UnexpectedEof(
|
||||||
|
(start as char).to_string(),
|
||||||
|
Span {
|
||||||
|
start: span.end,
|
||||||
|
end: span.end,
|
||||||
|
},
|
||||||
|
)),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
// If we encountered the closing quote character for the current
|
// If we encountered the closing quote character for the current
|
||||||
// string, we're done with the current string.
|
// string, we're done with the current string.
|
||||||
if Some(c) == quote_start {
|
if c == start {
|
||||||
|
// Also need to check to make sure we aren't escaped
|
||||||
quote_start = None;
|
quote_start = None;
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
|
@ -49,7 +49,7 @@ pub fn is_math_expression_like(bytes: &[u8]) -> bool {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if bytes == b"true" || bytes == b"false" {
|
if bytes == b"true" || bytes == b"false" || bytes == b"null" {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1346,7 +1346,10 @@ pub fn parse_string_interpolation(
|
|||||||
|
|
||||||
let contents = working_set.get_span_contents(span);
|
let contents = working_set.get_span_contents(span);
|
||||||
|
|
||||||
|
let mut double_quote = false;
|
||||||
|
|
||||||
let (start, end) = if contents.starts_with(b"$\"") {
|
let (start, end) = if contents.starts_with(b"$\"") {
|
||||||
|
double_quote = true;
|
||||||
let end = if contents.ends_with(b"\"") && contents.len() > 2 {
|
let end = if contents.ends_with(b"\"") && contents.len() > 2 {
|
||||||
span.end - 1
|
span.end - 1
|
||||||
} else {
|
} else {
|
||||||
@ -1384,8 +1387,18 @@ pub fn parse_string_interpolation(
|
|||||||
end: b,
|
end: b,
|
||||||
};
|
};
|
||||||
let str_contents = working_set.get_span_contents(span);
|
let str_contents = working_set.get_span_contents(span);
|
||||||
|
|
||||||
|
let str_contents = if double_quote {
|
||||||
|
let (str_contents, err) = unescape_string(str_contents, span);
|
||||||
|
error = error.or(err);
|
||||||
|
|
||||||
|
str_contents
|
||||||
|
} else {
|
||||||
|
str_contents.to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
output.push(Expression {
|
output.push(Expression {
|
||||||
expr: Expr::String(String::from_utf8_lossy(str_contents).to_string()),
|
expr: Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
|
||||||
span,
|
span,
|
||||||
ty: Type::String,
|
ty: Type::String,
|
||||||
custom_completion: None,
|
custom_completion: None,
|
||||||
@ -2116,6 +2129,151 @@ pub fn parse_glob_pattern(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>) {
|
||||||
|
let mut output = Vec::new();
|
||||||
|
|
||||||
|
let mut idx = 0;
|
||||||
|
let mut err = None;
|
||||||
|
|
||||||
|
while idx < bytes.len() {
|
||||||
|
if bytes[idx] == b'\\' {
|
||||||
|
// We're in an escape
|
||||||
|
idx += 1;
|
||||||
|
|
||||||
|
match bytes.get(idx) {
|
||||||
|
Some(b'"') => {
|
||||||
|
output.push(b'"');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'\'') => {
|
||||||
|
output.push(b'\'');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'\\') => {
|
||||||
|
output.push(b'\\');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'/') => {
|
||||||
|
output.push(b'/');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'b') => {
|
||||||
|
output.push(0x8);
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'f') => {
|
||||||
|
output.push(0xc);
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'n') => {
|
||||||
|
output.push(b'\n');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'r') => {
|
||||||
|
output.push(b'\r');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b't') => {
|
||||||
|
output.push(b'\t');
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
Some(b'u') => {
|
||||||
|
match (
|
||||||
|
bytes.get(idx + 1),
|
||||||
|
bytes.get(idx + 2),
|
||||||
|
bytes.get(idx + 3),
|
||||||
|
bytes.get(idx + 4),
|
||||||
|
) {
|
||||||
|
(Some(h1), Some(h2), Some(h3), Some(h4)) => {
|
||||||
|
let s = String::from_utf8(vec![*h1, *h2, *h3, *h4]);
|
||||||
|
|
||||||
|
if let Ok(s) = s {
|
||||||
|
let int = u32::from_str_radix(&s, 16);
|
||||||
|
|
||||||
|
if let Ok(int) = int {
|
||||||
|
let result = char::from_u32(int);
|
||||||
|
|
||||||
|
if let Some(result) = result {
|
||||||
|
let mut buffer = vec![0; 4];
|
||||||
|
let result = result.encode_utf8(&mut buffer);
|
||||||
|
|
||||||
|
for elem in result.bytes() {
|
||||||
|
output.push(elem);
|
||||||
|
}
|
||||||
|
|
||||||
|
idx += 5;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err = Some(ParseError::Expected(
|
||||||
|
"unicode hex value".into(),
|
||||||
|
Span {
|
||||||
|
start: (span.start + idx),
|
||||||
|
end: span.end,
|
||||||
|
},
|
||||||
|
));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
err = Some(ParseError::Expected(
|
||||||
|
"unicode hex value".into(),
|
||||||
|
Span {
|
||||||
|
start: (span.start + idx),
|
||||||
|
end: span.end,
|
||||||
|
},
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idx += 5;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
err = Some(ParseError::Expected(
|
||||||
|
"supported escape character".into(),
|
||||||
|
Span {
|
||||||
|
start: (span.start + idx),
|
||||||
|
end: span.end,
|
||||||
|
},
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
output.push(bytes[idx]);
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(output, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
|
||||||
|
if bytes.starts_with(b"\"") {
|
||||||
|
// Needs unescaping
|
||||||
|
let bytes = trim_quotes(bytes);
|
||||||
|
|
||||||
|
let (bytes, err) = unescape_string(bytes, span);
|
||||||
|
|
||||||
|
if let Ok(token) = String::from_utf8(bytes) {
|
||||||
|
(token, err)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
String::new(),
|
||||||
|
Some(ParseError::Expected("string".into(), span)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let bytes = trim_quotes(bytes);
|
||||||
|
|
||||||
|
if let Ok(token) = String::from_utf8(bytes.into()) {
|
||||||
|
(token, None)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
String::new(),
|
||||||
|
Some(ParseError::Expected("string".into(), span)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_string(
|
pub fn parse_string(
|
||||||
working_set: &mut StateWorkingSet,
|
working_set: &mut StateWorkingSet,
|
||||||
span: Span,
|
span: Span,
|
||||||
@ -2124,26 +2282,17 @@ pub fn parse_string(
|
|||||||
|
|
||||||
let bytes = working_set.get_span_contents(span);
|
let bytes = working_set.get_span_contents(span);
|
||||||
|
|
||||||
let bytes = trim_quotes(bytes);
|
let (s, err) = unescape_unquote_string(bytes, span);
|
||||||
|
|
||||||
if let Ok(token) = String::from_utf8(bytes.into()) {
|
|
||||||
trace!("-- found {}", token);
|
|
||||||
|
|
||||||
(
|
(
|
||||||
Expression {
|
Expression {
|
||||||
expr: Expr::String(token),
|
expr: Expr::String(s),
|
||||||
span,
|
span,
|
||||||
ty: Type::String,
|
ty: Type::String,
|
||||||
custom_completion: None,
|
custom_completion: None,
|
||||||
},
|
},
|
||||||
None,
|
err,
|
||||||
)
|
)
|
||||||
} else {
|
|
||||||
(
|
|
||||||
garbage(span),
|
|
||||||
Some(ParseError::Expected("string".into(), span)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_string_strict(
|
pub fn parse_string_strict(
|
||||||
@ -3259,7 +3408,9 @@ pub fn parse_value(
|
|||||||
return parse_variable_expr(working_set, span);
|
return parse_variable_expr(working_set, span);
|
||||||
}
|
}
|
||||||
|
|
||||||
if bytes == b"true" {
|
// Check for reserved keyword values
|
||||||
|
match bytes {
|
||||||
|
b"true" => {
|
||||||
if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
|
if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
|
||||||
return (
|
return (
|
||||||
Expression {
|
Expression {
|
||||||
@ -3276,7 +3427,8 @@ pub fn parse_value(
|
|||||||
Some(ParseError::Expected("non-boolean value".into(), span)),
|
Some(ParseError::Expected("non-boolean value".into(), span)),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else if bytes == b"false" {
|
}
|
||||||
|
b"false" => {
|
||||||
if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
|
if matches!(shape, SyntaxShape::Boolean) || matches!(shape, SyntaxShape::Any) {
|
||||||
return (
|
return (
|
||||||
Expression {
|
Expression {
|
||||||
@ -3294,6 +3446,21 @@ pub fn parse_value(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
b"null" => {
|
||||||
|
return (
|
||||||
|
Expression {
|
||||||
|
expr: Expr::Nothing,
|
||||||
|
span,
|
||||||
|
ty: Type::Nothing,
|
||||||
|
custom_completion: None,
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
match bytes[0] {
|
match bytes[0] {
|
||||||
b'$' => return parse_dollar_expr(working_set, span),
|
b'$' => return parse_dollar_expr(working_set, span),
|
||||||
b'(' => {
|
b'(' => {
|
||||||
@ -3351,18 +3518,6 @@ pub fn parse_value(
|
|||||||
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
|
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
|
||||||
SyntaxShape::String => parse_string(working_set, span),
|
SyntaxShape::String => parse_string(working_set, span),
|
||||||
SyntaxShape::Binary => parse_binary(working_set, span),
|
SyntaxShape::Binary => parse_binary(working_set, span),
|
||||||
SyntaxShape::Block(_) => {
|
|
||||||
if bytes.starts_with(b"{") {
|
|
||||||
trace!("parsing value as a block expression");
|
|
||||||
|
|
||||||
parse_block_expression(working_set, shape, span)
|
|
||||||
} else {
|
|
||||||
(
|
|
||||||
Expression::garbage(span),
|
|
||||||
Some(ParseError::Expected("block".into(), span)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SyntaxShape::Signature => {
|
SyntaxShape::Signature => {
|
||||||
if bytes.starts_with(b"[") {
|
if bytes.starts_with(b"[") {
|
||||||
parse_signature(working_set, span)
|
parse_signature(working_set, span)
|
||||||
@ -3447,6 +3602,7 @@ pub fn parse_value(
|
|||||||
SyntaxShape::DateTime,
|
SyntaxShape::DateTime,
|
||||||
SyntaxShape::Filesize,
|
SyntaxShape::Filesize,
|
||||||
SyntaxShape::Duration,
|
SyntaxShape::Duration,
|
||||||
|
SyntaxShape::Record,
|
||||||
SyntaxShape::Block(None),
|
SyntaxShape::Block(None),
|
||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
];
|
];
|
||||||
|
@ -45,7 +45,7 @@ let-env ENV_CONVERSIONS = {
|
|||||||
#
|
#
|
||||||
# This is a simplified version of completions for git branches and git remotes
|
# This is a simplified version of completions for git branches and git remotes
|
||||||
def "nu-complete git branches" [] {
|
def "nu-complete git branches" [] {
|
||||||
^git branch | lines | each { |line| $line | str find-replace "\* " "" | str trim }
|
^git branch | lines | each { |line| $line | str find-replace '\* ' '' | str trim }
|
||||||
}
|
}
|
||||||
|
|
||||||
def "nu-complete git remotes" [] {
|
def "nu-complete git remotes" [] {
|
||||||
|
@ -55,22 +55,26 @@ pub(crate) fn evaluate(
|
|||||||
if working_set.find_decl(b"main").is_some() {
|
if working_set.find_decl(b"main").is_some() {
|
||||||
let args = format!("main {}", args.join(" "));
|
let args = format!("main {}", args.join(" "));
|
||||||
|
|
||||||
eval_source(
|
if !eval_source(
|
||||||
engine_state,
|
engine_state,
|
||||||
&mut stack,
|
&mut stack,
|
||||||
&file,
|
&file,
|
||||||
&path,
|
&path,
|
||||||
PipelineData::new(Span::new(0, 0)),
|
PipelineData::new(Span::new(0, 0)),
|
||||||
);
|
) {
|
||||||
eval_source(
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
if !eval_source(
|
||||||
engine_state,
|
engine_state,
|
||||||
&mut stack,
|
&mut stack,
|
||||||
args.as_bytes(),
|
args.as_bytes(),
|
||||||
"<commandline>",
|
"<commandline>",
|
||||||
input,
|
input,
|
||||||
);
|
) {
|
||||||
} else {
|
std::process::exit(1);
|
||||||
eval_source(engine_state, &mut stack, &file, &path, input);
|
}
|
||||||
|
} else if !eval_source(engine_state, &mut stack, &file, &path, input) {
|
||||||
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_perf_true() {
|
if is_perf_true() {
|
||||||
|
@ -329,3 +329,13 @@ fn block_arity_check2() -> TestResult {
|
|||||||
fn block_arity_check3() -> TestResult {
|
fn block_arity_check3() -> TestResult {
|
||||||
fail_test(r#"ls | each { |x, y| 1}"#, "expected 1 block parameter")
|
fail_test(r#"ls | each { |x, y| 1}"#, "expected 1 block parameter")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A `\uXXXX` escape inside a double-quoted string evaluates to the character
/// it names.
#[test]
fn string_escape() -> TestResult {
    let source = r#""\u015B""#;
    let expected = "ś";

    run_test(source, expected)
}
|
||||||
|
|
||||||
|
/// Escapes are also processed in the literal parts of a double-quoted
/// interpolated string, alongside an embedded `(char hamburger)` expression.
#[test]
fn string_escape_interpolation() -> TestResult {
    let source = r#"$"\u015B(char hamburger)abc""#;
    let expected = "ś≡abc";

    run_test(source, expected)
}
|
||||||
|
@ -23,7 +23,7 @@ pub(crate) fn gather_parent_env_vars(engine_state: &mut EngineState) {
|
|||||||
Some('\'')
|
Some('\'')
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Some('"')
|
Some('\'')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user