forked from extern/nushell
Move most of the peculiar argument handling for external calls into the parser (#13089)
# Description

We've had a lot of different issues and PRs related to arg handling with externals since the rewrite of `run-external` in #12921:

- #12950
- #12955
- #13000
- #13001
- #13021
- #13027
- #13028
- #13073

Many of these are caused by the argument handling of external calls and `run-external` being very special and involving the parser handing quoted strings over to `run-external` so that it knows whether to expand tildes and globs and so on. This is really unusual and also makes it harder to use `run-external`, and also harder to understand it (and probably is part of the reason why it was rewritten in the first place).

This PR moves a lot more of that work over to the parser, so that by the time `run-external` gets it, it's dealing with much more normal Nushell values. In particular:

- Unquoted strings are handled as globs with no expand
- The unescaped-but-quoted handling of strings was removed, and the parser constructs normal looking strings instead, removing internal quotes so that `run-external` doesn't have to do it
- Bare word interpolation is now supported and expansion is done in this case
- Expressions typed as `Glob` containing `Expr::StringInterpolation` now produce `Value::Glob` instead, with the quoted status from the expr passed through so we know if it was a bare word
- Bare word interpolation for values typed as `glob` now possible, but not implemented
- Because expansion is now triggered by `Value::Glob(_, false)` instead of looking at the expr, externals now support glob types

# User-Facing Changes

- Bare word interpolation works for external command options, and otherwise embedded in other strings:

```nushell
^echo --foo=(2 + 2) # prints --foo=4
^echo -foo=$"(2 + 2)" # prints -foo=4
^echo foo="(2 + 2)" # prints (no interpolation!) foo=(2 + 2)
^echo foo,(2 + 2),bar # prints foo,4,bar
```

- Bare word interpolation expands for external command head/args:

```nushell
let name = "exa"
~/.cargo/bin/($name) # this works, and expands the tilde
^$"~/.cargo/bin/($name)" # this doesn't expand the tilde
^echo ~/($name)/* # this glob is expanded
^echo $"~/($name)/*" # this isn't expanded
```

- Ndots are now supported for the head of an external command (`^.../foo` works)
- Glob values are now supported for head/args of an external command, and expanded appropriately:

```nushell
^("~/.cargo/bin/exa" | into glob) # the tilde is expanded
^echo ("*.txt" | into glob) # this glob is expanded
```

- `run-external` now works more like any other command, without expecting a special call convention for its args:

```nushell
run-external echo "'foo'"
# before PR: 'foo'
# after PR: foo
run-external echo "*.txt"
# before PR: (glob is expanded)
# after PR: *.txt
```

# Tests + Formatting

Lots of tests added and cleaned up. Some tests that weren't active on Windows changed to use `nu --testbin cococo` so that they can work. Added a test for Linux only to make sure tilde expansion of commands works, because changing `HOME` there causes `~` to reliably change.

- 🟢 `toolkit fmt`
- 🟢 `toolkit clippy`
- 🟢 `toolkit test`
- 🟢 `toolkit test stdlib`

# After Submitting

- [ ] release notes: make sure to mention the new syntaxes that are supported
This commit is contained in:
@ -26,6 +26,7 @@ pub enum FlatShape {
|
||||
Flag,
|
||||
Float,
|
||||
Garbage,
|
||||
GlobInterpolation,
|
||||
GlobPattern,
|
||||
Int,
|
||||
InternalCall(DeclId),
|
||||
@ -67,6 +68,7 @@ impl FlatShape {
|
||||
FlatShape::Flag => "shape_flag",
|
||||
FlatShape::Float => "shape_float",
|
||||
FlatShape::Garbage => "shape_garbage",
|
||||
FlatShape::GlobInterpolation => "shape_glob_interpolation",
|
||||
FlatShape::GlobPattern => "shape_globpattern",
|
||||
FlatShape::Int => "shape_int",
|
||||
FlatShape::InternalCall(_) => "shape_internalcall",
|
||||
@ -277,7 +279,7 @@ fn flatten_expression_into(
|
||||
output[arg_start..].sort();
|
||||
}
|
||||
Expr::ExternalCall(head, args) => {
|
||||
if let Expr::String(..) = &head.expr {
|
||||
if let Expr::String(..) | Expr::GlobPattern(..) = &head.expr {
|
||||
output.push((head.span, FlatShape::External));
|
||||
} else {
|
||||
flatten_expression_into(working_set, head, output);
|
||||
@ -286,7 +288,7 @@ fn flatten_expression_into(
|
||||
for arg in args.as_ref() {
|
||||
match arg {
|
||||
ExternalArgument::Regular(expr) => {
|
||||
if let Expr::String(..) = &expr.expr {
|
||||
if let Expr::String(..) | Expr::GlobPattern(..) = &expr.expr {
|
||||
output.push((expr.span, FlatShape::ExternalArg));
|
||||
} else {
|
||||
flatten_expression_into(working_set, expr, output);
|
||||
@ -431,6 +433,25 @@ fn flatten_expression_into(
|
||||
}
|
||||
output.extend(flattened);
|
||||
}
|
||||
Expr::GlobInterpolation(exprs, quoted) => {
|
||||
let mut flattened = vec![];
|
||||
for expr in exprs {
|
||||
flatten_expression_into(working_set, expr, &mut flattened);
|
||||
}
|
||||
|
||||
if *quoted {
|
||||
// If we aren't a bare word interpolation, also highlight the outer quotes
|
||||
output.push((
|
||||
Span::new(expr.span.start, expr.span.start + 2),
|
||||
FlatShape::GlobInterpolation,
|
||||
));
|
||||
flattened.push((
|
||||
Span::new(expr.span.end - 1, expr.span.end),
|
||||
FlatShape::GlobInterpolation,
|
||||
));
|
||||
}
|
||||
output.extend(flattened);
|
||||
}
|
||||
Expr::Record(list) => {
|
||||
let outer_span = expr.span;
|
||||
let mut last_end = outer_span.start;
|
||||
|
@ -16,7 +16,6 @@ use nu_protocol::{
|
||||
IN_VARIABLE_ID,
|
||||
};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{HashMap, HashSet},
|
||||
num::ParseIntError,
|
||||
str,
|
||||
@ -222,6 +221,209 @@ pub(crate) fn check_call(
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a string in the arg or head position of an external call.
|
||||
///
|
||||
/// If the string begins with `r#`, it is parsed as a raw string. If it doesn't contain any quotes
|
||||
/// or parentheses, it is parsed as a glob pattern so that tilde and glob expansion can be handled
|
||||
/// by `run-external`. Otherwise, we use a custom state machine to put together an interpolated
|
||||
/// string, where each balanced pair of quotes is parsed as a separate part of the string, and then
|
||||
/// concatenated together.
|
||||
///
|
||||
/// For example, `-foo="bar\nbaz"` becomes `$"-foo=bar\nbaz"`
|
||||
fn parse_external_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
let contents = &working_set.get_span_contents(span);
|
||||
|
||||
if contents.starts_with(b"r#") {
|
||||
parse_raw_string(working_set, span)
|
||||
} else if contents
|
||||
.iter()
|
||||
.any(|b| matches!(b, b'"' | b'\'' | b'(' | b')'))
|
||||
{
|
||||
enum State {
|
||||
Bare {
|
||||
from: usize,
|
||||
},
|
||||
Quote {
|
||||
from: usize,
|
||||
quote_char: u8,
|
||||
escaped: bool,
|
||||
depth: i32,
|
||||
},
|
||||
}
|
||||
// Find the spans of parts of the string that can be parsed as their own strings for
|
||||
// concatenation.
|
||||
//
|
||||
// By passing each of these parts to `parse_string()`, we can eliminate the quotes and also
|
||||
// handle string interpolation.
|
||||
let make_span = |from: usize, index: usize| Span {
|
||||
start: span.start + from,
|
||||
end: span.start + index,
|
||||
};
|
||||
let mut spans = vec![];
|
||||
let mut state = State::Bare { from: 0 };
|
||||
let mut index = 0;
|
||||
while index < contents.len() {
|
||||
let ch = contents[index];
|
||||
match &mut state {
|
||||
State::Bare { from } => match ch {
|
||||
b'"' | b'\'' => {
|
||||
// Push bare string
|
||||
if index != *from {
|
||||
spans.push(make_span(*from, index));
|
||||
}
|
||||
// then transition to other state
|
||||
state = State::Quote {
|
||||
from: index,
|
||||
quote_char: ch,
|
||||
escaped: false,
|
||||
depth: 1,
|
||||
};
|
||||
}
|
||||
b'$' => {
|
||||
if let Some("e_char @ (b'"' | b'\'')) = contents.get(index + 1) {
|
||||
// Start a dollar quote (interpolated string)
|
||||
if index != *from {
|
||||
spans.push(make_span(*from, index));
|
||||
}
|
||||
state = State::Quote {
|
||||
from: index,
|
||||
quote_char,
|
||||
escaped: false,
|
||||
depth: 1,
|
||||
};
|
||||
// Skip over two chars (the dollar sign and the quote)
|
||||
index += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Continue to consume
|
||||
_ => (),
|
||||
},
|
||||
State::Quote {
|
||||
from,
|
||||
quote_char,
|
||||
escaped,
|
||||
depth,
|
||||
} => match ch {
|
||||
ch if ch == *quote_char && !*escaped => {
|
||||
// Count if there are more than `depth` quotes remaining
|
||||
if contents[index..]
|
||||
.iter()
|
||||
.filter(|b| *b == quote_char)
|
||||
.count() as i32
|
||||
> *depth
|
||||
{
|
||||
// Increment depth to be greedy
|
||||
*depth += 1;
|
||||
} else {
|
||||
// Decrement depth
|
||||
*depth -= 1;
|
||||
}
|
||||
if *depth == 0 {
|
||||
// End of string
|
||||
spans.push(make_span(*from, index + 1));
|
||||
// go back to Bare state
|
||||
state = State::Bare { from: index + 1 };
|
||||
}
|
||||
}
|
||||
b'\\' if !*escaped && *quote_char == b'"' => {
|
||||
// The next token is escaped so it doesn't count (only for double quote)
|
||||
*escaped = true;
|
||||
}
|
||||
_ => {
|
||||
*escaped = false;
|
||||
}
|
||||
},
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
|
||||
// Add the final span
|
||||
match state {
|
||||
State::Bare { from } | State::Quote { from, .. } => {
|
||||
if from < contents.len() {
|
||||
spans.push(make_span(from, contents.len()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Log the spans that will be parsed
|
||||
if log::log_enabled!(log::Level::Trace) {
|
||||
let contents = spans
|
||||
.iter()
|
||||
.map(|span| String::from_utf8_lossy(working_set.get_span_contents(*span)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
trace!("parsing: external string, parts: {contents:?}")
|
||||
}
|
||||
|
||||
// Check if the whole thing is quoted. If not, it should be a glob
|
||||
let quoted =
|
||||
(contents.len() >= 3 && contents.starts_with(b"$\"") && contents.ends_with(b"\""))
|
||||
|| is_quoted(contents);
|
||||
|
||||
// Parse each as its own string
|
||||
let exprs: Vec<Expression> = spans
|
||||
.into_iter()
|
||||
.map(|span| parse_string(working_set, span))
|
||||
.collect();
|
||||
|
||||
if exprs
|
||||
.iter()
|
||||
.all(|expr| matches!(expr.expr, Expr::String(..)))
|
||||
{
|
||||
// If the exprs are all strings anyway, just collapse into a single string.
|
||||
let string = exprs
|
||||
.into_iter()
|
||||
.map(|expr| {
|
||||
let Expr::String(contents) = expr.expr else {
|
||||
unreachable!("already checked that this was a String")
|
||||
};
|
||||
contents
|
||||
})
|
||||
.collect::<String>();
|
||||
if quoted {
|
||||
Expression::new(working_set, Expr::String(string), span, Type::String)
|
||||
} else {
|
||||
Expression::new(
|
||||
working_set,
|
||||
Expr::GlobPattern(string, false),
|
||||
span,
|
||||
Type::Glob,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
// Flatten any string interpolations contained with the exprs.
|
||||
let exprs = exprs
|
||||
.into_iter()
|
||||
.flat_map(|expr| match expr.expr {
|
||||
Expr::StringInterpolation(subexprs) => subexprs,
|
||||
_ => vec![expr],
|
||||
})
|
||||
.collect();
|
||||
// Make an interpolation out of the expressions. Use `GlobInterpolation` if it's a bare
|
||||
// word, so that the unquoted state can get passed through to `run-external`.
|
||||
if quoted {
|
||||
Expression::new(
|
||||
working_set,
|
||||
Expr::StringInterpolation(exprs),
|
||||
span,
|
||||
Type::String,
|
||||
)
|
||||
} else {
|
||||
Expression::new(
|
||||
working_set,
|
||||
Expr::GlobInterpolation(exprs, false),
|
||||
span,
|
||||
Type::Glob,
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parse_glob_pattern(working_set, span)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> ExternalArgument {
|
||||
let contents = working_set.get_span_contents(span);
|
||||
|
||||
@ -229,8 +431,6 @@ fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> External
|
||||
ExternalArgument::Regular(parse_dollar_expr(working_set, span))
|
||||
} else if contents.starts_with(b"[") {
|
||||
ExternalArgument::Regular(parse_list_expression(working_set, span, &SyntaxShape::Any))
|
||||
} else if contents.starts_with(b"r#") {
|
||||
ExternalArgument::Regular(parse_raw_string(working_set, span))
|
||||
} else if contents.len() > 3
|
||||
&& contents.starts_with(b"...")
|
||||
&& (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(')
|
||||
@ -241,18 +441,7 @@ fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> External
|
||||
&SyntaxShape::List(Box::new(SyntaxShape::Any)),
|
||||
))
|
||||
} else {
|
||||
// Eval stage trims the quotes, so we don't have to do the same thing when parsing.
|
||||
let (contents, err) = unescape_string_preserving_quotes(contents, span);
|
||||
if let Some(err) = err {
|
||||
working_set.error(err);
|
||||
}
|
||||
|
||||
ExternalArgument::Regular(Expression::new(
|
||||
working_set,
|
||||
Expr::String(contents),
|
||||
span,
|
||||
Type::String,
|
||||
))
|
||||
ExternalArgument::Regular(parse_external_string(working_set, span))
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,18 +463,7 @@ pub fn parse_external_call(working_set: &mut StateWorkingSet, spans: &[Span]) ->
|
||||
let arg = parse_expression(working_set, &[head_span]);
|
||||
Box::new(arg)
|
||||
} else {
|
||||
// Eval stage will unquote the string, so we don't bother with that here
|
||||
let (contents, err) = unescape_string_preserving_quotes(&head_contents, head_span);
|
||||
if let Some(err) = err {
|
||||
working_set.error(err)
|
||||
}
|
||||
|
||||
Box::new(Expression::new(
|
||||
working_set,
|
||||
Expr::String(contents),
|
||||
head_span,
|
||||
Type::String,
|
||||
))
|
||||
Box::new(parse_external_string(working_set, head_span))
|
||||
};
|
||||
|
||||
let args = spans[1..]
|
||||
@ -2639,23 +2817,6 @@ pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<Pars
|
||||
}
|
||||
}
|
||||
|
||||
/// XXX: This is here temporarily as a patch, but we should replace this with properly representing
|
||||
/// the quoted state of a string in the AST
|
||||
fn unescape_string_preserving_quotes(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
|
||||
let (bytes, err) = if bytes.starts_with(b"\"") {
|
||||
let (bytes, err) = unescape_string(bytes, span);
|
||||
(Cow::Owned(bytes), err)
|
||||
} else {
|
||||
(Cow::Borrowed(bytes), None)
|
||||
};
|
||||
|
||||
// The original code for args used lossy conversion here, even though that's not what we
|
||||
// typically use for strings. Revisit whether that's actually desirable later, but don't
|
||||
// want to introduce a breaking change for this patch.
|
||||
let token = String::from_utf8_lossy(&bytes).into_owned();
|
||||
(token, err)
|
||||
}
|
||||
|
||||
pub fn parse_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
trace!("parsing: string");
|
||||
|
||||
@ -6012,7 +6173,7 @@ pub fn discover_captures_in_expr(
|
||||
}
|
||||
Expr::String(_) => {}
|
||||
Expr::RawString(_) => {}
|
||||
Expr::StringInterpolation(exprs) => {
|
||||
Expr::StringInterpolation(exprs) | Expr::GlobInterpolation(exprs, _) => {
|
||||
for expr in exprs {
|
||||
discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
|
||||
}
|
||||
|
Reference in New Issue
Block a user