Move most of the peculiar argument handling for external calls into the parser (#13089)

# Description

We've had a lot of different issues and PRs related to arg handling with
externals since the rewrite of `run-external` in #12921:

- #12950
- #12955
- #13000
- #13001
- #13021
- #13027
- #13028
- #13073

Many of these are caused by the argument handling of external calls and
`run-external` being very special and involving the parser handing
quoted strings over to `run-external` so that it knows whether to expand
tildes and globs and so on. This is really unusual and also makes it
harder to use `run-external`, and also harder to understand it (and
probably is part of the reason why it was rewritten in the first place).

This PR moves a lot more of that work over to the parser, so that by the
time `run-external` gets it, it's dealing with much more normal Nushell
values. In particular:

- Unquoted strings are handled as globs with no expand
- The unescaped-but-quoted handling of strings was removed, and the
parser constructs normal looking strings instead, removing internal
quotes so that `run-external` doesn't have to do it
- Bare word interpolation is now supported and expansion is done in this
case
- Expressions typed as `Glob` containing `Expr::StringInterpolation` now
produce `Value::Glob` instead, with the quoted status from the expr
passed through so we know if it was a bare word
- Bare word interpolation for values typed as `glob` now possible, but
not implemented
- Because expansion is now triggered by `Value::Glob(_, false)` instead
of looking at the expr, externals now support glob types

# User-Facing Changes

- Bare word interpolation works for external command options, and
otherwise embedded in other strings:
  ```nushell
  ^echo --foo=(2 + 2) # prints --foo=4
  ^echo -foo=$"(2 + 2)" # prints -foo=4
  ^echo foo="(2 + 2)" # prints (no interpolation!) foo=(2 + 2)
  ^echo foo,(2 + 2),bar # prints foo,4,bar
  ```

- Bare word interpolation expands for external command head/args:
  ```nushell
  let name = "exa"
  ~/.cargo/bin/($name) # this works, and expands the tilde
  ^$"~/.cargo/bin/($name)" # this doesn't expand the tilde
  ^echo ~/($name)/* # this glob is expanded
  ^echo $"~/($name)/*" # this isn't expanded
  ```

- Ndots are now supported for the head of an external command
(`^.../foo` works)

- Glob values are now supported for head/args of an external command,
and expanded appropriately:
  ```nushell
  ^("~/.cargo/bin/exa" | into glob) # the tilde is expanded
  ^echo ("*.txt" | into glob) # this glob is expanded
  ```

- `run-external` now works more like any other command, without
expecting a special call convention
  for its args:
  ```nushell
  run-external echo "'foo'"
  # before PR: 'foo'
  # after PR:  foo
  run-external echo "*.txt"
  # before PR: (glob is expanded)
  # after PR:  *.txt
  ```

# Tests + Formatting
Lots of tests added and cleaned up. Some tests that weren't active on
Windows changed to use `nu --testbin cococo` so that they can work.
Added a test for Linux only to make sure tilde expansion of commands
works, because changing `HOME` there causes `~` to reliably change.

- 🟢 `toolkit fmt`
- 🟢 `toolkit clippy`
- 🟢 `toolkit test`
- 🟢 `toolkit test stdlib`

# After Submitting
- [ ] release notes: make sure to mention the new syntaxes that are
supported
This commit is contained in:
Devyn Cairns
2024-06-19 21:00:03 -07:00
committed by GitHub
parent 44aa0a2de4
commit bdc32345bd
13 changed files with 880 additions and 476 deletions

View File

@ -16,7 +16,6 @@ use nu_protocol::{
IN_VARIABLE_ID,
};
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
num::ParseIntError,
str,
@ -222,6 +221,209 @@ pub(crate) fn check_call(
}
}
/// Parses a string in the arg or head position of an external call.
///
/// If the string begins with `r#`, it is parsed as a raw string. If it doesn't contain any quotes
/// or parentheses, it is parsed as a glob pattern so that tilde and glob expansion can be handled
/// by `run-external`. Otherwise, we use a custom state machine to put together an interpolated
/// string, where each balanced pair of quotes is parsed as a separate part of the string, and then
/// concatenated together.
///
/// For example, `-foo="bar\nbaz"` becomes `$"-foo=bar\nbaz"`
fn parse_external_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
let contents = &working_set.get_span_contents(span);
if contents.starts_with(b"r#") {
parse_raw_string(working_set, span)
} else if contents
.iter()
.any(|b| matches!(b, b'"' | b'\'' | b'(' | b')'))
{
enum State {
Bare {
from: usize,
},
Quote {
from: usize,
quote_char: u8,
escaped: bool,
depth: i32,
},
}
// Find the spans of parts of the string that can be parsed as their own strings for
// concatenation.
//
// By passing each of these parts to `parse_string()`, we can eliminate the quotes and also
// handle string interpolation.
let make_span = |from: usize, index: usize| Span {
start: span.start + from,
end: span.start + index,
};
let mut spans = vec![];
let mut state = State::Bare { from: 0 };
let mut index = 0;
while index < contents.len() {
let ch = contents[index];
match &mut state {
State::Bare { from } => match ch {
b'"' | b'\'' => {
// Push bare string
if index != *from {
spans.push(make_span(*from, index));
}
// then transition to other state
state = State::Quote {
from: index,
quote_char: ch,
escaped: false,
depth: 1,
};
}
b'$' => {
if let Some(&quote_char @ (b'"' | b'\'')) = contents.get(index + 1) {
// Start a dollar quote (interpolated string)
if index != *from {
spans.push(make_span(*from, index));
}
state = State::Quote {
from: index,
quote_char,
escaped: false,
depth: 1,
};
// Skip over two chars (the dollar sign and the quote)
index += 2;
continue;
}
}
// Continue to consume
_ => (),
},
State::Quote {
from,
quote_char,
escaped,
depth,
} => match ch {
ch if ch == *quote_char && !*escaped => {
// Count if there are more than `depth` quotes remaining
if contents[index..]
.iter()
.filter(|b| *b == quote_char)
.count() as i32
> *depth
{
// Increment depth to be greedy
*depth += 1;
} else {
// Decrement depth
*depth -= 1;
}
if *depth == 0 {
// End of string
spans.push(make_span(*from, index + 1));
// go back to Bare state
state = State::Bare { from: index + 1 };
}
}
b'\\' if !*escaped && *quote_char == b'"' => {
// The next token is escaped so it doesn't count (only for double quote)
*escaped = true;
}
_ => {
*escaped = false;
}
},
}
index += 1;
}
// Add the final span
match state {
State::Bare { from } | State::Quote { from, .. } => {
if from < contents.len() {
spans.push(make_span(from, contents.len()));
}
}
}
// Log the spans that will be parsed
if log::log_enabled!(log::Level::Trace) {
let contents = spans
.iter()
.map(|span| String::from_utf8_lossy(working_set.get_span_contents(*span)))
.collect::<Vec<_>>();
trace!("parsing: external string, parts: {contents:?}")
}
// Check if the whole thing is quoted. If not, it should be a glob
let quoted =
(contents.len() >= 3 && contents.starts_with(b"$\"") && contents.ends_with(b"\""))
|| is_quoted(contents);
// Parse each as its own string
let exprs: Vec<Expression> = spans
.into_iter()
.map(|span| parse_string(working_set, span))
.collect();
if exprs
.iter()
.all(|expr| matches!(expr.expr, Expr::String(..)))
{
// If the exprs are all strings anyway, just collapse into a single string.
let string = exprs
.into_iter()
.map(|expr| {
let Expr::String(contents) = expr.expr else {
unreachable!("already checked that this was a String")
};
contents
})
.collect::<String>();
if quoted {
Expression::new(working_set, Expr::String(string), span, Type::String)
} else {
Expression::new(
working_set,
Expr::GlobPattern(string, false),
span,
Type::Glob,
)
}
} else {
// Flatten any string interpolations contained with the exprs.
let exprs = exprs
.into_iter()
.flat_map(|expr| match expr.expr {
Expr::StringInterpolation(subexprs) => subexprs,
_ => vec![expr],
})
.collect();
// Make an interpolation out of the expressions. Use `GlobInterpolation` if it's a bare
// word, so that the unquoted state can get passed through to `run-external`.
if quoted {
Expression::new(
working_set,
Expr::StringInterpolation(exprs),
span,
Type::String,
)
} else {
Expression::new(
working_set,
Expr::GlobInterpolation(exprs, false),
span,
Type::Glob,
)
}
}
} else {
parse_glob_pattern(working_set, span)
}
}
fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> ExternalArgument {
let contents = working_set.get_span_contents(span);
@ -229,8 +431,6 @@ fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> External
ExternalArgument::Regular(parse_dollar_expr(working_set, span))
} else if contents.starts_with(b"[") {
ExternalArgument::Regular(parse_list_expression(working_set, span, &SyntaxShape::Any))
} else if contents.starts_with(b"r#") {
ExternalArgument::Regular(parse_raw_string(working_set, span))
} else if contents.len() > 3
&& contents.starts_with(b"...")
&& (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(')
@ -241,18 +441,7 @@ fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> External
&SyntaxShape::List(Box::new(SyntaxShape::Any)),
))
} else {
// Eval stage trims the quotes, so we don't have to do the same thing when parsing.
let (contents, err) = unescape_string_preserving_quotes(contents, span);
if let Some(err) = err {
working_set.error(err);
}
ExternalArgument::Regular(Expression::new(
working_set,
Expr::String(contents),
span,
Type::String,
))
ExternalArgument::Regular(parse_external_string(working_set, span))
}
}
@ -274,18 +463,7 @@ pub fn parse_external_call(working_set: &mut StateWorkingSet, spans: &[Span]) ->
let arg = parse_expression(working_set, &[head_span]);
Box::new(arg)
} else {
// Eval stage will unquote the string, so we don't bother with that here
let (contents, err) = unescape_string_preserving_quotes(&head_contents, head_span);
if let Some(err) = err {
working_set.error(err)
}
Box::new(Expression::new(
working_set,
Expr::String(contents),
head_span,
Type::String,
))
Box::new(parse_external_string(working_set, head_span))
};
let args = spans[1..]
@ -2639,23 +2817,6 @@ pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<Pars
}
}
/// XXX: This is here temporarily as a patch, but we should replace this with properly representing
/// the quoted state of a string in the AST
fn unescape_string_preserving_quotes(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
let (bytes, err) = if bytes.starts_with(b"\"") {
let (bytes, err) = unescape_string(bytes, span);
(Cow::Owned(bytes), err)
} else {
(Cow::Borrowed(bytes), None)
};
// The original code for args used lossy conversion here, even though that's not what we
// typically use for strings. Revisit whether that's actually desirable later, but don't
// want to introduce a breaking change for this patch.
let token = String::from_utf8_lossy(&bytes).into_owned();
(token, err)
}
pub fn parse_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
trace!("parsing: string");
@ -6012,7 +6173,7 @@ pub fn discover_captures_in_expr(
}
Expr::String(_) => {}
Expr::RawString(_) => {}
Expr::StringInterpolation(exprs) => {
Expr::StringInterpolation(exprs) | Expr::GlobInterpolation(exprs, _) => {
for expr in exprs {
discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
}