Devyn Cairns bdc32345bd
Move most of the peculiar argument handling for external calls into the parser (#13089)
# Description

We've had a lot of different issues and PRs related to arg handling with
externals since the rewrite of `run-external` in #12921:

- #12950
- #12955
- #13000
- #13001
- #13021
- #13027
- #13028
- #13073

Many of these are caused by the argument handling of external calls and
`run-external` being very special and involving the parser handing
quoted strings over to `run-external` so that it knows whether to expand
tildes and globs and so on. This is really unusual and also makes it
harder to use `run-external`, and also harder to understand it (and
probably is part of the reason why it was rewritten in the first place).

This PR moves a lot more of that work over to the parser, so that by the
time `run-external` gets it, it's dealing with much more normal Nushell
values. In particular:

- Unquoted strings are handled as globs with no expand
- The unescaped-but-quoted handling of strings was removed, and the
parser constructs normal looking strings instead, removing internal
quotes so that `run-external` doesn't have to do it
- Bare word interpolation is now supported and expansion is done in this
case
- Expressions typed as `Glob` containing `Expr::StringInterpolation` now
produce `Value::Glob` instead, with the quoted status from the expr
passed through so we know if it was a bare word
- Bare word interpolation for values typed as `glob` now possible, but
not implemented
- Because expansion is now triggered by `Value::Glob(_, false)` instead
of looking at the expr, externals now support glob types

# User-Facing Changes

- Bare word interpolation works for external command options, and
otherwise embedded in other strings:
  ```nushell
  ^echo --foo=(2 + 2) # prints --foo=4
  ^echo -foo=$"(2 + 2)" # prints -foo=4
  ^echo foo="(2 + 2)" # prints (no interpolation!) foo=(2 + 2)
  ^echo foo,(2 + 2),bar # prints foo,4,bar
  ```

- Bare word interpolation expands for external command head/args:
  ```nushell
  let name = "exa"
  ~/.cargo/bin/($name) # this works, and expands the tilde
  ^$"~/.cargo/bin/($name)" # this doesn't expand the tilde
  ^echo ~/($name)/* # this glob is expanded
  ^echo $"~/($name)/*" # this isn't expanded
  ```

- Ndots are now supported for the head of an external command
(`^.../foo` works)

- Glob values are now supported for head/args of an external command,
and expanded appropriately:
  ```nushell
  ^("~/.cargo/bin/exa" | into glob) # the tilde is expanded
  ^echo ("*.txt" | into glob) # this glob is expanded
  ```

- `run-external` now works more like any other command, without
expecting a special call convention
  for its args:
  ```nushell
  run-external echo "'foo'"
  # before PR: 'foo'
  # after PR:  foo
  run-external echo "*.txt"
  # before PR: (glob is expanded)
  # after PR:  *.txt
  ```

# Tests + Formatting
Lots of tests added and cleaned up. Some tests that weren't active on
Windows changed to use `nu --testbin cococo` so that they can work.
Added a test for Linux only to make sure tilde expansion of commands
works, because changing `HOME` there causes `~` to reliably change.

- 🟢 `toolkit fmt`
- 🟢 `toolkit clippy`
- 🟢 `toolkit test`
- 🟢 `toolkit test stdlib`

# After Submitting
- [ ] release notes: make sure to mention the new syntaxes that are
supported
2024-06-19 21:00:03 -07:00

381 lines
12 KiB
Rust

//! Nushell Profiler
//!
//! Profiler implements the Debugger trait and can be used via the `debug profile` command for
//! profiling Nushell code.
use crate::{
ast::{Block, Expr, PipelineElement},
debugger::Debugger,
engine::EngineState,
record, PipelineData, ShellError, Span, Value,
};
use std::io::BufRead;
use std::time::Instant;
#[derive(Debug, Clone, Copy)]
struct ElementId(usize);
/// Stores profiling information about one pipeline element
#[derive(Debug, Clone)]
struct ElementInfo {
start: Instant,
duration_sec: f64,
depth: i64,
element_span: Span,
element_output: Option<Value>,
expr: Option<String>,
children: Vec<ElementId>,
}
impl ElementInfo {
pub fn new(depth: i64, element_span: Span) -> Self {
ElementInfo {
start: Instant::now(),
duration_sec: 0.0,
depth,
element_span,
element_output: None,
expr: None,
children: vec![],
}
}
}
/// Basic profiler, used in `debug profile`
#[derive(Debug, Clone)]
pub struct Profiler {
depth: i64,
max_depth: i64,
collect_spans: bool,
collect_source: bool,
collect_expanded_source: bool,
collect_values: bool,
collect_exprs: bool,
collect_lines: bool,
elements: Vec<ElementInfo>,
element_stack: Vec<ElementId>,
}
impl Profiler {
#[allow(clippy::too_many_arguments)]
pub fn new(
max_depth: i64,
collect_spans: bool,
collect_source: bool,
collect_expanded_source: bool,
collect_values: bool,
collect_exprs: bool,
collect_lines: bool,
span: Span,
) -> Self {
let first = ElementInfo {
start: Instant::now(),
duration_sec: 0.0,
depth: 0,
element_span: span,
element_output: collect_values.then(|| Value::nothing(span)),
expr: collect_exprs.then(|| "call".to_string()),
children: vec![],
};
Profiler {
depth: 0,
max_depth,
collect_spans,
collect_source,
collect_expanded_source,
collect_values,
collect_exprs,
collect_lines,
elements: vec![first],
element_stack: vec![ElementId(0)],
}
}
fn last_element_id(&self) -> Option<ElementId> {
self.element_stack.last().copied()
}
fn last_element_mut(&mut self) -> Option<&mut ElementInfo> {
self.last_element_id()
.and_then(|id| self.elements.get_mut(id.0))
}
}
impl Debugger for Profiler {
fn activate(&mut self) {
let Some(root_element) = self.last_element_mut() else {
eprintln!("Profiler Error: Missing root element.");
return;
};
root_element.start = Instant::now();
}
fn deactivate(&mut self) {
let Some(root_element) = self.last_element_mut() else {
eprintln!("Profiler Error: Missing root element.");
return;
};
root_element.duration_sec = root_element.start.elapsed().as_secs_f64();
}
fn enter_block(&mut self, _engine_state: &EngineState, _block: &Block) {
self.depth += 1;
}
fn leave_block(&mut self, _engine_state: &EngineState, _block: &Block) {
self.depth -= 1;
}
fn enter_element(&mut self, engine_state: &EngineState, element: &PipelineElement) {
if self.depth > self.max_depth {
return;
}
let Some(parent_id) = self.last_element_id() else {
eprintln!("Profiler Error: Missing parent element ID.");
return;
};
let expr_opt = self
.collect_exprs
.then(|| expr_to_string(engine_state, &element.expr.expr));
let new_id = ElementId(self.elements.len());
let mut new_element = ElementInfo::new(self.depth, element.expr.span);
new_element.expr = expr_opt;
self.elements.push(new_element);
let Some(parent) = self.elements.get_mut(parent_id.0) else {
eprintln!("Profiler Error: Missing parent element.");
return;
};
parent.children.push(new_id);
self.element_stack.push(new_id);
}
fn leave_element(
&mut self,
_engine_state: &EngineState,
element: &PipelineElement,
result: &Result<PipelineData, ShellError>,
) {
if self.depth > self.max_depth {
return;
}
let element_span = element.expr.span;
let out_opt = self.collect_values.then(|| match result {
Ok(pipeline_data) => match pipeline_data {
PipelineData::Value(val, ..) => val.clone(),
PipelineData::ListStream(..) => Value::string("list stream", element_span),
PipelineData::ByteStream(..) => Value::string("byte stream", element_span),
_ => Value::nothing(element_span),
},
Err(e) => Value::error(e.clone(), element_span),
});
let Some(last_element) = self.last_element_mut() else {
eprintln!("Profiler Error: Missing last element.");
return;
};
last_element.duration_sec = last_element.start.elapsed().as_secs_f64();
last_element.element_output = out_opt;
self.element_stack.pop();
}
fn report(&self, engine_state: &EngineState, profiler_span: Span) -> Result<Value, ShellError> {
Ok(Value::list(
collect_data(
engine_state,
self,
ElementId(0),
ElementId(0),
profiler_span,
)?,
profiler_span,
))
}
}
fn profiler_error(msg: impl Into<String>, span: Span) -> ShellError {
ShellError::GenericError {
error: "Profiler Error".to_string(),
msg: msg.into(),
span: Some(span),
help: None,
inner: vec![],
}
}
fn expr_to_string(engine_state: &EngineState, expr: &Expr) -> String {
match expr {
Expr::Binary(_) => "binary".to_string(),
Expr::BinaryOp(_, _, _) => "binary operation".to_string(),
Expr::Block(_) => "block".to_string(),
Expr::Bool(_) => "bool".to_string(),
Expr::Call(call) => {
let decl = engine_state.get_decl(call.decl_id);
if decl.name() == "collect" && call.head == Span::new(0, 0) {
"call (implicit collect)"
} else {
"call"
}
.to_string()
}
Expr::CellPath(_) => "cell path".to_string(),
Expr::Closure(_) => "closure".to_string(),
Expr::DateTime(_) => "datetime".to_string(),
Expr::Directory(_, _) => "directory".to_string(),
Expr::ExternalCall(_, _) => "external call".to_string(),
Expr::Filepath(_, _) => "filepath".to_string(),
Expr::Float(_) => "float".to_string(),
Expr::FullCellPath(full_cell_path) => {
let head = expr_to_string(engine_state, &full_cell_path.head.expr);
format!("full cell path ({head})")
}
Expr::Garbage => "garbage".to_string(),
Expr::GlobPattern(_, _) => "glob pattern".to_string(),
Expr::ImportPattern(_) => "import pattern".to_string(),
Expr::Int(_) => "int".to_string(),
Expr::Keyword(_) => "keyword".to_string(),
Expr::List(_) => "list".to_string(),
Expr::MatchBlock(_) => "match block".to_string(),
Expr::Nothing => "nothing".to_string(),
Expr::Operator(_) => "operator".to_string(),
Expr::Overlay(_) => "overlay".to_string(),
Expr::Range(_) => "range".to_string(),
Expr::Record(_) => "record".to_string(),
Expr::RowCondition(_) => "row condition".to_string(),
Expr::Signature(_) => "signature".to_string(),
Expr::String(_) | Expr::RawString(_) => "string".to_string(),
Expr::StringInterpolation(_) => "string interpolation".to_string(),
Expr::GlobInterpolation(_, _) => "glob interpolation".to_string(),
Expr::Subexpression(_) => "subexpression".to_string(),
Expr::Table(_) => "table".to_string(),
Expr::UnaryNot(_) => "unary not".to_string(),
Expr::ValueWithUnit(_) => "value with unit".to_string(),
Expr::Var(_) => "var".to_string(),
Expr::VarDecl(_) => "var decl".to_string(),
}
}
// Find a file name and a line number (indexed from 1) of a span
fn find_file_of_span(engine_state: &EngineState, span: Span) -> Option<(&str, usize)> {
for file in engine_state.files() {
if file.covered_span.contains_span(span) {
// count the number of lines between file start and the searched span start
let chunk =
engine_state.get_span_contents(Span::new(file.covered_span.start, span.start));
let nlines = chunk.lines().count();
// account for leading part of current line being counted as a separate line
let line_num = if chunk.last() == Some(&b'\n') {
nlines + 1
} else {
nlines
};
// first line has no previous line, clamp up to `1`
let line_num = usize::max(line_num, 1);
return Some((&file.name, line_num));
}
}
None
}
fn collect_data(
engine_state: &EngineState,
profiler: &Profiler,
element_id: ElementId,
parent_id: ElementId,
profiler_span: Span,
) -> Result<Vec<Value>, ShellError> {
let element = &profiler.elements[element_id.0];
let mut row = record! {
"depth" => Value::int(element.depth, profiler_span),
"id" => Value::int(element_id.0 as i64, profiler_span),
"parent_id" => Value::int(parent_id.0 as i64, profiler_span),
};
if profiler.collect_lines {
if let Some((fname, line_num)) = find_file_of_span(engine_state, element.element_span) {
row.push("file", Value::string(fname, profiler_span));
row.push("line", Value::int(line_num as i64, profiler_span));
} else {
row.push("file", Value::nothing(profiler_span));
row.push("line", Value::nothing(profiler_span));
}
}
if profiler.collect_spans {
let span_start = i64::try_from(element.element_span.start)
.map_err(|_| profiler_error("error converting span start to i64", profiler_span))?;
let span_end = i64::try_from(element.element_span.end)
.map_err(|_| profiler_error("error converting span end to i64", profiler_span))?;
row.push(
"span",
Value::record(
record! {
"start" => Value::int(span_start, profiler_span),
"end" => Value::int(span_end, profiler_span),
},
profiler_span,
),
);
}
if profiler.collect_source {
let val = String::from_utf8_lossy(engine_state.get_span_contents(element.element_span));
let val = val.trim();
let nlines = val.lines().count();
let fragment = if profiler.collect_expanded_source {
val.to_string()
} else {
let mut first_line = val.lines().next().unwrap_or("").to_string();
if nlines > 1 {
first_line.push_str(" ...");
}
first_line
};
row.push("source", Value::string(fragment, profiler_span));
}
if let Some(expr_string) = &element.expr {
row.push("expr", Value::string(expr_string.clone(), profiler_span));
}
if let Some(val) = &element.element_output {
row.push("output", val.clone());
}
row.push(
"duration_ms",
Value::float(element.duration_sec * 1e3, profiler_span),
);
let mut rows = vec![Value::record(row, profiler_span)];
for child in &element.children {
let child_rows = collect_data(engine_state, profiler, *child, element_id, profiler_span)?;
rows.extend(child_rows);
}
Ok(rows)
}