nushell/crates/nu-engine/src/closure_eval.rs
Devyn Cairns 1a5bf2447a
Use Arc for environment variables on the stack (#13333)
# Description

This is another easy performance lift that just changes `env_vars` and
`env_hidden` on `Stack` to use `Arc`. I noticed that these were being
cloned on essentially every closure invocation during captures
gathering, so we're paying the cost for all of that even when we don't
change anything. On top of that, for `env_vars`, there's actually an
entirely fresh `HashMap` created for each child scope, so it's highly
unlikely that we'll modify the parent ones.

Uses `Arc::make_mut` instead to take care of things when we need to
mutate something, and most of the time nothing has to be cloned at all.

# Benchmarks

The benefits grow with the number of calls to env-cloning functions
like `captures_to_stack()`. Calling custom commands in a loop is
basically the best case for this improvement. A plain `each` with a
literal block benefits less, because its stack is only set up once.

## random-bytes.nu

```nushell
use std bench
do {
  const SCRIPT = ../nu_scripts/benchmarks/random-bytes.nu
  let before_change = bench { nu $SCRIPT }
  let after_change = bench { target/release/nu $SCRIPT }
  {
    before: ($before_change | reject times),
    after: ($after_change | reject times)
  }
}
```

```
╭────────┬──────────────────────────────╮
│        │ ╭──────┬───────────────────╮ │
│ before │ │ mean │ 603ms 759µs 727ns │ │
│        │ │ min  │ 593ms 298µs 167ns │ │
│        │ │ max  │ 648ms 612µs 291ns │ │
│        │ │ std  │ 9ms 335µs 251ns   │ │
│        │ ╰──────┴───────────────────╯ │
│        │ ╭──────┬───────────────────╮ │
│ after  │ │ mean │ 518ms 400µs 557ns │ │
│        │ │ min  │ 507ms 762µs 583ns │ │
│        │ │ max  │ 566ms 695µs 166ns │ │
│        │ │ std  │ 9ms 554µs 767ns   │ │
│        │ ╰──────┴───────────────────╯ │
╰────────┴──────────────────────────────╯
```

## gradient_benchmark_no_check.nu

```nushell
use std bench
do {
  const SCRIPT = ../nu_scripts/benchmarks/gradient_benchmark_no_check.nu
  let before_change = bench { nu $SCRIPT }
  let after_change = bench { target/release/nu $SCRIPT }
  {
    before: ($before_change | reject times),
    after: ($after_change | reject times)
  }
}
```

```
╭────────┬──────────────────────────────╮
│        │ ╭──────┬───────────────────╮ │
│ before │ │ mean │ 146ms 543µs 380ns │ │
│        │ │ min  │ 142ms 416µs 166ns │ │
│        │ │ max  │ 189ms 595µs       │ │
│        │ │ std  │ 7ms 140µs 342ns   │ │
│        │ ╰──────┴───────────────────╯ │
│        │ ╭──────┬───────────────────╮ │
│ after  │ │ mean │ 134ms 211µs 678ns │ │
│        │ │ min  │ 132ms 433µs 125ns │ │
│        │ │ max  │ 135ms 722µs 583ns │ │
│        │ │ std  │ 793µs 134ns       │ │
│        │ ╰──────┴───────────────────╯ │
╰────────┴──────────────────────────────╯
```

# User-Facing Changes
Better performance, particularly for custom commands, especially if
there are a lot of environment variables. Nothing else.

# Tests + Formatting
All passing.
2024-07-10 17:34:50 -07:00

237 lines
8.5 KiB
Rust

use crate::{
eval_block_with_early_return, get_eval_block_with_early_return, EvalBlockWithEarlyReturnFn,
};
use nu_protocol::{
ast::Block,
debugger::{WithDebug, WithoutDebug},
engine::{Closure, EngineState, EnvVars, Stack},
IntoPipelineData, PipelineData, ShellError, Value,
};
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
sync::Arc,
};
/// Select the block evaluator matching the requested debugging mode.
///
/// Returns `eval_block_with_early_return` instantiated with [`WithDebug`] when `debug` is
/// `true`, and with [`WithoutDebug`] otherwise.
fn eval_fn(debug: bool) -> EvalBlockWithEarlyReturnFn {
    match debug {
        true => eval_block_with_early_return::<WithDebug>,
        false => eval_block_with_early_return::<WithoutDebug>,
    }
}
/// [`ClosureEval`] is used to repeatedly evaluate a closure with different values/inputs.
///
/// [`ClosureEval`] has a builder API.
/// It is first created via [`ClosureEval::new`],
/// then has arguments added via [`ClosureEval::add_arg`],
/// and then can be run using [`ClosureEval::run_with_input`].
///
/// ```no_run
/// # use nu_protocol::{PipelineData, Value};
/// # use nu_engine::ClosureEval;
/// # let engine_state = unimplemented!();
/// # let stack = unimplemented!();
/// # let closure = unimplemented!();
/// let mut closure = ClosureEval::new(engine_state, stack, closure);
/// let iter = Vec::<Value>::new()
///     .into_iter()
///     .map(move |value| closure.add_arg(value).run_with_input(PipelineData::Empty));
/// ```
///
/// Many closures follow a simple, common scheme where the pipeline input and the first argument are the same value.
/// In this case, use [`ClosureEval::run_with_value`]:
///
/// ```no_run
/// # use nu_protocol::{PipelineData, Value};
/// # use nu_engine::ClosureEval;
/// # let engine_state = unimplemented!();
/// # let stack = unimplemented!();
/// # let closure = unimplemented!();
/// let mut closure = ClosureEval::new(engine_state, stack, closure);
/// let iter = Vec::<Value>::new()
///     .into_iter()
///     .map(move |value| closure.run_with_value(value));
/// ```
///
/// Environment isolation and other cleanup is handled by [`ClosureEval`],
/// so nothing needs to be done following [`ClosureEval::run_with_input`] or [`ClosureEval::run_with_value`].
pub struct ClosureEval {
/// Owned clone of the [`EngineState`] passed to [`ClosureEval::new`].
engine_state: EngineState,
/// Stack the closure runs on, built from the caller's stack and the closure's captures.
stack: Stack,
/// The closure's block, looked up in the engine state by the closure's block id.
block: Arc<Block>,
/// Index of the next positional parameter to bind; reset to `0` at the start of every run.
arg_index: usize,
/// Copy of `stack.env_vars` taken at construction and handed back to the stack before each run.
env_vars: Vec<Arc<EnvVars>>,
/// Copy of `stack.env_hidden` taken at construction and handed back to the stack before each run.
env_hidden: Arc<HashMap<String, HashSet<String>>>,
/// Function used to evaluate the block; picked from the engine state by default
/// and replaceable through [`ClosureEval::debug`].
eval: EvalBlockWithEarlyReturnFn,
}
impl ClosureEval {
/// Create a new [`ClosureEval`].
pub fn new(engine_state: &EngineState, stack: &Stack, closure: Closure) -> Self {
let engine_state = engine_state.clone();
let stack = stack.captures_to_stack(closure.captures);
let block = engine_state.get_block(closure.block_id).clone();
let env_vars = stack.env_vars.clone();
let env_hidden = stack.env_hidden.clone();
let eval = get_eval_block_with_early_return(&engine_state);
Self {
engine_state,
stack,
block,
arg_index: 0,
env_vars,
env_hidden,
eval,
}
}
/// Sets whether to enable debugging when evaluating the closure.
///
/// By default, this is controlled by the [`EngineState`] used to create this [`ClosureEval`].
pub fn debug(&mut self, debug: bool) -> &mut Self {
self.eval = eval_fn(debug);
self
}
fn try_add_arg(&mut self, value: Cow<Value>) {
if let Some(var_id) = self
.block
.signature
.get_positional(self.arg_index)
.and_then(|var| var.var_id)
{
self.stack.add_var(var_id, value.into_owned());
self.arg_index += 1;
}
}
/// Add an argument [`Value`] to the closure.
///
/// Multiple [`add_arg`](Self::add_arg) calls can be chained together,
/// but make sure that arguments are added based on their positional order.
pub fn add_arg(&mut self, value: Value) -> &mut Self {
self.try_add_arg(Cow::Owned(value));
self
}
/// Run the closure, passing the given [`PipelineData`] as input.
///
/// Any arguments should be added beforehand via [`add_arg`](Self::add_arg).
pub fn run_with_input(&mut self, input: PipelineData) -> Result<PipelineData, ShellError> {
self.arg_index = 0;
self.stack.with_env(&self.env_vars, &self.env_hidden);
(self.eval)(&self.engine_state, &mut self.stack, &self.block, input)
}
/// Run the closure using the given [`Value`] as both the pipeline input and the first argument.
///
/// Using this function after or in combination with [`add_arg`](Self::add_arg) is most likely an error.
/// This function is equivalent to `self.add_arg(value)` followed by `self.run_with_input(value.into_pipeline_data())`.
pub fn run_with_value(&mut self, value: Value) -> Result<PipelineData, ShellError> {
self.try_add_arg(Cow::Borrowed(&value));
self.run_with_input(value.into_pipeline_data())
}
}
/// [`ClosureEvalOnce`] is used to evaluate a closure a single time.
///
/// [`ClosureEvalOnce`] has a builder API.
/// It is first created via [`ClosureEvalOnce::new`],
/// then has arguments added via [`ClosureEvalOnce::add_arg`],
/// and then can be run using [`ClosureEvalOnce::run_with_input`].
///
/// ```no_run
/// # use nu_protocol::{ListStream, PipelineData, PipelineIterator};
/// # use nu_engine::ClosureEvalOnce;
/// # let engine_state = unimplemented!();
/// # let stack = unimplemented!();
/// # let closure = unimplemented!();
/// # let value = unimplemented!();
/// let result = ClosureEvalOnce::new(engine_state, stack, closure)
///     .add_arg(value)
///     .run_with_input(PipelineData::Empty);
/// ```
///
/// Many closures follow a simple, common scheme where the pipeline input and the first argument are the same value.
/// In this case, use [`ClosureEvalOnce::run_with_value`]:
///
/// ```no_run
/// # use nu_protocol::{PipelineData, PipelineIterator};
/// # use nu_engine::ClosureEvalOnce;
/// # let engine_state = unimplemented!();
/// # let stack = unimplemented!();
/// # let closure = unimplemented!();
/// # let value = unimplemented!();
/// let result = ClosureEvalOnce::new(engine_state, stack, closure).run_with_value(value);
/// ```
pub struct ClosureEvalOnce<'a> {
/// Borrowed engine state the closure is evaluated against.
engine_state: &'a EngineState,
/// Stack the closure runs on, built from the caller's stack and the closure's captures.
stack: Stack,
/// The closure's block, borrowed from the engine state.
block: &'a Block,
/// Index of the next positional parameter to bind.
arg_index: usize,
/// Function used to evaluate the block; picked from the engine state by default
/// and replaceable through [`ClosureEvalOnce::debug`].
eval: EvalBlockWithEarlyReturnFn,
}
impl<'a> ClosureEvalOnce<'a> {
    /// Build a single-use evaluator for `closure`.
    ///
    /// The block is borrowed from `engine_state`, and the closure's captures are turned into
    /// a callee stack derived from `stack`.
    pub fn new(engine_state: &'a EngineState, stack: &Stack, closure: Closure) -> Self {
        let callee_stack = stack.captures_to_stack(closure.captures);

        Self {
            engine_state,
            stack: callee_stack,
            block: engine_state.get_block(closure.block_id),
            arg_index: 0,
            eval: get_eval_block_with_early_return(engine_state),
        }
    }

    /// Choose whether the closure is evaluated with debugging enabled.
    ///
    /// Unless this is called, the evaluator selected from the [`EngineState`] given to
    /// [`ClosureEvalOnce::new`] is used.
    pub fn debug(mut self, debug: bool) -> Self {
        let evaluator = eval_fn(debug);
        self.eval = evaluator;
        self
    }

    /// Bind `value` to the positional parameter at `arg_index`, if the block's signature has
    /// one there and it carries a variable id; otherwise the value is dropped silently.
    fn try_add_arg(&mut self, value: Cow<Value>) {
        let slot = self.block.signature.get_positional(self.arg_index);
        let Some(var_id) = slot.and_then(|param| param.var_id) else {
            return;
        };

        self.stack.add_var(var_id, value.into_owned());
        self.arg_index += 1;
    }

    /// Supply the next positional argument [`Value`] for the closure.
    ///
    /// Calls to [`add_arg`](Self::add_arg) may be chained,
    /// as long as the arguments are given in positional order.
    pub fn add_arg(mut self, value: Value) -> Self {
        let owned = Cow::Owned(value);
        self.try_add_arg(owned);
        self
    }

    /// Evaluate the closure with `input` as its pipeline input, consuming the evaluator.
    ///
    /// Arguments must already have been supplied through [`add_arg`](Self::add_arg).
    pub fn run_with_input(mut self, input: PipelineData) -> Result<PipelineData, ShellError> {
        let evaluate = self.eval;
        evaluate(self.engine_state, &mut self.stack, self.block, input)
    }

    /// Evaluate the closure with `value` serving as both the first argument and the pipeline input.
    ///
    /// Mixing this with [`add_arg`](Self::add_arg) is most likely a mistake.
    /// It behaves like `self.add_arg(value)` followed by `self.run_with_input(value.into_pipeline_data())`.
    pub fn run_with_value(mut self, value: Value) -> Result<PipelineData, ShellError> {
        let first_arg = Cow::Borrowed(&value);
        self.try_add_arg(first_arg);

        let input = value.into_pipeline_data();
        self.run_with_input(input)
    }
}