LazyRecord (#7619)

This is an attempt to implement a new `Value::LazyRecord` variant for
performance reasons.

`LazyRecord` is like a regular `Record`, but it's possible to access
individual columns without evaluating other columns. I've implemented
`LazyRecord` for the special `$nu` variable; accessing `$nu` is
relatively slow because of all the information in `scope`, and [`$nu`
accounts for about 2/3 of Nu's startup time on
Linux](https://github.com/nushell/nushell/issues/6677#issuecomment-1364618122).

### Benchmarks

I ran some benchmarks on my desktop (Linux, 12900K) and the results are
very pleasing.

Nu's time to start up and run a command (`cargo build --release;
hyperfine 'target/release/nu -c "echo \"Hello, world!\""' --shell=none
--warmup 10`) goes from **8.8ms to 3.2ms, about 2.8x faster**.

Tests are also much faster! Running `cargo nextest` (with our very slow
`proptest` tests disabled) goes from **7.2s to 4.4s (1.6x faster)**,
because most tests involve launching a new instance of Nu.

### Design (updated)

I've added a new `LazyRecord` trait and added a `Value` variant wrapping
those trait objects, much like `CustomValue`. `LazyRecord`
implementations must implement these 2 functions (plus a `span()`
accessor that returns the record's `Span`):

```rust
// All column names
fn column_names(&self) -> Vec<&'static str>;

// Get 1 specific column value
fn get_column_value(&self, column: &str) -> Result<Value, ShellError>;
```

### Serializability

`Value` variants must implement serde's `Serialize` and `Deserialize` traits, which poses some problems because I want to use unserializable things like `EngineState` in `LazyRecord`s. To work around this, I basically lie to the type system:

1. Add `#[typetag::serde(tag = "type")]` to `LazyRecord` to make it serializable
2. Any unserializable fields in `LazyRecord` implementations get marked with `#[serde(skip)]`
3. At the point where a `LazyRecord` normally would get serialized and sent to a plugin, I instead collect it into a regular `Value::Record` (which can be serialized)
This commit is contained in:
Reilly Wood
2023-01-18 19:27:26 -08:00
committed by GitHub
parent be32aeee70
commit 3b5172a8fa
19 changed files with 443 additions and 147 deletions

View File

@ -0,0 +1,34 @@
use crate::{ShellError, Span, Value};
use std::fmt;
/// A record whose columns are evaluated on demand instead of all up front.
///
/// `typetag` is what makes this trait object implement `Serialize` and
/// `Deserialize`, even though a `LazyRecord` should never actually be
/// serialized. To serialize one, first turn it into a `Value::Record` with
/// [`LazyRecord::collect`].
#[typetag::serde(tag = "type")]
pub trait LazyRecord: fmt::Debug + Send + Sync {
    /// Returns the names of all columns in this record.
    fn column_names(&self) -> Vec<&'static str>;

    /// Returns the value of one specific column.
    fn get_column_value(&self, column: &str) -> Result<Value, ShellError>;

    /// Returns the span that [`LazyRecord::collect`] attaches to the
    /// resulting `Value::Record`.
    fn span(&self) -> Span;

    /// Converts the lazy record into a regular `Value::Record` by evaluating
    /// every column, in the order given by [`LazyRecord::column_names`].
    ///
    /// # Errors
    ///
    /// Evaluation stops at the first column whose
    /// [`LazyRecord::get_column_value`] call fails, and that error is returned.
    fn collect(&self) -> Result<Value, ShellError> {
        let names = self.column_names();

        // Collecting into `Result` short-circuits on the first failing column.
        let vals = names
            .iter()
            .map(|&name| self.get_column_value(name))
            .collect::<Result<Vec<_>, _>>()?;
        let cols = names.into_iter().map(String::from).collect();

        Ok(Value::Record {
            cols,
            vals,
            span: self.span(),
        })
    }
}

View File

@ -1,6 +1,7 @@
mod custom_value;
mod from;
mod from_value;
mod lazy_record;
mod range;
mod stream;
mod unit;
@ -17,6 +18,7 @@ pub use custom_value::CustomValue;
use fancy_regex::Regex;
pub use from_value::FromValue;
use indexmap::map::IndexMap;
pub use lazy_record::LazyRecord;
use nu_utils::get_system_locale;
use num_format::ToFormattedString;
pub use range::*;
@ -101,10 +103,16 @@ pub enum Value {
val: CellPath,
span: Span,
},
#[serde(skip_serializing)]
CustomValue {
val: Box<dyn CustomValue>,
span: Span,
},
#[serde(skip_serializing)]
LazyRecord {
val: Box<dyn LazyRecord>,
span: Span,
},
}
impl Clone for Value {
@ -138,6 +146,13 @@ impl Clone for Value {
vals: vals.clone(),
span: *span,
},
Value::LazyRecord { val, .. } => {
match val.collect() {
Ok(val) => val,
// this is a bit weird, but because clone() is infallible...
Err(error) => Value::Error { error },
}
}
Value::List { vals, span } => Value::List {
vals: vals.clone(),
span: *span,
@ -350,6 +365,7 @@ impl Value {
Value::Binary { span, .. } => Ok(*span),
Value::CellPath { span, .. } => Ok(*span),
Value::CustomValue { span, .. } => Ok(*span),
Value::LazyRecord { span, .. } => Ok(*span),
}
}
@ -372,6 +388,7 @@ impl Value {
Value::Range { span, .. } => *span = new_span,
Value::String { span, .. } => *span = new_span,
Value::Record { span, .. } => *span = new_span,
Value::LazyRecord { span, .. } => *span = new_span,
Value::List { span, .. } => *span = new_span,
Value::Closure { span, .. } => *span = new_span,
Value::Block { span, .. } => *span = new_span,
@ -426,6 +443,10 @@ impl Value {
None => Type::List(Box::new(ty.unwrap_or(Type::Any))),
}
}
Value::LazyRecord { val, .. } => match val.collect() {
Ok(val) => val.get_type(),
Err(..) => Type::Error,
},
Value::Nothing { .. } => Type::Nothing,
Value::Block { .. } => Type::Block,
Value::Closure { .. } => Type::Closure,
@ -512,6 +533,13 @@ impl Value {
.collect::<Vec<_>>()
.join(separator)
),
Value::LazyRecord { val, .. } => {
let collected = match val.collect() {
Ok(val) => val,
Err(error) => Value::Error { error },
};
collected.into_string(separator, config)
}
Value::Block { val, .. } => format!("<Block {}>", val),
Value::Closure { val, .. } => format!("<Closure {}>", val),
Value::Nothing { .. } => String::new(),
@ -556,6 +584,10 @@ impl Value {
cols.len(),
if cols.len() == 1 { "" } else { "s" }
),
Value::LazyRecord { val, .. } => match val.collect() {
Ok(val) => val.into_abbreviated_string(config),
Err(error) => format!("{:?}", error),
},
Value::Block { val, .. } => format!("<Block {}>", val),
Value::Closure { val, .. } => format!("<Closure {}>", val),
Value::Nothing { .. } => String::new(),
@ -603,6 +635,10 @@ impl Value {
.collect::<Vec<_>>()
.join(separator)
),
Value::LazyRecord { val, .. } => match val.collect() {
Ok(val) => val.debug_string(separator, config),
Err(error) => format!("{:?}", error),
},
Value::Block { val, .. } => format!("<Block {}>", val),
Value::Closure { val, .. } => format!("<Closure {}>", val),
Value::Nothing { .. } => String::new(),
@ -777,6 +813,30 @@ impl Value {
);
}
}
Value::LazyRecord { val, span } => {
let columns = val.column_names();
if columns.contains(&column_name.as_str()) {
current = val.get_column_value(column_name)?;
} else {
if from_user_input {
if let Some(suggestion) = did_you_mean(&columns, column_name) {
err_or_null!(
ShellError::DidYouMean(suggestion, *origin_span),
*origin_span
);
}
}
err_or_null!(
ShellError::CantFindColumn(
column_name.to_string(),
*origin_span,
*span,
),
*origin_span
);
}
}
// String access of Lists always means Table access.
// Create a List which contains each matching value for contained
// records in the source list.
@ -1567,6 +1627,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1586,6 +1647,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1605,6 +1667,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1624,6 +1687,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1643,6 +1707,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1662,6 +1727,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Less),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1681,6 +1747,7 @@ impl PartialOrd for Value {
Value::Range { val: rhs, .. } => lhs.partial_cmp(rhs),
Value::String { .. } => Some(Ordering::Less),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1700,6 +1767,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { val: rhs, .. } => lhs.partial_cmp(rhs),
Value::Record { .. } => Some(Ordering::Less),
Value::LazyRecord { .. } => Some(Ordering::Less),
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1745,6 +1813,10 @@ impl PartialOrd for Value {
result
}
}
Value::LazyRecord { val, .. } => match val.collect() {
Ok(rhs) => self.partial_cmp(&rhs),
Err(_) => None,
},
Value::List { .. } => Some(Ordering::Less),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1764,6 +1836,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { vals: rhs, .. } => lhs.partial_cmp(rhs),
Value::Block { .. } => Some(Ordering::Less),
Value::Closure { .. } => Some(Ordering::Less),
@ -1784,6 +1857,7 @@ impl PartialOrd for Value {
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::Block { val: rhs, .. } => lhs.partial_cmp(rhs),
Value::Closure { .. } => Some(Ordering::Less),
Value::Nothing { .. } => Some(Ordering::Less),
@ -1802,6 +1876,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::Block { .. } => Some(Ordering::Greater),
Value::Closure { val: rhs, .. } => lhs.partial_cmp(rhs),
@ -1821,6 +1896,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::Block { .. } => Some(Ordering::Greater),
Value::Closure { .. } => Some(Ordering::Greater),
@ -1840,6 +1916,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::Block { .. } => Some(Ordering::Greater),
Value::Closure { .. } => Some(Ordering::Greater),
@ -1859,6 +1936,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::Block { .. } => Some(Ordering::Greater),
Value::Closure { .. } => Some(Ordering::Greater),
@ -1878,6 +1956,7 @@ impl PartialOrd for Value {
Value::Range { .. } => Some(Ordering::Greater),
Value::String { .. } => Some(Ordering::Greater),
Value::Record { .. } => Some(Ordering::Greater),
Value::LazyRecord { .. } => Some(Ordering::Greater),
Value::List { .. } => Some(Ordering::Greater),
Value::Block { .. } => Some(Ordering::Greater),
Value::Closure { .. } => Some(Ordering::Greater),
@ -1888,6 +1967,10 @@ impl PartialOrd for Value {
Value::CustomValue { .. } => Some(Ordering::Less),
},
(Value::CustomValue { val: lhs, .. }, rhs) => lhs.partial_cmp(rhs),
(Value::LazyRecord { val, .. }, rhs) => match val.collect() {
Ok(val) => val.partial_cmp(rhs),
Err(_) => None,
},
}
}
}