Ian Manske c747ec75c9
Add command_prelude module (#12291)
# Description
When implementing a `Command`, one must also import all the types
present in the function signatures for `Command`. This makes it so that
we often import the same set of types in each command implementation
file. E.g., something like this:
```rust
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
    record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
    ShellError, Signature, Span, Type, Value,
};
```

This PR adds the `nu_engine::command_prelude` module which contains the
necessary and commonly used types to implement a `Command`:
```rust
// command_prelude.rs
pub use crate::CallExt;
pub use nu_protocol::{
    ast::{Call, CellPath},
    engine::{Command, EngineState, Stack},
    record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, IntoSpanned,
    PipelineData, Record, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value,
};
```

This should reduce the boilerplate needed to implement a command and
also gives us a place to track the breadth of the `Command` API. I tried
to be conservative with what went into the prelude modules, since it
might be hard/annoying to remove items from the prelude in the future.
Let me know if something should be included or excluded.
2024-03-26 21:17:30 +00:00

280 lines
9.6 KiB
Rust

use crate::dataframe::values::{Column, NuDataFrame};
use nu_engine::command_prelude::*;
use polars::{
chunked_array::ChunkedArray,
prelude::{
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
QuantileInterpolOptions, Series, StringType,
},
};
#[derive(Clone)]
pub struct Summary;
impl Command for Summary {
fn name(&self) -> &str {
"dfr summary"
}
fn usage(&self) -> &str {
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.category(Category::Custom("dataframe".into()))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.named(
"quantiles",
SyntaxShape::Table(vec![]),
"provide optional quantiles",
Some('q'),
)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "list dataframe descriptives",
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"descriptor".to_string(),
vec![
Value::test_string("count"),
Value::test_string("sum"),
Value::test_string("mean"),
Value::test_string("median"),
Value::test_string("std"),
Value::test_string("min"),
Value::test_string("25%"),
Value::test_string("50%"),
Value::test_string("75%"),
Value::test_string("max"),
],
),
Column::new(
"a (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
Column::new(
"b (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let quantiles: Option<Vec<Value>> = call.get_flag(engine_state, stack, "quantiles")?;
let quantiles = quantiles.map(|values| {
values
.iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if (&0.0..=&1.0).contains(&val) {
Ok(*val)
} else {
Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be between 0 and 1".into(),
span: Some(span),
help: None,
inner: vec![],
})
}
}
Value::Error { error, .. } => Err(*error.clone()),
_ => Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be a float".into(),
span: Some(span),
help: None,
inner: vec![],
}),
}
})
.collect::<Result<Vec<f64>, ShellError>>()
});
let quantiles = match quantiles {
Some(quantiles) => quantiles?,
None => vec![0.25, 0.50, 0.75],
};
let mut quantiles_labels = quantiles
.iter()
.map(|q| Some(format!("{}%", q * 100.0)))
.collect::<Vec<Option<String>>>();
let mut labels = vec![
Some("count".to_string()),
Some("sum".to_string()),
Some("mean".to_string()),
Some("median".to_string()),
Some("std".to_string()),
Some("min".to_string()),
];
labels.append(&mut quantiles_labels);
labels.push(Some("max".to_string()));
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let names = ChunkedArray::<StringType>::from_slice_options("descriptor", &labels).into_series();
let head = std::iter::once(names);
let tail = df
.as_ref()
.get_columns()
.iter()
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _)))
.map(|col| {
let count = col.len() as f64;
let sum = col.sum_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mean = match col.mean_as_series().get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
};
let median = match col.median_as_series() {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let std = match col.std_as_series(0) {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let min = col.min_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut quantiles = quantiles
.clone()
.into_iter()
.map(|q| {
col.quantile_as_series(q, QuantileInterpolOptions::default())
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
})
.collect::<Vec<Option<f64>>>();
let max = col.max_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
descriptors.append(&mut quantiles);
descriptors.push(max);
let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
});
let res = head.chain(tail).collect::<Vec<Series>>();
DataFrame::new(res)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(Summary {})])
}
}