mirror of
https://github.com/nushell/nushell.git
synced 2025-05-30 14:50:02 +02:00
# Description When implementing a `Command`, one must also import all the types present in the function signatures for `Command`. This makes it so that we often import the same set of types in each command implementation file. E.g., something like this: ```rust use nu_protocol::ast::Call; use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::{ record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, ShellError, Signature, Span, Type, Value, }; ``` This PR adds the `nu_engine::command_prelude` module which contains the necessary and commonly used types to implement a `Command`: ```rust // command_prelude.rs pub use crate::CallExt; pub use nu_protocol::{ ast::{Call, CellPath}, engine::{Command, EngineState, Stack}, record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, IntoSpanned, PipelineData, Record, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; ``` This should reduce the boilerplate needed to implement a command and also gives us a place to track the breadth of the `Command` API. I tried to be conservative with what went into the prelude modules, since it might be hard/annoying to remove items from the prelude in the future. Let me know if something should be included or excluded.
280 lines
9.6 KiB
Rust
280 lines
9.6 KiB
Rust
use crate::dataframe::values::{Column, NuDataFrame};
|
|
use nu_engine::command_prelude::*;
|
|
|
|
use polars::{
|
|
chunked_array::ChunkedArray,
|
|
prelude::{
|
|
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
|
|
QuantileInterpolOptions, Series, StringType,
|
|
},
|
|
};
|
|
|
|
/// Command type backing `dfr summary`.
///
/// The struct carries no state; all behaviour lives in its `Command`
/// implementation.
#[derive(Clone)]
pub struct Summary;
|
|
|
|
impl Command for Summary {
|
|
fn name(&self) -> &str {
|
|
"dfr summary"
|
|
}
|
|
|
|
fn usage(&self) -> &str {
|
|
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
|
|
}
|
|
|
|
fn signature(&self) -> Signature {
|
|
Signature::build(self.name())
|
|
.category(Category::Custom("dataframe".into()))
|
|
.input_output_type(
|
|
Type::Custom("dataframe".into()),
|
|
Type::Custom("dataframe".into()),
|
|
)
|
|
.named(
|
|
"quantiles",
|
|
SyntaxShape::Table(vec![]),
|
|
"provide optional quantiles",
|
|
Some('q'),
|
|
)
|
|
}
|
|
|
|
fn examples(&self) -> Vec<Example> {
|
|
vec![Example {
|
|
description: "list dataframe descriptives",
|
|
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
|
|
result: Some(
|
|
NuDataFrame::try_from_columns(
|
|
vec![
|
|
Column::new(
|
|
"descriptor".to_string(),
|
|
vec![
|
|
Value::test_string("count"),
|
|
Value::test_string("sum"),
|
|
Value::test_string("mean"),
|
|
Value::test_string("median"),
|
|
Value::test_string("std"),
|
|
Value::test_string("min"),
|
|
Value::test_string("25%"),
|
|
Value::test_string("50%"),
|
|
Value::test_string("75%"),
|
|
Value::test_string("max"),
|
|
],
|
|
),
|
|
Column::new(
|
|
"a (i64)".to_string(),
|
|
vec![
|
|
Value::test_float(2.0),
|
|
Value::test_float(2.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(0.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
],
|
|
),
|
|
Column::new(
|
|
"b (i64)".to_string(),
|
|
vec![
|
|
Value::test_float(2.0),
|
|
Value::test_float(2.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(0.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
Value::test_float(1.0),
|
|
],
|
|
),
|
|
],
|
|
None,
|
|
)
|
|
.expect("simple df for test should not fail")
|
|
.into_value(Span::test_data()),
|
|
),
|
|
}]
|
|
}
|
|
|
|
fn run(
|
|
&self,
|
|
engine_state: &EngineState,
|
|
stack: &mut Stack,
|
|
call: &Call,
|
|
input: PipelineData,
|
|
) -> Result<PipelineData, ShellError> {
|
|
command(engine_state, stack, call, input)
|
|
}
|
|
}
|
|
|
|
fn command(
|
|
engine_state: &EngineState,
|
|
stack: &mut Stack,
|
|
call: &Call,
|
|
input: PipelineData,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let quantiles: Option<Vec<Value>> = call.get_flag(engine_state, stack, "quantiles")?;
|
|
let quantiles = quantiles.map(|values| {
|
|
values
|
|
.iter()
|
|
.map(|value| {
|
|
let span = value.span();
|
|
match value {
|
|
Value::Float { val, .. } => {
|
|
if (&0.0..=&1.0).contains(&val) {
|
|
Ok(*val)
|
|
} else {
|
|
Err(ShellError::GenericError {
|
|
error: "Incorrect value for quantile".into(),
|
|
msg: "value should be between 0 and 1".into(),
|
|
span: Some(span),
|
|
help: None,
|
|
inner: vec![],
|
|
})
|
|
}
|
|
}
|
|
Value::Error { error, .. } => Err(*error.clone()),
|
|
_ => Err(ShellError::GenericError {
|
|
error: "Incorrect value for quantile".into(),
|
|
msg: "value should be a float".into(),
|
|
span: Some(span),
|
|
help: None,
|
|
inner: vec![],
|
|
}),
|
|
}
|
|
})
|
|
.collect::<Result<Vec<f64>, ShellError>>()
|
|
});
|
|
|
|
let quantiles = match quantiles {
|
|
Some(quantiles) => quantiles?,
|
|
None => vec![0.25, 0.50, 0.75],
|
|
};
|
|
|
|
let mut quantiles_labels = quantiles
|
|
.iter()
|
|
.map(|q| Some(format!("{}%", q * 100.0)))
|
|
.collect::<Vec<Option<String>>>();
|
|
let mut labels = vec![
|
|
Some("count".to_string()),
|
|
Some("sum".to_string()),
|
|
Some("mean".to_string()),
|
|
Some("median".to_string()),
|
|
Some("std".to_string()),
|
|
Some("min".to_string()),
|
|
];
|
|
labels.append(&mut quantiles_labels);
|
|
labels.push(Some("max".to_string()));
|
|
|
|
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
|
|
|
let names = ChunkedArray::<StringType>::from_slice_options("descriptor", &labels).into_series();
|
|
|
|
let head = std::iter::once(names);
|
|
|
|
let tail = df
|
|
.as_ref()
|
|
.get_columns()
|
|
.iter()
|
|
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _)))
|
|
.map(|col| {
|
|
let count = col.len() as f64;
|
|
|
|
let sum = col.sum_as_series().ok().and_then(|series| {
|
|
series
|
|
.cast(&DataType::Float64)
|
|
.ok()
|
|
.and_then(|ca| match ca.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
})
|
|
});
|
|
|
|
let mean = match col.mean_as_series().get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
};
|
|
|
|
let median = match col.median_as_series() {
|
|
Ok(v) => match v.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
},
|
|
_ => None,
|
|
};
|
|
|
|
let std = match col.std_as_series(0) {
|
|
Ok(v) => match v.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
},
|
|
_ => None,
|
|
};
|
|
|
|
let min = col.min_as_series().ok().and_then(|series| {
|
|
series
|
|
.cast(&DataType::Float64)
|
|
.ok()
|
|
.and_then(|ca| match ca.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
})
|
|
});
|
|
|
|
let mut quantiles = quantiles
|
|
.clone()
|
|
.into_iter()
|
|
.map(|q| {
|
|
col.quantile_as_series(q, QuantileInterpolOptions::default())
|
|
.ok()
|
|
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
|
.and_then(|ca| match ca.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
})
|
|
})
|
|
.collect::<Vec<Option<f64>>>();
|
|
|
|
let max = col.max_as_series().ok().and_then(|series| {
|
|
series
|
|
.cast(&DataType::Float64)
|
|
.ok()
|
|
.and_then(|ca| match ca.get(0) {
|
|
Ok(AnyValue::Float64(v)) => Some(v),
|
|
_ => None,
|
|
})
|
|
});
|
|
|
|
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
|
|
descriptors.append(&mut quantiles);
|
|
descriptors.push(max);
|
|
|
|
let name = format!("{} ({})", col.name(), col.dtype());
|
|
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
|
|
});
|
|
|
|
let res = head.chain(tail).collect::<Vec<Series>>();
|
|
|
|
DataFrame::new(res)
|
|
.map_err(|e| ShellError::GenericError {
|
|
error: "Dataframe Error".into(),
|
|
msg: e.to_string(),
|
|
span: Some(call.head),
|
|
help: None,
|
|
inner: vec![],
|
|
})
|
|
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;
    use super::super::super::test_dataframe::test_dataframe;

    /// Hands the command to the shared `test_dataframe` helper, which is
    /// expected to exercise the entries returned by `examples()`.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(Summary)])
    }
}
|