forked from extern/nushell
describe command (#3907)
This commit is contained in:
parent
b6728efcd4
commit
38848082ae
232
crates/nu-command/src/commands/dataframe/describe.rs
Normal file
232
crates/nu-command/src/commands/dataframe/describe.rs
Normal file
@ -0,0 +1,232 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::{
|
||||
chunked_array::ChunkedArray,
|
||||
prelude::{
|
||||
AnyValue, DataFrame as PolarsDF, DataType, Float64Type, IntoSeries, NewChunkedArray,
|
||||
Series, Utf8Type,
|
||||
},
|
||||
};
|
||||
|
||||
use super::utils::parse_polars_error;
|
||||
|
||||
/// Unit type carrying the `WholeStreamCommand` implementation of `dataframe describe`.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe describe"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[DataFrame] Describes dataframes numeric columns"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe describe")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Describes dataframe",
|
||||
example: "[[a b]; [1 1] [1 1]] | dataframe to-df | dataframe describe",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"descriptor".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("count").into(),
|
||||
UntaggedValue::string("sum").into(),
|
||||
UntaggedValue::string("mean").into(),
|
||||
UntaggedValue::string("median").into(),
|
||||
UntaggedValue::string("std").into(),
|
||||
UntaggedValue::string("min").into(),
|
||||
UntaggedValue::string("25%").into(),
|
||||
UntaggedValue::string("50%").into(),
|
||||
UntaggedValue::string("75%").into(),
|
||||
UntaggedValue::string("max").into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"a (i64)".to_string(),
|
||||
vec![
|
||||
UntaggedValue::decimal_from_float(2.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(2.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(0.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b (i64)".to_string(),
|
||||
vec![
|
||||
UntaggedValue::decimal_from_float(2.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(2.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(0.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
UntaggedValue::decimal_from_float(1.0, Span::default()).into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let names = ChunkedArray::<Utf8Type>::new_from_opt_slice(
|
||||
"descriptor",
|
||||
&[
|
||||
Some("count"),
|
||||
Some("sum"),
|
||||
Some("mean"),
|
||||
Some("median"),
|
||||
Some("std"),
|
||||
Some("min"),
|
||||
Some("25%"),
|
||||
Some("50%"),
|
||||
Some("75%"),
|
||||
Some("max"),
|
||||
],
|
||||
)
|
||||
.into_series();
|
||||
|
||||
let head = std::iter::once(names);
|
||||
|
||||
let tail = df.as_ref().get_columns().iter().map(|col| {
|
||||
let count = col.len() as f64;
|
||||
|
||||
let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let mean = match col.mean_as_series().get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let median = match col.median_as_series().get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let std = match col.std_as_series().get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let q_25 = match col.quantile_as_series(0.25) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let q_50 = match col.quantile_as_series(0.50) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let q_75 = match col.quantile_as_series(0.75) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::new_from_opt_slice(
|
||||
name.as_str(),
|
||||
&[
|
||||
Some(count),
|
||||
sum,
|
||||
mean,
|
||||
median,
|
||||
std,
|
||||
min,
|
||||
q_25,
|
||||
q_50,
|
||||
q_75,
|
||||
max,
|
||||
],
|
||||
)
|
||||
.into_series()
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
let df = PolarsDF::new(res).map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
let df = NuDataFrame::dataframe_to_value(df, tag);
|
||||
Ok(OutputStream::one(df))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's documented examples through the dataframe example checker.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame)
    }
}
|
@ -2,6 +2,7 @@ pub mod aggregate;
|
||||
pub mod append;
|
||||
pub mod column;
|
||||
pub mod command;
|
||||
pub mod describe;
|
||||
pub mod drop;
|
||||
pub mod drop_duplicates;
|
||||
pub mod drop_nulls;
|
||||
@ -35,6 +36,7 @@ pub use aggregate::DataFrame as DataFrameAggregate;
|
||||
pub use append::DataFrame as DataFrameAppend;
|
||||
pub use column::DataFrame as DataFrameColumn;
|
||||
pub use command::Command as DataFrame;
|
||||
pub use describe::DataFrame as DataFrameDescribe;
|
||||
pub use drop::DataFrame as DataFrameDrop;
|
||||
pub use drop_duplicates::DataFrame as DataFrameDropDuplicates;
|
||||
pub use drop_nulls::DataFrame as DataFrameDropNulls;
|
||||
|
@ -27,19 +27,19 @@ pub use core_commands::*;
|
||||
pub use dataframe::{
|
||||
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameAppend,
|
||||
DataFrameArgMax, DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique,
|
||||
DataFrameColumn, DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst,
|
||||
DataFrameGet, DataFrameGetDay, DataFrameGetHour, DataFrameGetMinute, DataFrameGetMonth,
|
||||
DataFrameGetNanoSecond, DataFrameGetOrdinal, DataFrameGetSecond, DataFrameGetWeek,
|
||||
DataFrameGetWeekDay, DataFrameGetYear, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn,
|
||||
DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast,
|
||||
DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen,
|
||||
DataFramePivot, DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect,
|
||||
DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift,
|
||||
DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameStrFTime, DataFrameStringLengths,
|
||||
DataFrameStringSlice, DataFrameTake, DataFrameToCsv, DataFrameToDF, DataFrameToLowercase,
|
||||
DataFrameToParquet, DataFrameToUppercase, DataFrameUnique, DataFrameValueCounts,
|
||||
DataFrameWhere, DataFrameWithColumn,
|
||||
DataFrameColumn, DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDescribe,
|
||||
DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter,
|
||||
DataFrameFirst, DataFrameGet, DataFrameGetDay, DataFrameGetHour, DataFrameGetMinute,
|
||||
DataFrameGetMonth, DataFrameGetNanoSecond, DataFrameGetOrdinal, DataFrameGetSecond,
|
||||
DataFrameGetWeek, DataFrameGetWeekDay, DataFrameGetYear, DataFrameGroupBy,
|
||||
DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique,
|
||||
DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique,
|
||||
DataFrameNot, DataFrameOpen, DataFramePivot, DataFrameReplace, DataFrameReplaceAll,
|
||||
DataFrameSample, DataFrameSelect, DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx,
|
||||
DataFrameShape, DataFrameShift, DataFrameShow, DataFrameSlice, DataFrameSort,
|
||||
DataFrameStrFTime, DataFrameStringLengths, DataFrameStringSlice, DataFrameTake, DataFrameToCsv,
|
||||
DataFrameToDF, DataFrameToLowercase, DataFrameToParquet, DataFrameToUppercase, DataFrameUnique,
|
||||
DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn,
|
||||
};
|
||||
pub use env::*;
|
||||
pub use filesystem::*;
|
||||
|
@ -340,6 +340,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||
whole_stream_command(DataFrameGetOrdinal),
|
||||
whole_stream_command(DataFrameGetNanoSecond),
|
||||
whole_stream_command(DataFrameStrFTime),
|
||||
whole_stream_command(DataFrameDescribe),
|
||||
]);
|
||||
|
||||
#[cfg(feature = "clipboard-cli")]
|
||||
|
Loading…
Reference in New Issue
Block a user