From 6984185e612f7fedd0d95b44a4d73b3e9e5f0826 Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Sat, 31 Jul 2021 15:02:32 +0100 Subject: [PATCH] Better representation in nested dataframes (#3875) * better dataframe representation in nested df * Error message correction --- .../src/commands/filters/reverse.rs | 3 ++ crates/nu-data/src/base/shape.rs | 42 +++++++++++++++++-- .../nu-protocol/src/dataframe/nu_dataframe.rs | 24 ++++++++++- .../nu-protocol/src/dataframe/nu_groupby.rs | 4 ++ crates/nu-value-ext/src/lib.rs | 16 ++++--- 5 files changed, 79 insertions(+), 10 deletions(-) diff --git a/crates/nu-command/src/commands/filters/reverse.rs b/crates/nu-command/src/commands/filters/reverse.rs index ebcf15042..dbd7ee939 100644 --- a/crates/nu-command/src/commands/filters/reverse.rs +++ b/crates/nu-command/src/commands/filters/reverse.rs @@ -37,7 +37,10 @@ impl WholeStreamCommand for Reverse { } } +#[allow(clippy::needless_collect)] fn reverse(args: CommandArgs) -> Result { + // Clippy warning should be ignored + // This collect is needed to apply rev let input = args.input.collect::>(); Ok((input.into_iter().rev().map(ReturnSuccess::value)).into_action_stream()) } diff --git a/crates/nu-data/src/base/shape.rs b/crates/nu-data/src/base/shape.rs index a0e41164f..a110ba076 100644 --- a/crates/nu-data/src/base/shape.rs +++ b/crates/nu-data/src/base/shape.rs @@ -13,6 +13,9 @@ use std::hash::{Hash, Hasher}; use std::path::PathBuf; use sys_locale::get_locale; +#[cfg(feature = "dataframe")] +use nu_protocol::dataframe::{FrameStruct, NuDataFrame}; + #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deserialize, Serialize)] pub struct InlineRange { from: (InlineShape, RangeInclusion), @@ -45,9 +48,11 @@ pub enum InlineShape { // TODO: Error type Error, - // TODO: Dataframe type #[cfg(feature = "dataframe")] - DataFrame, + DataFrame(String), + + #[cfg(feature = "dataframe")] + FrameStruct(String), // Stream markers (used as bookend markers rather than actual values) BeginningOfStream, @@ -123,6 +128,24 @@ impl InlineShape { InlineShape::Table(vec) } + #[cfg(feature = "dataframe")] + pub fn from_df(df: &NuDataFrame) -> InlineShape { + let msg = format!("{} rows {} cols", df.as_ref().height(), df.as_ref().width()); + + InlineShape::DataFrame(msg) + } + + #[cfg(feature = "dataframe")] + pub fn from_frame_struct(s: &FrameStruct) -> InlineShape { + match s { + FrameStruct::GroupBy(groupby) => { + let msg = groupby.by().join(","); + let msg = format!("groupby {}", msg); + InlineShape::DataFrame(msg) + } + } + } + pub fn from_value<'a>(value: impl Into<&'a UntaggedValue>) -> InlineShape { match value.into() { UntaggedValue::Primitive(p) => InlineShape::from_primitive(p), @@ -131,7 +154,9 @@ impl InlineShape { UntaggedValue::Error(_) => InlineShape::Error, UntaggedValue::Block(_) => InlineShape::Block, #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => InlineShape::DataFrame, + UntaggedValue::DataFrame(df) => InlineShape::from_df(df), + #[cfg(feature = "dataframe")] + UntaggedValue::FrameStruct(s) => InlineShape::from_frame_struct(s), } } @@ -344,7 +369,16 @@ impl PrettyDebug for FormatInlineShape { InlineShape::Block => DbgDocBldr::opaque("block"), InlineShape::Error => DbgDocBldr::error("error"), #[cfg(feature = "dataframe")] - InlineShape::DataFrame => DbgDocBldr::error("dataframe_pretty_formatter"), + InlineShape::DataFrame(msg) => DbgDocBldr::delimit( + "[", + DbgDocBldr::kind("dataframe") + DbgDocBldr::space() + DbgDocBldr::primitive(msg), + "]", + ) + .group(), + #[cfg(feature = "dataframe")] + InlineShape::FrameStruct(msg) => { + DbgDocBldr::delimit("[", DbgDocBldr::primitive(msg), "]").group() + } InlineShape::BeginningOfStream => DbgDocBldr::blank(), InlineShape::EndOfStream => DbgDocBldr::blank(), } diff --git a/crates/nu-protocol/src/dataframe/nu_dataframe.rs b/crates/nu-protocol/src/dataframe/nu_dataframe.rs index f6150b95b..fe2022f06 100644 --- a/crates/nu-protocol/src/dataframe/nu_dataframe.rs +++ b/crates/nu-protocol/src/dataframe/nu_dataframe.rs @@ -173,7 +173,8 @@ impl NuDataFrame { UntaggedValue::Primitive(Primitive::Int(_)) | UntaggedValue::Primitive(Primitive::Decimal(_)) | UntaggedValue::Primitive(Primitive::String(_)) - | UntaggedValue::Primitive(Primitive::Boolean(_)) => { + | UntaggedValue::Primitive(Primitive::Boolean(_)) + | UntaggedValue::DataFrame(_) => { let key = format!("{}", 0); insert_value(value, key, &mut column_values)? } @@ -286,6 +287,27 @@ impl NuDataFrame { Ok(series.clone()) } + pub fn get_value(&self, row: usize, span: Span) -> Result { + let series = self.as_series(&Span::default())?; + let column = create_column(&series, row, row + 1)?; + + if column.len() == 0 { + Err(ShellError::labeled_error_with_secondary( + "Not a valid row", + format!("No value found for index {}", row), + span, + format!("Note that the column size is {}", series.len()), + span, + )) + } else { + let value = column + .into_iter() + .next() + .expect("already checked there is a value"); + Ok(value) + } + } + // Print is made out a head and if the dataframe is too large, then a tail pub fn print(&self) -> Result, ShellError> { let df = &self.as_ref(); diff --git a/crates/nu-protocol/src/dataframe/nu_groupby.rs b/crates/nu-protocol/src/dataframe/nu_groupby.rs index 33aa6022e..c5439b88b 100644 --- a/crates/nu-protocol/src/dataframe/nu_groupby.rs +++ b/crates/nu-protocol/src/dataframe/nu_groupby.rs @@ -23,6 +23,10 @@ impl NuGroupBy { } } + pub fn by(&self) -> &[String] { + &self.by + } + pub fn try_from_stream(input: &mut T, span: &Span) -> Result where T: Iterator, diff --git a/crates/nu-value-ext/src/lib.rs b/crates/nu-value-ext/src/lib.rs index a48f37f85..331ddf5a7 100644 --- a/crates/nu-value-ext/src/lib.rs +++ b/crates/nu-value-ext/src/lib.rs @@ -214,11 +214,17 @@ pub fn get_data_by_member(value: &Value, name: &PathMember) -> Result Err(ShellError::labeled_error( - "Integer as column", - "Only string as column name", - &name.span, - )), + UnspannedPathMember::Int(int) => { + if df.is_series() { + df.get_value(*int as usize, name.span) + } else { + Err(ShellError::labeled_error( + "Column not found", + "Column name not found in the dataframe", + name.span, + )) + } + } }, other => Err(ShellError::type_error( "row or table",