From c0cc9ce7cd205fc86d0604d5658c8da247a064af Mon Sep 17 00:00:00 2001
From: Fernando Herrera
Date: Sat, 15 May 2021 08:24:11 +0100
Subject: [PATCH] Dataframe new commands (#3425)

* Folder for dataframe commands

* New commands for dataframe
---

Review notes (ignored when the patch is applied; not part of the commit):
* dataframe/list.rs: `df.dataframe.unwrap()` panics when `dataframe` holds
  no value; consider skipping that entry or returning a ShellError instead.
* dataframe/load.rs: the usage string reads "form"; presumably "from" was
  intended.
* dataframe/command.rs: the usage string ends with a trailing space.
* dataframe/list.rs and dataframe/load.rs both declare `pub struct Dataframe`;
  they are only told apart by the aliases in dataframe/mod.rs.

 crates/nu-command/src/commands.rs             |  2 +-
 .../src/commands/dataframe/command.rs         | 38 +++++++++++
 .../nu-command/src/commands/dataframe/list.rs | 53 +++++++++++++++
 .../{dataframe.rs => dataframe/load.rs}       | 68 ++++---------------
 .../nu-command/src/commands/dataframe/mod.rs  |  7 ++
 .../src/commands/default_context.rs           |  4 ++
 6 files changed, 115 insertions(+), 57 deletions(-)
 create mode 100644 crates/nu-command/src/commands/dataframe/command.rs
 create mode 100644 crates/nu-command/src/commands/dataframe/list.rs
 rename crates/nu-command/src/commands/{dataframe.rs => dataframe/load.rs} (53%)
 create mode 100644 crates/nu-command/src/commands/dataframe/mod.rs

diff --git a/crates/nu-command/src/commands.rs b/crates/nu-command/src/commands.rs
index 4654a1f01..b6f821f42 100644
--- a/crates/nu-command/src/commands.rs
+++ b/crates/nu-command/src/commands.rs
@@ -187,7 +187,7 @@ pub(crate) mod touch;
 pub(crate) use all::Command as All;
 pub(crate) use any::Command as Any;
 #[cfg(feature = "dataframe")]
-pub(crate) use dataframe::Dataframe;
+pub(crate) use dataframe::{Dataframe, DataframeList, DataframeLoad};
 pub(crate) use enter::Enter;
 pub(crate) use every::Every;
 pub(crate) use exec::Exec;
diff --git a/crates/nu-command/src/commands/dataframe/command.rs b/crates/nu-command/src/commands/dataframe/command.rs
new file mode 100644
index 000000000..0ed38638e
--- /dev/null
+++ b/crates/nu-command/src/commands/dataframe/command.rs
@@ -0,0 +1,38 @@
+use crate::prelude::*;
+use nu_engine::WholeStreamCommand;
+use nu_errors::ShellError;
+use nu_protocol::{dataframe::NuDataFrame, Signature, UntaggedValue};
+
+pub struct Command;
+
+impl WholeStreamCommand for Command {
+    fn name(&self) -> &str {
+        "dataframe"
+    }
+
+    fn usage(&self) -> &str {
+        "Creates a dataframe from pipelined Table or List "
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("dataframe")
+    }
+
+    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
+        let tag = args.call_info.name_tag.clone();
+        let args = args.evaluate_once()?;
+
+        let df = NuDataFrame::try_from_iter(args.input, &tag)?;
+        let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
+
+        Ok(init.to_output_stream())
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Takes an input stream and converts it to a dataframe",
+            example: "echo [[a b];[1 2] [3 4]] | dataframe",
+            result: None,
+        }]
+    }
+}
diff --git a/crates/nu-command/src/commands/dataframe/list.rs b/crates/nu-command/src/commands/dataframe/list.rs
new file mode 100644
index 000000000..e1b4c5628
--- /dev/null
+++ b/crates/nu-command/src/commands/dataframe/list.rs
@@ -0,0 +1,53 @@
+use crate::prelude::*;
+use nu_engine::WholeStreamCommand;
+use nu_errors::ShellError;
+use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
+
+pub struct Dataframe;
+
+impl WholeStreamCommand for Dataframe {
+    fn name(&self) -> &str {
+        "dataframe list"
+    }
+
+    fn usage(&self) -> &str {
+        "Lists stored dataframes"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("dataframe list")
+    }
+
+    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
+        let args = args.evaluate_once()?;
+
+        let mut dataframes: Vec<Value> = Vec::new();
+        for (name, value) in args.context.scope.get_vars() {
+            if let UntaggedValue::Dataframe(df) = value.value {
+                let mut data = TaggedDictBuilder::new(value.tag);
+
+                let polars_df = df.dataframe.unwrap();
+
+                let rows = polars_df.height();
+                let cols = polars_df.width();
+
+                data.insert_value("name", name);
+                data.insert_value("file", df.name);
+                data.insert_value("rows", format!("{}", rows));
+                data.insert_value("columns", format!("{}", cols));
+
+                dataframes.push(data.into_value());
+            }
+        }
+
+        Ok(OutputStream::from_stream(dataframes.into_iter()))
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Lists loaded dataframes in current scope",
+            example: "dataframe list",
+            result: None,
+        }]
+    }
+}
diff --git a/crates/nu-command/src/commands/dataframe.rs b/crates/nu-command/src/commands/dataframe/load.rs
similarity index 53%
rename from crates/nu-command/src/commands/dataframe.rs
rename to crates/nu-command/src/commands/dataframe/load.rs
index e2769a51a..44c10b0b4 100644
--- a/crates/nu-command/src/commands/dataframe.rs
+++ b/crates/nu-command/src/commands/dataframe/load.rs
@@ -3,82 +3,47 @@ use std::path::PathBuf;
 use crate::prelude::*;
 use nu_engine::WholeStreamCommand;
 use nu_errors::ShellError;
-use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
+use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, UntaggedValue};
 
 use nu_source::Tagged;
 use polars::prelude::{CsvReader, SerReader};
 
 pub struct Dataframe;
 
-#[derive(Deserialize)]
-pub struct OpenArgs {
-    file: Tagged<PathBuf>,
-}
-
 impl WholeStreamCommand for Dataframe {
     fn name(&self) -> &str {
-        "dataframe"
+        "dataframe load"
     }
 
     fn usage(&self) -> &str {
-        "Creates a dataframe from a csv file"
+        "Loads dataframe form csv or parquet file"
     }
 
     fn signature(&self) -> Signature {
-        Signature::build("dataframe").named(
+        Signature::build("dataframe load").required(
             "file",
             SyntaxShape::FilePath,
             "the file path to load values from",
-            Some('f'),
         )
     }
 
     fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
-        load_dataframe(args)
+        create_from_file(args)
     }
 
     fn examples(&self) -> Vec<Example> {
-        vec![
-            Example {
-                description: "Takes a file name and creates a dataframe",
-                example: "dataframe -f test.csv",
-                result: None,
-            },
-            Example {
-                description: "Takes an input stream and converts it to a dataframe",
-                example: "echo [[a b];[1 2] [3 4]] | dataframe",
-                result: None,
-            },
-        ]
+        vec![Example {
+            description: "Takes a file name and creates a dataframe",
+            example: "dataframe load test.csv",
+            result: None,
+        }]
     }
 }
 
-// Creates a dataframe from either a file or a table.
-// If both options are found, then an error is returned to the user.
-// The InputStream can have a table and a dictionary as input variable.
-fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
-    // The file has priority over stream input
-    if let Some(NamedValue::Value(_, _)) = args
-        .call_info()
-        .args
-        .named
-        .as_ref()
-        .map(|named| named.named.get("file"))
-        .flatten()
-    {
-        return create_from_file(args);
-    }
-
-    create_from_input(args)
-}
-
 fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
-    // Command Tag. This marks where the command is located and the name
-    // of the command used
     let tag = args.call_info.name_tag.clone();
-
-    // Parsing the arguments that the function uses
-    let (OpenArgs { file }, _) = args.process()?;
+    let args = args.evaluate_once()?;
+    let file: Tagged<PathBuf> = args.req(0)?;
 
     // Needs more detail and arguments while loading the dataframe
     // Options:
@@ -128,12 +93,3 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
 
     Ok(init.to_output_stream())
 }
-
-fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
-    let tag = args.call_info.name_tag.clone();
-    let args = args.evaluate_once()?;
-    let df = NuDataFrame::try_from_iter(args.input, &tag)?;
-    let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
-
-    Ok(init.to_output_stream())
-}
diff --git a/crates/nu-command/src/commands/dataframe/mod.rs b/crates/nu-command/src/commands/dataframe/mod.rs
new file mode 100644
index 000000000..6e1ce3d0c
--- /dev/null
+++ b/crates/nu-command/src/commands/dataframe/mod.rs
@@ -0,0 +1,7 @@
+pub mod command;
+pub mod list;
+pub mod load;
+
+pub use command::Command as Dataframe;
+pub use list::Dataframe as DataframeList;
+pub use load::Dataframe as DataframeLoad;
diff --git a/crates/nu-command/src/commands/default_context.rs b/crates/nu-command/src/commands/default_context.rs
index b17f1bb33..9ba9d98d3 100644
--- a/crates/nu-command/src/commands/default_context.rs
+++ b/crates/nu-command/src/commands/default_context.rs
@@ -253,6 +253,10 @@ pub fn create_default_context(interactive: bool) -> Result