Complete Dataframe MVP (#3373)

* Dataframe MVP

* Removed test csv file

* Dataframe MVP

* Removed test csv file

* New revision polars

* New revision polars

* csv file reader

* argument parser for file reader

* Parser from Row primitive

* Column conversion

* Added as f32 and f64

* Parsing row to dataframe

* Removed repeated push to vector

* Accept table values to create dataframe

* Removed default serde

* Dataframe to rows to show data

* Save name of file with dataframe

* Usage example

* Upgrade polars version

* Clippy changes

* Added print function with head and tail

* Move dataframe struct to folder

* Lock file after running tests and merge

* Optional feature for dataframe

* Removed dataframe from plugins

* Update primitive.rs

Co-authored-by: JT <jonathandturner@users.noreply.github.com>
This commit is contained in:
Fernando Herrera
2021-05-12 02:01:31 +01:00
committed by GitHub
parent e73491441a
commit c80a9585b0
25 changed files with 1474 additions and 448 deletions

View File

@ -26,6 +26,8 @@ pub(crate) mod compact;
pub(crate) mod config;
pub(crate) mod constants;
pub(crate) mod cp;
#[cfg(feature = "dataframe")]
pub(crate) mod dataframe;
pub(crate) mod date;
pub(crate) mod debug;
pub(crate) mod def;
@ -184,6 +186,8 @@ pub(crate) use clear::Clear;
pub(crate) mod touch;
pub(crate) use all::Command as All;
pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")]
pub(crate) use dataframe::Dataframe;
pub(crate) use enter::Enter;
pub(crate) use every::Every;
pub(crate) use exec::Exec;

View File

@ -89,7 +89,7 @@ fn all(args: CommandArgs) -> Result<OutputStream, ShellError> {
UntaggedValue::boolean(true).into_value(&tag),
));
// Variables in nu are immutable. Having the same variable accross invocations
// Variables in nu are immutable. Having the same variable across invocations
// of evaluate_baseline_expr does not mutate the variables and those each
// invocations are independent of each other!
scope.enter_scope();

View File

@ -228,6 +228,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
out!("{:?}", row);
}
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::Dataframe(df),
..
} => {
if let Some(table) = table {
// TODO. Configure the parameter rows from file. It can be
// adjusted to see a certain amount of values in the head
let command_args =
create_default_command_args(&context, df.print()?.into(), tag);
let result = table.run(command_args)?;
let _ = result.collect::<Vec<_>>();
}
}
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..

View File

@ -0,0 +1,139 @@
use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader};
// Command type for the `dataframe` command. It carries no state; its
// behavior comes from the `WholeStreamCommand` implementation in this file.
pub struct Dataframe;
// Arguments that `create_from_file` obtains from `args.process()`.
#[derive(Deserialize)]
pub struct OpenArgs {
// Path of the file to load, kept together with its tag so that errors can be
// labeled at the argument. Presumably filled from the `file` named flag
// declared in the command signature -- confirm against the deserializer.
file: Tagged<PathBuf>,
}
// Implementation of the `dataframe` command.
//
// The command has two sources for its data: the file named by the
// `--file`/`-f` flag, or the values arriving on the input stream when the
// flag is absent (see `load_dataframe`).
impl WholeStreamCommand for Dataframe {
    // Name used to invoke the command.
    fn name(&self) -> &str {
        "dataframe"
    }

    // One-line help text. It mentions both data sources because the command
    // also accepts pipeline input, as the second example below shows.
    fn usage(&self) -> &str {
        "Creates a dataframe from a csv file or from pipeline input"
    }

    // Declares the single optional named flag `--file` / `-f`.
    fn signature(&self) -> Signature {
        Signature::build("dataframe").named(
            "file",
            SyntaxShape::FilePath,
            "the file path to load values from",
            Some('f'),
        )
    }

    // Delegates to `load_dataframe`, which picks the data source.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        load_dataframe(args)
    }

    // Examples shown in the command help; neither declares an expected result.
    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Takes a file name and creates a dataframe",
                example: "dataframe -f test.csv",
                result: None,
            },
            Example {
                description: "Takes an input stream and converts it to a dataframe",
                example: "echo [[a b];[1 2] [3 4]] | dataframe",
                result: None,
            },
        ]
    }
}
// Creates a dataframe from either a file or the input stream.
//
// When the `file` named argument carries a value, the dataframe is loaded
// from that file and the input stream is not consulted. Otherwise the
// dataframe is built from the input stream. No error is raised when both are
// present: the file simply wins.
//
// Per the original note, the input stream can carry a table or a dictionary;
// that handling lives in `NuDataFrame::try_from_iter`.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
    // The file has priority over stream input. Only a flag that actually
    // holds a value counts; an absent flag falls through to the input stream.
    let file_given = matches!(
        args.call_info()
            .args
            .named
            .as_ref()
            .and_then(|named| named.named.get("file")),
        Some(NamedValue::Value(_, _))
    );

    if file_given {
        create_from_file(args)
    } else {
        create_from_input(args)
    }
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone();
// Parsing the arguments that the function uses
let (OpenArgs { file }, _) = args.process()?;
// Needs more detail and arguments while loading the dataframe
// Options:
// - has header
// - infer schema
// - delimiter
// - csv or parquet <- extracted from extension
let csv_reader = match CsvReader::from_path(&file.item) {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Unable to parse file",
format!("{}", e),
&file.tag,
))
}
};
let df = match csv_reader.infer_schema(None).has_header(true).finish() {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Error while parsing dataframe",
format!("{}", e),
&file.tag,
))
}
};
let file_name = match file.item.into_os_string().into_string() {
Ok(name) => name,
Err(e) => {
return Err(ShellError::labeled_error(
"Error with file name",
format!("{:?}", e),
&file.tag,
))
}
};
let nu_dataframe = NuDataFrame {
dataframe: Some(df),
name: file_name,
};
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
Ok(init.to_output_stream())
}
// Builds a dataframe from the values on the input stream and returns it as
// the only value of the output stream, tagged with the command's name tag.
//
// Errors from argument evaluation and from `NuDataFrame::try_from_iter` are
// propagated unchanged.
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let name_tag = args.call_info.name_tag.clone();

    // Evaluating the arguments gives access to the input stream
    let evaluated = args.evaluate_once()?;
    let dataframe = NuDataFrame::try_from_iter(evaluated.input, &name_tag)?;

    let value = UntaggedValue::Dataframe(dataframe).into_value(&name_tag);
    Ok(InputStream::one(value).to_output_stream())
}

View File

@ -251,6 +251,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(Seq),
whole_stream_command(SeqDates),
whole_stream_command(TermSize),
#[cfg(feature = "dataframe")]
whole_stream_command(Dataframe),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -114,6 +114,8 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_json::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_json::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
b.iter()
.map(|x| {

View File

@ -75,6 +75,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
@ -218,7 +220,7 @@ mod tests {
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00 # First class dates
[dependencies]
rustyline = "4.1.0"
sysinfo = "0.8.4"

View File

@ -95,6 +95,8 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_yaml::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
b.iter()
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))

View File

@ -155,6 +155,14 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
item.0.tag.span,
))
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => {
return Err(ShellError::labeled_error(
"uniq -c cannot operate on dataframes.",
"source",
item.0.tag.span,
))
}
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
}
};