Dataframe feature (#361)

* custom value trait

* functions for custom value trait

* custom trait behind flag

* open dataframe command

* command to-df for basic types

* follow path for dataframe

* dataframe operations

* dataframe not default feature

* custom as default feature

* corrected examples in command
This commit is contained in:
Fernando Herrera
2021-11-23 08:14:40 +00:00
committed by GitHub
parent a2aaeb38ed
commit 76019f434e
27 changed files with 3375 additions and 53 deletions

View File

@ -0,0 +1,5 @@
mod open;
mod to_df;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;

View File

@ -0,0 +1,195 @@
use std::{fs::File, path::PathBuf};
use nu_dataframe::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
};
use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader};
/// Unit struct that carries the implementation of the `open-df` command.
#[derive(Clone)]
pub struct OpenDataFrame;
/// `open-df`: loads a csv, json or parquet file into a dataframe.
impl Command for OpenDataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "open-df"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Opens csv, json or parquet file to create dataframe"
    }

    /// Declares the required file path and the optional flags. Every flag
    /// except the positional `file` only applies when a CSV file is read.
    fn signature(&self) -> Signature {
        Signature::build("open-df")
            .required(
                "file",
                SyntaxShape::Filepath,
                "file path to load values from",
            )
            .named(
                "delimiter",
                SyntaxShape::String,
                "file delimiter character. CSV file",
                Some('d'),
            )
            .switch(
                "no-header",
                "Indicates if file doesn't have header. CSV file",
                None,
            )
            .named(
                "infer-schema",
                SyntaxShape::Number,
                "Number of rows to infer the schema of the file. CSV file",
                None,
            )
            .named(
                "skip-rows",
                SyntaxShape::Number,
                "Number of rows to skip from file. CSV file",
                None,
            )
            .named(
                "columns",
                SyntaxShape::List(Box::new(SyntaxShape::String)),
                "Columns to be selected from csv file. CSV file",
                None,
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Usage examples. The example invokes the command by the same name that
    /// `name` and `signature` register (`open-df`).
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Takes a file name and creates a dataframe",
            example: "open-df test.csv",
            result: None,
        }]
    }

    /// Runs the command by delegating to `command`. The pipeline input is
    /// ignored: the data comes from the file named in the call.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        _input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call)
    }
}
/// Loads the file given as the first positional argument into a dataframe
/// value, picking the reader (`from_csv`, `from_parquet` or `from_json`) from
/// the file extension.
///
/// # Errors
/// Returns `ShellError::FileNotFoundCustom` when the path has no extension or
/// the extension is not `csv`, `parquet` or `json`; otherwise propagates the
/// error of the selected reader.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
) -> Result<PipelineData, ShellError> {
    let path: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

    // A missing extension is reported separately from an unsupported one.
    let extension = match path.item.extension() {
        Some(ext) => ext.to_str(),
        None => {
            return Err(ShellError::FileNotFoundCustom(
                "File without extension".into(),
                path.span,
            ))
        }
    };

    // An extension that is not valid UTF-8 (`None` here) falls into the
    // catch-all arm together with every unsupported extension.
    let frame = match extension {
        Some("csv") => from_csv(engine_state, stack, call)?,
        Some("parquet") => from_parquet(engine_state, stack, call)?,
        Some("json") => from_json(engine_state, stack, call)?,
        _ => {
            return Err(ShellError::FileNotFoundCustom(
                "Not a csv, parquet or json file".into(),
                path.span,
            ))
        }
    };

    let value = NuDataFrame::dataframe_into_value(frame, call.head);
    Ok(PipelineData::Value(value))
}
/// Reads the parquet file given as the first positional argument into a
/// polars `DataFrame`.
///
/// # Errors
/// Returns `ShellError::InternalError` when the file cannot be opened or when
/// the parquet reader fails. The reader error is rendered with its `Display`
/// text (as the csv and json readers do) rather than its `Debug` form.
fn from_parquet(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
    let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;

    ParquetReader::new(r)
        .finish()
        .map_err(|e| ShellError::InternalError(e.to_string()))
}
/// Reads the json file given as the first positional argument into a polars
/// `DataFrame`.
///
/// # Errors
/// Returns `ShellError::InternalError` carrying the error text when the file
/// cannot be opened or when the json reader fails.
fn from_json(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let path: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

    let handle = match File::open(&path.item) {
        Ok(opened) => opened,
        Err(err) => return Err(ShellError::InternalError(err.to_string())),
    };

    match JsonReader::new(handle).finish() {
        Ok(frame) => Ok(frame),
        Err(err) => Err(ShellError::InternalError(err.to_string())),
    }
}
/// Reads the csv file given as the first positional argument into a polars
/// `DataFrame`, applying the optional CSV flags of the `open-df` command.
///
/// Flags are looked up by the hyphenated long names that the command
/// signature declares (`no-header`, `infer-schema`, `skip-rows`); a lookup
/// under any other spelling would not match the declared flag.
///
/// # Errors
/// Returns `ShellError::InternalError` when the file cannot be opened, when
/// the delimiter is not exactly one byte long, or when the csv reader fails.
fn from_csv(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
    let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
    let no_header: bool = call.has_flag("no-header");
    let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?;
    let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?;
    let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;

    let csv_reader = CsvReader::from_path(&file.item)
        .map_err(|e| ShellError::InternalError(e.to_string()))?
        .with_encoding(CsvEncoding::LossyUtf8);

    let csv_reader = match delimiter {
        None => csv_reader,
        // The reader takes the delimiter as a single byte, so only a string
        // that is exactly one byte long is accepted.
        Some(d) => match d.item.as_bytes() {
            [byte] => csv_reader.with_delimiter(*byte),
            _ => {
                return Err(ShellError::InternalError(
                    "Delimiter has to be one char".into(),
                ))
            }
        },
    };

    let csv_reader = csv_reader.has_header(!no_header);

    let csv_reader = match infer_schema {
        None => csv_reader,
        Some(r) => csv_reader.infer_schema(Some(r)),
    };

    let csv_reader = match skip_rows {
        None => csv_reader,
        Some(r) => csv_reader.with_skip_rows(r),
    };

    let csv_reader = match columns {
        None => csv_reader,
        Some(columns) => csv_reader.with_columns(Some(columns)),
    };

    csv_reader
        .finish()
        .map_err(|e| ShellError::InternalError(e.to_string()))
}

View File

@ -0,0 +1,59 @@
use nu_dataframe::NuDataFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
/// Unit struct that carries the implementation of the `to-df` command.
#[derive(Clone)]
pub struct ToDataFrame;
/// `to-df`: builds a dataframe from the values arriving on the pipeline.
impl Command for ToDataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "to-df"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Converts a List, Table or Dictionary into a dataframe"
    }

    /// The command takes no arguments or flags; it is only assigned to the
    /// custom "dataframe" category.
    fn signature(&self) -> Signature {
        let category = Category::Custom("dataframe".into());
        Signature::build("to-df").category(category)
    }

    /// Usage examples for the input forms the command is shown with.
    fn examples(&self) -> Vec<Example> {
        let from_dictionary = Example {
            description: "Takes a dictionary and creates a dataframe",
            example: "[[a b];[1 2] [3 4]] | to-df",
            result: None,
        };
        let from_list_of_tables = Example {
            description: "Takes a list of tables and creates a dataframe",
            example: "[[1 2 a] [3 4 b] [5 6 c]] | to-df",
            result: None,
        };
        let from_list = Example {
            description: "Takes a list and creates a dataframe",
            example: "[a b c] | to-df",
            result: None,
        };
        let from_booleans = Example {
            description: "Takes a list of booleans and creates a dataframe",
            example: "[$true $true $false] | to-df",
            result: None,
        };

        vec![
            from_dictionary,
            from_list_of_tables,
            from_list,
            from_booleans,
        ]
    }

    /// Consumes the pipeline input, converts it into a `NuDataFrame` and
    /// returns it as a single value tagged with the call's head span.
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let frame = NuDataFrame::try_from_iter(input.into_iter())?;
        let value = NuDataFrame::to_value(frame, call.head);
        Ok(PipelineData::Value(value))
    }
}

View File

@ -143,6 +143,9 @@ pub fn create_default_context() -> EngineState {
#[cfg(feature = "plugin")]
bind_command!(Register);
#[cfg(feature = "dataframe")]
bind_command!(OpenDataFrame, ToDataFrame);
// This is a WIP proof of concept
bind_command!(ListGitBranches, Git, GitCheckout, Source);

View File

@ -70,8 +70,6 @@ pub fn value_to_json_value(v: &Value) -> Result<nu_json::Value, ShellError> {
Value::List { vals, .. } => nu_json::Value::Array(json_list(vals)?),
Value::Error { error } => return Err(error.clone()),
Value::Block { .. } | Value::Range { .. } => nu_json::Value::Null,
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_json::Value::Null,
Value::Binary { val, .. } => {
nu_json::Value::Array(val.iter().map(|x| nu_json::Value::U64(*x as u64)).collect())
}
@ -82,6 +80,8 @@ pub fn value_to_json_value(v: &Value) -> Result<nu_json::Value, ShellError> {
}
nu_json::Value::Object(m)
}
#[cfg(feature = "custom")]
Value::CustomValue { val, .. } => val.to_json(),
})
}

View File

@ -13,6 +13,9 @@ mod strings;
mod system;
mod viewers;
#[cfg(feature = "dataframe")]
mod dataframe;
pub use conversions::*;
pub use core_commands::*;
pub use date::*;
@ -27,3 +30,6 @@ pub use math::*;
pub use strings::*;
pub use system::*;
pub use viewers::*;
#[cfg(feature = "dataframe")]
pub use dataframe::*;

View File

@ -107,6 +107,11 @@ impl Command for Table {
.into_pipeline_data())
}
PipelineData::Value(Value::Error { error }) => Err(error),
#[cfg(feature = "custom")]
PipelineData::Value(Value::CustomValue { val, span }) => {
let base_pipeline = val.to_base_value(span)?.into_pipeline_data();
self.run(engine_state, stack, call, base_pipeline)
}
x => Ok(x),
}
}