mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 14:40:06 +02:00
Complete Dataframe MVP (#3373)
* Dataframe MVP * Removed test csv file * Dataframe MVP * Removed test csv file * New revision polars * New revision polars * csv file reader * argument parser for file reader * Parser from Row primitive * Column conversion * Added as f32 and f64 * Parsing row to dataframe * Removed repeated push to vector * Accept table values to create dataframe * Removed default serde * Dataframe to rows to show data * Save name of file with dataframe * Usage example * Upgrade polars version * Clippy changes * Added print function with head and tail * Move dataframe struct to folder * Lock file after running tests and merge * Optional feature for dataframe * Removed dataframe from plugins * Update primitive.rs Co-authored-by: JT <jonathandturner@users.noreply.github.com>
This commit is contained in:
@ -26,6 +26,8 @@ pub(crate) mod compact;
|
||||
pub(crate) mod config;
|
||||
pub(crate) mod constants;
|
||||
pub(crate) mod cp;
|
||||
#[cfg(feature = "dataframe")]
|
||||
pub(crate) mod dataframe;
|
||||
pub(crate) mod date;
|
||||
pub(crate) mod debug;
|
||||
pub(crate) mod def;
|
||||
@ -184,6 +186,8 @@ pub(crate) use clear::Clear;
|
||||
pub(crate) mod touch;
|
||||
pub(crate) use all::Command as All;
|
||||
pub(crate) use any::Command as Any;
|
||||
#[cfg(feature = "dataframe")]
|
||||
pub(crate) use dataframe::Dataframe;
|
||||
pub(crate) use enter::Enter;
|
||||
pub(crate) use every::Every;
|
||||
pub(crate) use exec::Exec;
|
||||
|
@ -89,7 +89,7 @@ fn all(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
UntaggedValue::boolean(true).into_value(&tag),
|
||||
));
|
||||
|
||||
// Variables in nu are immutable. Having the same variable accross invocations
|
||||
// Variables in nu are immutable. Having the same variable across invocations
|
||||
// of evaluate_baseline_expr does not mutate the variables and those each
|
||||
// invocations are independent of each other!
|
||||
scope.enter_scope();
|
||||
|
@ -228,6 +228,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
out!("{:?}", row);
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
Value {
|
||||
value: UntaggedValue::Dataframe(df),
|
||||
..
|
||||
} => {
|
||||
if let Some(table) = table {
|
||||
// TODO. Configure the parameter rows from file. It can be
|
||||
// adjusted to see a certain amount of values in the head
|
||||
let command_args =
|
||||
create_default_command_args(&context, df.print()?.into(), tag);
|
||||
let result = table.run(command_args)?;
|
||||
let _ = result.collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
..
|
||||
|
139
crates/nu-command/src/commands/dataframe.rs
Normal file
139
crates/nu-command/src/commands/dataframe.rs
Normal file
@ -0,0 +1,139 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
|
||||
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::{CsvReader, SerReader};
|
||||
|
||||
pub struct Dataframe;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct OpenArgs {
|
||||
file: Tagged<PathBuf>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for Dataframe {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a dataframe from a csv file"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe").named(
|
||||
"file",
|
||||
SyntaxShape::FilePath,
|
||||
"the file path to load values from",
|
||||
Some('f'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
load_dataframe(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a file name and creates a dataframe",
|
||||
example: "dataframe -f test.csv",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes an input stream and converts it to a dataframe",
|
||||
example: "echo [[a b];[1 2] [3 4]] | dataframe",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a dataframe from either a file or a table.
|
||||
// If both options are found, then an error is returned to the user.
|
||||
// The InputStream can have a table and a dictionary as input variable.
|
||||
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
// The file has priority over stream input
|
||||
if let Some(NamedValue::Value(_, _)) = args
|
||||
.call_info()
|
||||
.args
|
||||
.named
|
||||
.as_ref()
|
||||
.map(|named| named.named.get("file"))
|
||||
.flatten()
|
||||
{
|
||||
return create_from_file(args);
|
||||
}
|
||||
|
||||
create_from_input(args)
|
||||
}
|
||||
|
||||
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
// Command Tag. This marks where the command is located and the name
|
||||
// of the command used
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
// Parsing the arguments that the function uses
|
||||
let (OpenArgs { file }, _) = args.process()?;
|
||||
|
||||
// Needs more detail and arguments while loading the dataframe
|
||||
// Options:
|
||||
// - has header
|
||||
// - infer schema
|
||||
// - delimiter
|
||||
// - csv or parquet <- extracted from extension
|
||||
let csv_reader = match CsvReader::from_path(&file.item) {
|
||||
Ok(csv_reader) => csv_reader,
|
||||
Err(e) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Unable to parse file",
|
||||
format!("{}", e),
|
||||
&file.tag,
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let df = match csv_reader.infer_schema(None).has_header(true).finish() {
|
||||
Ok(csv_reader) => csv_reader,
|
||||
Err(e) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Error while parsing dataframe",
|
||||
format!("{}", e),
|
||||
&file.tag,
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let file_name = match file.item.into_os_string().into_string() {
|
||||
Ok(name) => name,
|
||||
Err(e) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Error with file name",
|
||||
format!("{:?}", e),
|
||||
&file.tag,
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let nu_dataframe = NuDataFrame {
|
||||
dataframe: Some(df),
|
||||
name: file_name,
|
||||
};
|
||||
|
||||
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
|
||||
|
||||
Ok(init.to_output_stream())
|
||||
}
|
||||
|
||||
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let args = args.evaluate_once()?;
|
||||
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
||||
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
|
||||
|
||||
Ok(init.to_output_stream())
|
||||
}
|
@ -251,6 +251,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||
whole_stream_command(Seq),
|
||||
whole_stream_command(SeqDates),
|
||||
whole_stream_command(TermSize),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(Dataframe),
|
||||
]);
|
||||
|
||||
#[cfg(feature = "clipboard-cli")]
|
||||
|
@ -114,6 +114,8 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
|
||||
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
|
||||
serde_json::Value::Null
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::Dataframe(_) => serde_json::Value::Null,
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
|
||||
b.iter()
|
||||
.map(|x| {
|
||||
|
@ -75,6 +75,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
||||
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
|
||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
|
||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||
@ -218,7 +220,7 @@ mod tests {
|
||||
[owner]
|
||||
name = "Tom Preston-Werner"
|
||||
dob = 1979-05-27T07:32:00-08:00 # First class dates
|
||||
|
||||
|
||||
[dependencies]
|
||||
rustyline = "4.1.0"
|
||||
sysinfo = "0.8.4"
|
||||
|
@ -95,6 +95,8 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
|
||||
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
|
||||
serde_yaml::Value::Null
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
|
||||
b.iter()
|
||||
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))
|
||||
|
@ -155,6 +155,14 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
|
||||
item.0.tag.span,
|
||||
))
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::Dataframe(_) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"uniq -c cannot operate on dataframes.",
|
||||
"source",
|
||||
item.0.tag.span,
|
||||
))
|
||||
}
|
||||
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
|
||||
}
|
||||
};
|
||||
|
Reference in New Issue
Block a user