mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 15:25:06 +02:00
Dataframe feature (#361)
* custom value trait * functions for custom value trait * custom trait behind flag * open dataframe command * command to-df for basic types * follow path for dataframe * dataframe operations * dataframe not default feature * custom as default feature * corrected examples in command
This commit is contained in:
5
crates/nu-command/src/dataframe/mod.rs
Normal file
5
crates/nu-command/src/dataframe/mod.rs
Normal file
@ -0,0 +1,5 @@
|
||||
mod open;
|
||||
mod to_df;
|
||||
|
||||
pub use open::OpenDataFrame;
|
||||
pub use to_df::ToDataFrame;
|
195
crates/nu-command/src/dataframe/open.rs
Normal file
195
crates/nu-command/src/dataframe/open.rs
Normal file
@ -0,0 +1,195 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_dataframe::NuDataFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
};
|
||||
|
||||
use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OpenDataFrame;
|
||||
|
||||
impl Command for OpenDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"open-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Opens csv, json or parquet file to create dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("open-df")
|
||||
.required(
|
||||
"file",
|
||||
SyntaxShape::Filepath,
|
||||
"file path to load values from",
|
||||
)
|
||||
.named(
|
||||
"delimiter",
|
||||
SyntaxShape::String,
|
||||
"file delimiter character. CSV file",
|
||||
Some('d'),
|
||||
)
|
||||
.switch(
|
||||
"no-header",
|
||||
"Indicates if file doesn't have header. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"infer-schema",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to infer the schema of the file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"skip-rows",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to skip from file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"columns",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::String)),
|
||||
"Columns to be selected from csv file. CSV file",
|
||||
None,
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a file name and creates a dataframe",
|
||||
example: "dataframe open test.csv",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let span = call.head;
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let df = match file.item.extension() {
|
||||
Some(e) => match e.to_str() {
|
||||
Some("csv") => from_csv(engine_state, stack, call),
|
||||
Some("parquet") => from_parquet(engine_state, stack, call),
|
||||
Some("json") => from_json(engine_state, stack, call),
|
||||
_ => Err(ShellError::FileNotFoundCustom(
|
||||
"Not a csv, parquet or json file".into(),
|
||||
file.span,
|
||||
)),
|
||||
},
|
||||
None => Err(ShellError::FileNotFoundCustom(
|
||||
"File without extension".into(),
|
||||
file.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
|
||||
df, span,
|
||||
)))
|
||||
}
|
||||
|
||||
fn from_parquet(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let reader = ParquetReader::new(r);
|
||||
|
||||
reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(format!("{:?}", e)))
|
||||
}
|
||||
|
||||
fn from_json(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
|
||||
let reader = JsonReader::new(r);
|
||||
|
||||
reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))
|
||||
}
|
||||
|
||||
fn from_csv(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
|
||||
let no_header: bool = call.has_flag("no_header");
|
||||
let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer_schema")?;
|
||||
let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip_rows")?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let csv_reader = CsvReader::from_path(&file.item)
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?
|
||||
.with_encoding(CsvEncoding::LossyUtf8);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::InternalError(
|
||||
"Delimiter has to be one char".into(),
|
||||
));
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_delimiter(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.infer_schema(Some(r)),
|
||||
};
|
||||
|
||||
let csv_reader = match skip_rows {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_skip_rows(r),
|
||||
};
|
||||
|
||||
let csv_reader = match columns {
|
||||
None => csv_reader,
|
||||
Some(columns) => csv_reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
csv_reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))
|
||||
}
|
59
crates/nu-command/src/dataframe/to_df.rs
Normal file
59
crates/nu-command/src/dataframe/to_df.rs
Normal file
@ -0,0 +1,59 @@
|
||||
use nu_dataframe::NuDataFrame;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
|
||||
impl Command for ToDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"to-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a List, Table or Dictionary into a dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("to-df").category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | to-df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | to-df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | to-df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[$true $true $false] | to-df",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
Ok(PipelineData::Value(NuDataFrame::to_value(df, call.head)))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user