forked from extern/nushell
move dataframe commands to nu-cmd-dataframe (#9241)
All of the dataframe commands ported over with no issues... ### 11 tests are commented out (for now) So 100 of the original 111 tests are passing with only 11 tests being ignored for now.. As per our conversation in the core team meeting on Wednesday I took @jntrnr suggestion and just commented out the tests dealing with [IntoDatetime](https://github.com/nushell/nushell/blob/main/crates/nu-command/src/conversions/into/mod.rs) Later on we can move this functionality out of nu-command if we decide it makes sense... ### The following tests were ignored... ```rust modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs modified: crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs ```
This commit is contained in:
132
crates/nu-cmd-dataframe/src/dataframe/eager/append.rs
Normal file
132
crates/nu-cmd-dataframe/src/dataframe/eager/append.rs
Normal file
@ -0,0 +1,132 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{Axis, Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppendDF;
|
||||
|
||||
impl Command for AppendDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr append"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Appends a new dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("other", SyntaxShape::Any, "dataframe to be appended")
|
||||
.switch("col", "appends in col orientation", Some('c'))
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Appends a dataframe as new columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
|
||||
$a | dfr append $a"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"a_x".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b_x".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Appends a dataframe merging at the end of columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
|
||||
$a | dfr append $a --col"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let other: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let axis = if call.has_flag("col") {
|
||||
Axis::Column
|
||||
} else {
|
||||
Axis::Row
|
||||
};
|
||||
let df_other = NuDataFrame::try_from_value(other)?;
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
df.append_df(&df_other, axis, call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(AppendDF {})])
|
||||
}
|
||||
}
|
81
crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs
Normal file
81
crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ColumnsDF;
|
||||
|
||||
impl Command for ColumnsDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr columns"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show dataframe columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr columns",
|
||||
result: Some(Value::List {
|
||||
vals: vec![Value::test_string("a"), Value::test_string("b")],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| Value::string(*v, call.head))
|
||||
.collect();
|
||||
|
||||
let names = Value::List {
|
||||
vals: names,
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(names, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ColumnsDF {})])
|
||||
}
|
||||
}
|
119
crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs
Normal file
119
crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use super::super::values::utils::convert_columns;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropDF;
|
||||
|
||||
impl Command for DropDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr drop"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new dataframe by dropping the selected columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest("rest", SyntaxShape::Any, "column names to be dropped")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let (col_string, col_span) = convert_columns(columns, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let new_df = col_string
|
||||
.get(0)
|
||||
.ok_or_else(|| {
|
||||
ShellError::GenericError(
|
||||
"Empty names list".into(),
|
||||
"No column names were found".into(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.and_then(|col| {
|
||||
df.as_ref().drop(&col.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error dropping column".into(),
|
||||
e.to_string(),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
})?;
|
||||
|
||||
// If there are more columns in the drop selection list, these
|
||||
// are added from the resulting dataframe
|
||||
col_string
|
||||
.iter()
|
||||
.skip(1)
|
||||
.try_fold(new_df, |new_df, col| {
|
||||
new_df.drop(&col.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error dropping column".into(),
|
||||
e.to_string(),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(DropDF {})])
|
||||
}
|
||||
}
|
122
crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs
Normal file
122
crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::UniqueKeepStrategy;
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropDuplicates;
|
||||
|
||||
impl Command for DropDuplicates {
|
||||
fn name(&self) -> &str {
|
||||
"dfr drop-duplicates"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops duplicate values in dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"subset",
|
||||
SyntaxShape::Table,
|
||||
"subset of columns to drop duplicates",
|
||||
)
|
||||
.switch("maintain", "maintain order", Some('m'))
|
||||
.switch(
|
||||
"last",
|
||||
"keeps last duplicate value (by default keeps first)",
|
||||
Some('l'),
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Option<Vec<Value>> = call.opt(engine_state, stack, 0)?;
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, call.head),
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let keep_strategy = if call.has_flag("last") {
|
||||
UniqueKeepStrategy::Last
|
||||
} else {
|
||||
UniqueKeepStrategy::First
|
||||
};
|
||||
|
||||
df.as_ref()
|
||||
.unique(subset_slice, keep_strategy, None)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error dropping duplicates".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(DropDuplicates {})])
|
||||
}
|
||||
}
|
138
crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs
Normal file
138
crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs
Normal file
@ -0,0 +1,138 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropNulls;
|
||||
|
||||
impl Command for DropNulls {
|
||||
fn name(&self) -> &str {
|
||||
"dfr drop-nulls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops null values in dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"subset",
|
||||
SyntaxShape::Table,
|
||||
"subset of columns to drop nulls",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "drop null values in dataframe",
|
||||
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dfr into-df);
|
||||
let res = ($df.b / $df.b);
|
||||
let a = ($df | dfr with-column $res --name res);
|
||||
$a | dfr drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"res".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "drop null values in dataframe",
|
||||
example: r#"let s = ([1 2 0 0 3 4] | dfr into-df);
|
||||
($s / $s) | dfr drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"div_0_0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let columns: Option<Vec<Value>> = call.opt(engine_state, stack, 0)?;
|
||||
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, call.head),
|
||||
};
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
df.as_ref()
|
||||
.drop_nulls(subset_slice)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error dropping nulls".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::WithColumn;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(DropNulls {}), Box::new(WithColumn {})])
|
||||
}
|
||||
}
|
105
crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs
Normal file
105
crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs
Normal file
@ -0,0 +1,105 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DataTypes;
|
||||
|
||||
impl Command for DataTypes {
|
||||
fn name(&self) -> &str {
|
||||
"dfr dtypes"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show dataframe data types."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe dtypes",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"column".to_string(),
|
||||
vec![Value::test_string("a"), Value::test_string("b")],
|
||||
),
|
||||
Column::new(
|
||||
"dtype".to_string(),
|
||||
vec![Value::test_string("i64"), Value::test_string("i64")],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut dtypes: Vec<Value> = Vec::new();
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| {
|
||||
let dtype = df
|
||||
.as_ref()
|
||||
.column(v)
|
||||
.expect("using name from list of names from dataframe")
|
||||
.dtype();
|
||||
|
||||
let dtype_str = dtype.to_string();
|
||||
dtypes.push(Value::String {
|
||||
val: dtype_str,
|
||||
span: call.head,
|
||||
});
|
||||
|
||||
Value::string(*v, call.head)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let names_col = Column::new("column".to_string(), names);
|
||||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
|
||||
.map(|df| PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(DataTypes {})])
|
||||
}
|
||||
}
|
141
crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs
Normal file
141
crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs
Normal file
@ -0,0 +1,141 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::DataFrameOps;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Dummies;
|
||||
|
||||
impl Command for Dummies {
|
||||
fn name(&self) -> &str {
|
||||
"dfr dummies"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new dataframe with dummy variables."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dummies",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_1".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(0)],
|
||||
),
|
||||
Column::new(
|
||||
"a_3".to_string(),
|
||||
vec![Value::test_int(0), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"b_2".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(0)],
|
||||
),
|
||||
Column::new(
|
||||
"b_4".to_string(),
|
||||
vec![Value::test_int(0), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a series",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr dummies",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"0_1".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"0_2".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"0_3".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
df.as_ref()
|
||||
.to_dummies(None)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating dummies".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
Some("The only allowed column types for dummies are String or Int".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(Dummies {})])
|
||||
}
|
||||
}
|
158
crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs
Normal file
158
crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs
Normal file
@ -0,0 +1,158 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::LazyFrame;
|
||||
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FilterWith;
|
||||
|
||||
impl Command for FilterWith {
|
||||
fn name(&self) -> &str {
|
||||
"dfr filter-with"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filters dataframe using a mask or expression as reference."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"mask or expression",
|
||||
SyntaxShape::Any,
|
||||
"boolean mask used to filter data",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Filter dataframe using a bool mask",
|
||||
example: r#"let mask = ([true false] | dfr into-df);
|
||||
[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mask_value: Value = call.req(engine_state, stack, 0)?;
|
||||
let mask_span = mask_value.span()?;
|
||||
|
||||
if NuExpression::can_downcast(&mask_value) {
|
||||
let expression = NuExpression::try_from_value(mask_value)?;
|
||||
let lazy = NuLazyFrame::new(true, df.lazy());
|
||||
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
} else {
|
||||
let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
|
||||
let mask = mask.bool().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to bool".into(),
|
||||
e.to_string(),
|
||||
Some(mask_span),
|
||||
Some("Perhaps you want to use a series with booleans as mask".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
df.as_ref()
|
||||
.filter(mask)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error filtering dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
Some("The only allowed column types for dummies are String or Int".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr: Value = call.req(engine_state, stack, 0)?;
|
||||
let expr = NuExpression::try_from_value(expr)?;
|
||||
|
||||
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::expressions::ExprCol;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(FilterWith {}), Box::new(ExprCol {})])
|
||||
}
|
||||
}
|
105
crates/nu-cmd-dataframe/src/dataframe/eager/first.rs
Normal file
105
crates/nu-cmd-dataframe/src/dataframe/eager/first.rs
Normal file
@ -0,0 +1,105 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FirstDF;
|
||||
|
||||
impl Command for FirstDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr first"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show only the first number of rows."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"rows",
|
||||
SyntaxShape::Int,
|
||||
"starting from the front, the number of rows to return",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Return the first row of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Return the first two rows of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
command(engine_state, stack, call, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
|
||||
let rows = rows.unwrap_or(1);
|
||||
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(FirstDF {})])
|
||||
}
|
||||
}
|
92
crates/nu-cmd-dataframe/src/dataframe/eager/get.rs
Normal file
92
crates/nu-cmd-dataframe/src/dataframe/eager/get.rs
Normal file
@ -0,0 +1,92 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::utils::convert_columns_string;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetDF;
|
||||
|
||||
impl Command for GetDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr get"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates dataframe with the selected columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns the selected column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
df.as_ref()
|
||||
.select(col_string)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error selecting columns".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(GetDF {})])
|
||||
}
|
||||
}
|
81
crates/nu-cmd-dataframe/src/dataframe/eager/last.rs
Normal file
81
crates/nu-cmd-dataframe/src/dataframe/eager/last.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LastDF;
|
||||
|
||||
impl Command for LastDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr last"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe with tail rows or creates a last expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
command(engine_state, stack, call, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
|
||||
let rows = rows.unwrap_or(DEFAULT_ROWS);
|
||||
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(LastDF {})])
|
||||
}
|
||||
}
|
89
crates/nu-cmd-dataframe/src/dataframe/eager/list.rs
Normal file
89
crates/nu-cmd-dataframe/src/dataframe/eager/list.rs
Normal file
@ -0,0 +1,89 @@
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ListDF;
|
||||
|
||||
impl Command for ListDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr ls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Lists stored dataframes."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a new dataframe and shows it in the dataframe list",
|
||||
example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df);
|
||||
ls"#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mut vals: Vec<(String, Value)> = vec![];
|
||||
|
||||
for overlay_frame in engine_state.active_overlays(&[]) {
|
||||
for var in &overlay_frame.vars {
|
||||
if let Ok(value) = stack.get_var(*var.1, call.head) {
|
||||
let name = String::from_utf8_lossy(var.0).to_string();
|
||||
vals.push((name, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let vals = vals
|
||||
.into_iter()
|
||||
.filter_map(|(name, value)| {
|
||||
NuDataFrame::try_from_value(value).ok().map(|df| (name, df))
|
||||
})
|
||||
.map(|(name, df)| {
|
||||
let name = Value::String {
|
||||
val: name,
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
let columns = Value::int(df.as_ref().width() as i64, call.head);
|
||||
|
||||
let rows = Value::int(df.as_ref().height() as i64, call.head);
|
||||
|
||||
let cols = vec![
|
||||
"name".to_string(),
|
||||
"columns".to_string(),
|
||||
"rows".to_string(),
|
||||
];
|
||||
let vals = vec![name, columns, rows];
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals,
|
||||
span: call.head,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
|
||||
let list = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
Ok(list.into_pipeline_data())
|
||||
}
|
||||
}
|
259
crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs
Normal file
259
crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs
Normal file
@ -0,0 +1,259 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::utils::convert_columns_string;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeltDF;
|
||||
|
||||
impl Command for MeltDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr melt"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table,
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table,
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dfr into-df | dfr melt -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "columns")?
|
||||
.expect("required value");
|
||||
let val_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "values")?
|
||||
.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag(engine_state, stack, "value-name")?;
|
||||
let variable_name: Option<Spanned<String>> =
|
||||
call.get_flag(engine_state, stack, "variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating melt".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error renaming column".into(),
|
||||
e.to_string(),
|
||||
Some(name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error renaming column".into(),
|
||||
e.to_string(),
|
||||
Some(name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError(
|
||||
"Merge error".into(),
|
||||
"empty column list".into(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df.column(w[0].as_ref()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error selecting columns".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let r_series = df.column(w[1].as_ref()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error selecting columns".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError(
|
||||
"Merge error".into(),
|
||||
"found different column types in list".into(),
|
||||
Some(col_span),
|
||||
Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(MeltDF {})])
|
||||
}
|
||||
}
|
103
crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs
Normal file
103
crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs
Normal file
@ -0,0 +1,103 @@
|
||||
mod append;
|
||||
mod columns;
|
||||
mod drop;
|
||||
mod drop_duplicates;
|
||||
mod drop_nulls;
|
||||
mod dtypes;
|
||||
mod dummies;
|
||||
mod filter_with;
|
||||
mod first;
|
||||
mod get;
|
||||
mod last;
|
||||
mod list;
|
||||
mod melt;
|
||||
mod open;
|
||||
mod query_df;
|
||||
mod rename;
|
||||
mod sample;
|
||||
mod shape;
|
||||
mod slice;
|
||||
mod sql_context;
|
||||
mod sql_expr;
|
||||
mod summary;
|
||||
mod take;
|
||||
mod to_arrow;
|
||||
mod to_csv;
|
||||
mod to_df;
|
||||
mod to_nu;
|
||||
mod to_parquet;
|
||||
mod with_column;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
pub use self::open::OpenDataFrame;
|
||||
pub use append::AppendDF;
|
||||
pub use columns::ColumnsDF;
|
||||
pub use drop::DropDF;
|
||||
pub use drop_duplicates::DropDuplicates;
|
||||
pub use drop_nulls::DropNulls;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use dummies::Dummies;
|
||||
pub use filter_with::FilterWith;
|
||||
pub use first::FirstDF;
|
||||
pub use get::GetDF;
|
||||
pub use last::LastDF;
|
||||
pub use list::ListDF;
|
||||
pub use melt::MeltDF;
|
||||
pub use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
pub use sample::SampleDF;
|
||||
pub use shape::ShapeDF;
|
||||
pub use slice::SliceDF;
|
||||
pub use sql_context::SQLContext;
|
||||
pub use sql_expr::parse_sql_expr;
|
||||
pub use summary::Summary;
|
||||
pub use take::TakeDF;
|
||||
pub use to_arrow::ToArrow;
|
||||
pub use to_csv::ToCSV;
|
||||
pub use to_df::ToDataFrame;
|
||||
pub use to_nu::ToNu;
|
||||
pub use to_parquet::ToParquet;
|
||||
pub use with_column::WithColumn;
|
||||
|
||||
pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
macro_rules! bind_command {
|
||||
( $command:expr ) => {
|
||||
working_set.add_decl(Box::new($command));
|
||||
};
|
||||
( $( $command:expr ),* ) => {
|
||||
$( working_set.add_decl(Box::new($command)); )*
|
||||
};
|
||||
}
|
||||
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
AppendDF,
|
||||
ColumnsDF,
|
||||
DataTypes,
|
||||
Summary,
|
||||
DropDF,
|
||||
DropDuplicates,
|
||||
DropNulls,
|
||||
Dummies,
|
||||
FilterWith,
|
||||
FirstDF,
|
||||
GetDF,
|
||||
LastDF,
|
||||
ListDF,
|
||||
MeltDF,
|
||||
OpenDataFrame,
|
||||
QueryDf,
|
||||
RenameDF,
|
||||
SampleDF,
|
||||
ShapeDF,
|
||||
SliceDF,
|
||||
TakeDF,
|
||||
ToArrow,
|
||||
ToCSV,
|
||||
ToDataFrame,
|
||||
ToNu,
|
||||
ToParquet,
|
||||
WithColumn
|
||||
);
|
||||
}
|
431
crates/nu-cmd-dataframe/src/dataframe/eager/open.rs
Normal file
431
crates/nu-cmd-dataframe/src/dataframe/eager/open.rs
Normal file
@ -0,0 +1,431 @@
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use std::{fs::File, io::BufReader, path::PathBuf};
|
||||
|
||||
use polars::prelude::{
|
||||
CsvEncoding, CsvReader, IpcReader, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame,
|
||||
ParallelStrategy, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OpenDataFrame;
|
||||
|
||||
impl Command for OpenDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dfr open"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Opens CSV, JSON, arrow, or parquet file to create dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"file",
|
||||
SyntaxShape::Filepath,
|
||||
"file path to load values from",
|
||||
)
|
||||
.switch("lazy", "creates a lazy dataframe", Some('l'))
|
||||
.named(
|
||||
"type",
|
||||
SyntaxShape::String,
|
||||
"File type: csv, tsv, json, parquet, arrow. If omitted, derive from file extension",
|
||||
Some('t'),
|
||||
)
|
||||
.named(
|
||||
"delimiter",
|
||||
SyntaxShape::String,
|
||||
"file delimiter character. CSV file",
|
||||
Some('d'),
|
||||
)
|
||||
.switch(
|
||||
"no-header",
|
||||
"Indicates if file doesn't have header. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"infer-schema",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to infer the schema of the file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"skip-rows",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to skip from file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"columns",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::String)),
|
||||
"Columns to be selected from csv file. CSV and Parquet file",
|
||||
None,
|
||||
)
|
||||
.input_type(Type::Any)
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a file name and creates a dataframe",
|
||||
example: "dfr open test.csv",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let type_option: Option<Spanned<String>> = call.get_flag(engine_state, stack, "type")?;
|
||||
|
||||
let type_id = match &type_option {
|
||||
Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)),
|
||||
None => file.item.extension().map(|e| {
|
||||
(
|
||||
e.to_string_lossy().into_owned(),
|
||||
"Invalid extension",
|
||||
file.span,
|
||||
)
|
||||
}),
|
||||
};
|
||||
|
||||
match type_id {
|
||||
Some((e, msg, blamed)) => match e.as_str() {
|
||||
"csv" | "tsv" => from_csv(engine_state, stack, call),
|
||||
"parquet" => from_parquet(engine_state, stack, call),
|
||||
"ipc" | "arrow" => from_ipc(engine_state, stack, call),
|
||||
"json" => from_json(engine_state, stack, call),
|
||||
_ => Err(ShellError::FileNotFoundCustom(
|
||||
format!("{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json"),
|
||||
blamed,
|
||||
)),
|
||||
},
|
||||
None => Err(ShellError::FileNotFoundCustom(
|
||||
"File without extension".into(),
|
||||
file.span,
|
||||
)),
|
||||
}
|
||||
.map(|value| PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
fn from_parquet(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
if call.has_flag("lazy") {
|
||||
let file: String = call.req(engine_state, stack, 0)?;
|
||||
let args = ScanArgsParquet {
|
||||
n_rows: None,
|
||||
cache: true,
|
||||
parallel: ParallelStrategy::Auto,
|
||||
rechunk: false,
|
||||
row_count: None,
|
||||
low_memory: false,
|
||||
cloud_options: None,
|
||||
use_statistics: false,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
df.into_value(call.head)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error opening file".into(),
|
||||
e.to_string(),
|
||||
Some(file.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
let reader = ParquetReader::new(r);
|
||||
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
Ok(df.into_value(call.head))
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ipc(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
if call.has_flag("lazy") {
|
||||
let file: String = call.req(engine_state, stack, 0)?;
|
||||
let args = ScanArgsIpc {
|
||||
n_rows: None,
|
||||
cache: true,
|
||||
rechunk: false,
|
||||
row_count: None,
|
||||
memmap: true,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"IPC reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
df.into_value(call.head)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error opening file".into(),
|
||||
e.to_string(),
|
||||
Some(file.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
let reader = IpcReader::new(r);
|
||||
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"IPC reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
Ok(df.into_value(call.head))
|
||||
}
|
||||
}
|
||||
|
||||
fn from_json(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let file = File::open(&file.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error opening file".into(),
|
||||
e.to_string(),
|
||||
Some(file.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let buf_reader = BufReader::new(file);
|
||||
let reader = JsonReader::new(buf_reader);
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Json reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
Ok(df.into_value(call.head))
|
||||
}
|
||||
|
||||
fn from_csv(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header");
|
||||
let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?;
|
||||
let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
if call.has_flag("lazy") {
|
||||
let file: String = call.req(engine_state, stack, 0)?;
|
||||
let csv_reader = LazyCsvReader::new(file);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"Incorrect delimiter".into(),
|
||||
"Delimiter has to be one character".into(),
|
||||
Some(d.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_delimiter(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
|
||||
};
|
||||
|
||||
let csv_reader = match skip_rows {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_skip_rows(r),
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = csv_reader
|
||||
.finish()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
df.into_value(call.head)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let csv_reader = CsvReader::from_path(&file.item)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating CSV reader".into(),
|
||||
e.to_string(),
|
||||
Some(file.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.with_encoding(CsvEncoding::LossyUtf8);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"Incorrect delimiter".into(),
|
||||
"Delimiter has to be one character".into(),
|
||||
Some(d.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_delimiter(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.infer_schema(Some(r)),
|
||||
};
|
||||
|
||||
let csv_reader = match skip_rows {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_skip_rows(r),
|
||||
};
|
||||
|
||||
let csv_reader = match columns {
|
||||
None => csv_reader,
|
||||
Some(columns) => csv_reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = csv_reader
|
||||
.finish()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{e:?}"),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
Ok(df.into_value(call.head))
|
||||
}
|
||||
}
|
106
crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs
Normal file
106
crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs
Normal file
@ -0,0 +1,106 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use crate::dataframe::values::Column;
|
||||
use crate::dataframe::{eager::SQLContext, values::NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
// attribution:
|
||||
// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you.
|
||||
// maybe we should just use the crate at some point but it's not published yet.
|
||||
// https://github.com/pola-rs/polars/tree/master/polars-sql
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct QueryDf;
|
||||
|
||||
impl Command for QueryDf {
|
||||
fn name(&self) -> &str {
|
||||
"dfr query"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("sql", SyntaxShape::String, "sql query")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["dataframe", "sql", "search"]
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Query dataframe using SQL",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let sql_query: String = call.req(engine_state, stack, 0)?;
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut ctx = SQLContext::new();
|
||||
ctx.register("df", &df.df);
|
||||
let df_sql = ctx.execute(&sql_query).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Dataframe Error".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
let lazy = NuLazyFrame::new(false, df_sql);
|
||||
|
||||
let eager = lazy.collect(call.head)?;
|
||||
let value = Value::CustomValue {
|
||||
val: Box::new(eager),
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(QueryDf {})])
|
||||
}
|
||||
}
|
180
crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs
Normal file
180
crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs
Normal file
@ -0,0 +1,180 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::{utils::extract_strings, values::NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RenameDF;
|
||||
|
||||
impl Command for RenameDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr rename"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Rename a dataframe column."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"columns",
|
||||
SyntaxShape::Any,
|
||||
"Column(s) to be renamed. A string or list of strings",
|
||||
)
|
||||
.required(
|
||||
"new names",
|
||||
SyntaxShape::Any,
|
||||
"New names for the selected column(s). A string or list of strings",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Renames a series",
|
||||
example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"new_name".to_string(),
|
||||
vec![
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(7),
|
||||
Value::test_int(8),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames a dataframe column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames two dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b_new".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
mut df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Value = call.req(engine_state, stack, 0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let new_names: Value = call.req(engine_state, stack, 1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
for (from, to) in columns.iter().zip(new_names.iter()) {
|
||||
df.as_mut().rename(from, to).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error renaming".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Value = call.req(engine_state, stack, 0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let new_names: Value = call.req(engine_state, stack, 1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
if columns.len() != new_names.len() {
|
||||
let value: Value = call.req(engine_state, stack, 1)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "New name list has different size to column list".into(),
|
||||
span: value.span()?,
|
||||
});
|
||||
}
|
||||
|
||||
let lazy = lazy.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(RenameDF {})])
|
||||
}
|
||||
}
|
132
crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs
Normal file
132
crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs
Normal file
@ -0,0 +1,132 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SampleDF;
|
||||
|
||||
impl Command for SampleDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr sample"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Create sample dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"n-rows",
|
||||
SyntaxShape::Int,
|
||||
"number of rows to be taken from dataframe",
|
||||
Some('n'),
|
||||
)
|
||||
.named(
|
||||
"fraction",
|
||||
SyntaxShape::Number,
|
||||
"fraction of dataframe to be taken",
|
||||
Some('f'),
|
||||
)
|
||||
.named(
|
||||
"seed",
|
||||
SyntaxShape::Number,
|
||||
"seed for the selection",
|
||||
Some('s'),
|
||||
)
|
||||
.switch("replace", "sample with replace", Some('e'))
|
||||
.switch("shuffle", "shuffle sample", Some('u'))
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Sample rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr sample -n 1",
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
Example {
|
||||
description: "Shows sample row using fraction and replace",
|
||||
example: "[[a b]; [1 2] [3 4] [5 6]] | dfr into-df | dfr sample -f 0.5 -e",
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
|
||||
let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
|
||||
let seed: Option<u64> = call
|
||||
.get_flag::<i64>(engine_state, stack, "seed")?
|
||||
.map(|val| val as u64);
|
||||
let replace: bool = call.has_flag("replace");
|
||||
let shuffle: bool = call.has_flag("shuffle");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
.as_ref()
|
||||
.sample_n(rows.item, replace, shuffle, seed)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
Some(rows.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(frac.item, replace, shuffle, seed)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
Some(frac.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
(Some(_), Some(_)) => Err(ShellError::GenericError(
|
||||
"Incompatible flags".into(),
|
||||
"Only one selection criterion allowed".into(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
(None, None) => Err(ShellError::GenericError(
|
||||
"No selection".into(),
|
||||
"No selection criterion was found".into(),
|
||||
Some(call.head),
|
||||
Some("Perhaps you want to use the flag -n or -f".into()),
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
84
crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs
Normal file
84
crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs
Normal file
@ -0,0 +1,84 @@
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::Column;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ShapeDF;
|
||||
|
||||
impl Command for ShapeDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr shape"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Shows column and row size for a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Shows row and column shape",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("rows".to_string(), vec![Value::test_int(2)]),
|
||||
Column::new("columns".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let rows = Value::int(df.as_ref().height() as i64, call.head);
|
||||
|
||||
let cols = Value::int(df.as_ref().width() as i64, call.head);
|
||||
|
||||
let rows_col = Column::new("rows".to_string(), vec![rows]);
|
||||
let cols_col = Column::new("columns".to_string(), vec![cols]);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![rows_col, cols_col])
|
||||
.map(|df| PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ShapeDF {})])
|
||||
}
|
||||
}
|
87
crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs
Normal file
87
crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs
Normal file
@ -0,0 +1,87 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::Column;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SliceDF;
|
||||
|
||||
impl Command for SliceDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr slice"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe from a slice of rows."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("offset", SyntaxShape::Int, "start of slice")
|
||||
.required("size", SyntaxShape::Int, "size of slice")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe from a slice of the rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let offset: i64 = call.req(engine_state, stack, 0)?;
|
||||
let size: usize = call.req(engine_state, stack, 1)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let res = df.as_ref().slice(offset, size);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(SliceDF {})])
|
||||
}
|
||||
}
|
222
crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs
Normal file
222
crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs
Normal file
@ -0,0 +1,222 @@
|
||||
use crate::dataframe::eager::sql_expr::parse_sql_expr;
|
||||
use polars::error::{ErrString, PolarsError};
|
||||
use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame};
|
||||
use sqlparser::ast::{
|
||||
Expr as SqlExpr, Select, SelectItem, SetExpr, Statement, TableFactor, Value as SQLValue,
|
||||
};
|
||||
use sqlparser::dialect::GenericDialect;
|
||||
use sqlparser::parser::Parser;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SQLContext {
|
||||
table_map: HashMap<String, LazyFrame>,
|
||||
dialect: GenericDialect,
|
||||
}
|
||||
|
||||
impl SQLContext {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
table_map: HashMap::new(),
|
||||
dialect: GenericDialect::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&mut self, name: &str, df: &DataFrame) {
|
||||
self.table_map.insert(name.to_owned(), df.clone().lazy());
|
||||
}
|
||||
|
||||
fn execute_select(&self, select_stmt: &Select) -> Result<LazyFrame, PolarsError> {
|
||||
// Determine involved dataframe
|
||||
// Implicit join require some more work in query parsers, Explicit join are preferred for now.
|
||||
let tbl = select_stmt.from.get(0).ok_or_else(|| {
|
||||
PolarsError::ComputeError(ErrString::from("No table found in select statement"))
|
||||
})?;
|
||||
let mut alias_map = HashMap::new();
|
||||
let tbl_name = match &tbl.relation {
|
||||
TableFactor::Table { name, alias, .. } => {
|
||||
let tbl_name = name
|
||||
.0
|
||||
.get(0)
|
||||
.ok_or_else(|| {
|
||||
PolarsError::ComputeError(ErrString::from(
|
||||
"No table found in select statement",
|
||||
))
|
||||
})?
|
||||
.value
|
||||
.to_string();
|
||||
if self.table_map.contains_key(&tbl_name) {
|
||||
if let Some(alias) = alias {
|
||||
alias_map.insert(alias.name.value.clone(), tbl_name.to_owned());
|
||||
};
|
||||
tbl_name
|
||||
} else {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Table name {tbl_name} was not found").into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
// Support bare table, optional with alias for now
|
||||
_ => return Err(PolarsError::ComputeError("Not implemented".into())),
|
||||
};
|
||||
let df = &self.table_map[&tbl_name];
|
||||
let mut raw_projection_before_alias: HashMap<String, usize> = HashMap::new();
|
||||
let mut contain_wildcard = false;
|
||||
// Filter Expression
|
||||
let df = match select_stmt.selection.as_ref() {
|
||||
Some(expr) => {
|
||||
let filter_expression = parse_sql_expr(expr)?;
|
||||
df.clone().filter(filter_expression)
|
||||
}
|
||||
None => df.clone(),
|
||||
};
|
||||
// Column Projections
|
||||
let projection = select_stmt
|
||||
.projection
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, select_item)| {
|
||||
Ok(match select_item {
|
||||
SelectItem::UnnamedExpr(expr) => {
|
||||
let expr = parse_sql_expr(expr)?;
|
||||
raw_projection_before_alias.insert(format!("{expr:?}"), i);
|
||||
expr
|
||||
}
|
||||
SelectItem::ExprWithAlias { expr, alias } => {
|
||||
let expr = parse_sql_expr(expr)?;
|
||||
raw_projection_before_alias.insert(format!("{expr:?}"), i);
|
||||
expr.alias(&alias.value)
|
||||
}
|
||||
SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => {
|
||||
contain_wildcard = true;
|
||||
col("*")
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>, PolarsError>>()?;
|
||||
// Check for group by
|
||||
// After projection since there might be number.
|
||||
let group_by = select_stmt
|
||||
.group_by
|
||||
.iter()
|
||||
.map(
|
||||
|e|match e {
|
||||
SqlExpr::Value(SQLValue::Number(idx, _)) => {
|
||||
let idx = match idx.parse::<usize>() {
|
||||
Ok(0)| Err(_) => Err(
|
||||
PolarsError::ComputeError(
|
||||
format!("Group-By Error: Only positive number or expression are supported, got {idx}").into()
|
||||
)),
|
||||
Ok(idx) => Ok(idx)
|
||||
}?;
|
||||
Ok(projection[idx].clone())
|
||||
}
|
||||
SqlExpr::Value(_) => Err(
|
||||
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into())
|
||||
),
|
||||
_ => parse_sql_expr(e)
|
||||
}
|
||||
)
|
||||
.collect::<Result<Vec<_>, PolarsError>>()?;
|
||||
|
||||
let df = if group_by.is_empty() {
|
||||
df.select(projection)
|
||||
} else {
|
||||
// check groupby and projection due to difference between SQL and polars
|
||||
// Return error on wild card, shouldn't process this
|
||||
if contain_wildcard {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"Group-By Error: Can't process wildcard in group-by".into(),
|
||||
));
|
||||
}
|
||||
// Default polars group by will have group by columns at the front
|
||||
// need some container to contain position of group by columns and its position
|
||||
// at the final agg projection, check the schema for the existence of group by column
|
||||
// and its projections columns, keeping the original index
|
||||
let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by
|
||||
.iter()
|
||||
.map(|expr| raw_projection_before_alias.get(&format!("{expr:?}")))
|
||||
.enumerate()
|
||||
.filter(|(_, proj_p)| proj_p.is_some())
|
||||
.map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p)))
|
||||
.unzip();
|
||||
let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| !exclude_expr.contains(i))
|
||||
.enumerate()
|
||||
.map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len())))
|
||||
.unzip();
|
||||
let agg_df = df.groupby(group_by).agg(agg_projection);
|
||||
let mut final_proj_pos = groupby_pos
|
||||
.into_iter()
|
||||
.chain(agg_proj_pos.into_iter())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb));
|
||||
let final_proj = final_proj_pos
|
||||
.into_iter()
|
||||
.map(|(_, shm_p)| {
|
||||
col(agg_df
|
||||
.clone()
|
||||
// FIXME: had to do this mess to get get_index to work, not sure why. need help
|
||||
.collect()
|
||||
.unwrap_or_default()
|
||||
.schema()
|
||||
.get_at_index(shm_p)
|
||||
.unwrap_or((&"".into(), &DataType::Null))
|
||||
.0)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
agg_df.select(final_proj)
|
||||
};
|
||||
Ok(df)
|
||||
}
|
||||
|
||||
pub fn execute(&self, query: &str) -> Result<LazyFrame, PolarsError> {
|
||||
let ast = Parser::parse_sql(&self.dialect, query)
|
||||
.map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?;
|
||||
if ast.len() != 1 {
|
||||
Err(PolarsError::ComputeError(
|
||||
"One and only one statement at a time please".into(),
|
||||
))
|
||||
} else {
|
||||
let ast = ast
|
||||
.get(0)
|
||||
.ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?;
|
||||
Ok(match ast {
|
||||
Statement::Query(query) => {
|
||||
let rs = match &*query.body {
|
||||
SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"INSERT, UPDATE is not supported for polars".into(),
|
||||
))
|
||||
}
|
||||
};
|
||||
match &query.limit {
|
||||
Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => {
|
||||
let nrow = nrow.parse().map_err(|err| {
|
||||
PolarsError::ComputeError(
|
||||
format!("Conversion Error: {err:?}").into(),
|
||||
)
|
||||
})?;
|
||||
rs.limit(nrow)
|
||||
}
|
||||
None => rs,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"Only support number argument to LIMIT clause".into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Statement type {ast:?} is not supported").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
183
crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs
Normal file
183
crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs
Normal file
@ -0,0 +1,183 @@
|
||||
use polars::error::PolarsError;
|
||||
use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit};
|
||||
|
||||
use sqlparser::ast::{
|
||||
BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, Expr as SqlExpr,
|
||||
Function as SQLFunction, Value as SqlValue, WindowSpec,
|
||||
};
|
||||
|
||||
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
|
||||
Ok(match data_type {
|
||||
SQLDataType::Char(_)
|
||||
| SQLDataType::Varchar(_)
|
||||
| SQLDataType::Uuid
|
||||
| SQLDataType::Clob(_)
|
||||
| SQLDataType::Text
|
||||
| SQLDataType::String => DataType::Utf8,
|
||||
SQLDataType::Float(_) => DataType::Float32,
|
||||
SQLDataType::Real => DataType::Float32,
|
||||
SQLDataType::Double => DataType::Float64,
|
||||
SQLDataType::TinyInt(_) => DataType::Int8,
|
||||
SQLDataType::UnsignedTinyInt(_) => DataType::UInt8,
|
||||
SQLDataType::SmallInt(_) => DataType::Int16,
|
||||
SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
|
||||
SQLDataType::Int(_) => DataType::Int32,
|
||||
SQLDataType::UnsignedInt(_) => DataType::UInt32,
|
||||
SQLDataType::BigInt(_) => DataType::Int64,
|
||||
SQLDataType::UnsignedBigInt(_) => DataType::UInt64,
|
||||
|
||||
SQLDataType::Boolean => DataType::Boolean,
|
||||
SQLDataType::Date => DataType::Date,
|
||||
SQLDataType::Time(_, _) => DataType::Time,
|
||||
SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Milliseconds, None),
|
||||
SQLDataType::Interval => DataType::Duration(TimeUnit::Milliseconds),
|
||||
SQLDataType::Array(inner_type) => match inner_type {
|
||||
Some(inner_type) => DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)),
|
||||
None => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"SQL Datatype Array(None) was not supported in polars-sql yet!".into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_(expr: Expr, data_type: &SQLDataType) -> Result<Expr> {
|
||||
let polars_type = map_sql_polars_datatype(data_type)?;
|
||||
Ok(expr.cast(polars_type))
|
||||
}
|
||||
|
||||
fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result<Expr> {
|
||||
Ok(match op {
|
||||
SQLBinaryOperator::Plus => left + right,
|
||||
SQLBinaryOperator::Minus => left - right,
|
||||
SQLBinaryOperator::Multiply => left * right,
|
||||
SQLBinaryOperator::Divide => left / right,
|
||||
SQLBinaryOperator::Modulo => left % right,
|
||||
SQLBinaryOperator::StringConcat => left.cast(DataType::Utf8) + right.cast(DataType::Utf8),
|
||||
SQLBinaryOperator::Gt => left.gt(right),
|
||||
SQLBinaryOperator::Lt => left.lt(right),
|
||||
SQLBinaryOperator::GtEq => left.gt_eq(right),
|
||||
SQLBinaryOperator::LtEq => left.lt_eq(right),
|
||||
SQLBinaryOperator::Eq => left.eq(right),
|
||||
SQLBinaryOperator::NotEq => left.eq(right).not(),
|
||||
SQLBinaryOperator::And => left.and(right),
|
||||
SQLBinaryOperator::Or => left.or(right),
|
||||
SQLBinaryOperator::Xor => left.xor(right),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn literal_expr(value: &SqlValue) -> Result<Expr> {
|
||||
Ok(match value {
|
||||
SqlValue::Number(s, _) => {
|
||||
// Check for existence of decimal separator dot
|
||||
if s.contains('.') {
|
||||
s.parse::<f64>().map(lit).map_err(|_| {
|
||||
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
|
||||
})
|
||||
} else {
|
||||
s.parse::<i64>().map(lit).map_err(|_| {
|
||||
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
|
||||
})
|
||||
}?
|
||||
}
|
||||
SqlValue::SingleQuotedString(s) => lit(s.clone()),
|
||||
SqlValue::NationalStringLiteral(s) => lit(s.clone()),
|
||||
SqlValue::HexStringLiteral(s) => lit(s.clone()),
|
||||
SqlValue::DoubleQuotedString(s) => lit(s.clone()),
|
||||
SqlValue::Boolean(b) => lit(*b),
|
||||
SqlValue::Null => Expr::Literal(LiteralValue::Null),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
|
||||
Ok(match expr {
|
||||
SqlExpr::Identifier(e) => col(&e.value),
|
||||
SqlExpr::BinaryOp { left, op, right } => {
|
||||
let left = parse_sql_expr(left)?;
|
||||
let right = parse_sql_expr(right)?;
|
||||
binary_op_(left, right, op)?
|
||||
}
|
||||
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
|
||||
SqlExpr::Cast { expr, data_type } => cast_(parse_sql_expr(expr)?, data_type)?,
|
||||
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
|
||||
SqlExpr::Value(value) => literal_expr(value)?,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn apply_window_spec(expr: Expr, window_spec: &Option<WindowSpec>) -> Result<Expr> {
|
||||
Ok(match &window_spec {
|
||||
Some(window_spec) => {
|
||||
// Process for simple window specification, partition by first
|
||||
let partition_by = window_spec
|
||||
.partition_by
|
||||
.iter()
|
||||
.map(parse_sql_expr)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
expr.over(partition_by)
|
||||
// Order by and Row range may not be supported at the moment
|
||||
}
|
||||
None => expr,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
|
||||
use sqlparser::ast::{FunctionArg, FunctionArgExpr};
|
||||
// Function name mostly do not have name space, so it mostly take the first args
|
||||
let function_name = sql_function.name.0[0].value.to_lowercase();
|
||||
let args = sql_function
|
||||
.args
|
||||
.iter()
|
||||
.map(|arg| match arg {
|
||||
FunctionArg::Named { arg, .. } => arg,
|
||||
FunctionArg::Unnamed(arg) => arg,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(
|
||||
match (
|
||||
function_name.as_str(),
|
||||
args.as_slice(),
|
||||
sql_function.distinct,
|
||||
) {
|
||||
("sum", [FunctionArgExpr::Expr(expr)], false) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, &sql_function.over)?.sum()
|
||||
}
|
||||
("count", [FunctionArgExpr::Expr(expr)], false) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, &sql_function.over)?.count()
|
||||
}
|
||||
("count", [FunctionArgExpr::Expr(expr)], true) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, &sql_function.over)?.n_unique()
|
||||
}
|
||||
// Special case for wildcard args to count function.
|
||||
("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!(
|
||||
"Function {function_name:?} with args {args:?} was not supported in polars-sql yet!"
|
||||
)
|
||||
.into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
271
crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs
Normal file
271
crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs
Normal file
@ -0,0 +1,271 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::{
|
||||
chunked_array::ChunkedArray,
|
||||
prelude::{
|
||||
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
|
||||
QuantileInterpolOptions, Series, Utf8Type,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Summary;
|
||||
|
||||
impl Command for Summary {
|
||||
fn name(&self) -> &str {
|
||||
"dfr summary"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.named(
|
||||
"quantiles",
|
||||
SyntaxShape::Table,
|
||||
"provide optional quantiles",
|
||||
Some('q'),
|
||||
)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "list dataframe descriptives",
|
||||
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"descriptor".to_string(),
|
||||
vec![
|
||||
Value::test_string("count"),
|
||||
Value::test_string("sum"),
|
||||
Value::test_string("mean"),
|
||||
Value::test_string("median"),
|
||||
Value::test_string("std"),
|
||||
Value::test_string("min"),
|
||||
Value::test_string("25%"),
|
||||
Value::test_string("50%"),
|
||||
Value::test_string("75%"),
|
||||
Value::test_string("max"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"a (i64)".to_string(),
|
||||
vec![
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(0.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b (i64)".to_string(),
|
||||
vec![
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(0.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let quantiles: Option<Vec<Value>> = call.get_flag(engine_state, stack, "quantiles")?;
|
||||
let quantiles = quantiles.map(|values| {
|
||||
values
|
||||
.iter()
|
||||
.map(|value| match value {
|
||||
Value::Float { val, span } => {
|
||||
if (&0.0..=&1.0).contains(&val) {
|
||||
Ok(*val)
|
||||
} else {
|
||||
Err(ShellError::GenericError(
|
||||
"Incorrect value for quantile".to_string(),
|
||||
"value should be between 0 and 1".to_string(),
|
||||
Some(*span),
|
||||
None,
|
||||
Vec::new(),
|
||||
))
|
||||
}
|
||||
}
|
||||
_ => match value.span() {
|
||||
Ok(span) => Err(ShellError::GenericError(
|
||||
"Incorrect value for quantile".to_string(),
|
||||
"value should be a float".to_string(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
})
|
||||
.collect::<Result<Vec<f64>, ShellError>>()
|
||||
});
|
||||
|
||||
let quantiles = match quantiles {
|
||||
Some(quantiles) => quantiles?,
|
||||
None => vec![0.25, 0.50, 0.75],
|
||||
};
|
||||
|
||||
let mut quantiles_labels = quantiles
|
||||
.iter()
|
||||
.map(|q| Some(format!("{}%", q * 100.0)))
|
||||
.collect::<Vec<Option<String>>>();
|
||||
let mut labels = vec![
|
||||
Some("count".to_string()),
|
||||
Some("sum".to_string()),
|
||||
Some("mean".to_string()),
|
||||
Some("median".to_string()),
|
||||
Some("std".to_string()),
|
||||
Some("min".to_string()),
|
||||
];
|
||||
labels.append(&mut quantiles_labels);
|
||||
labels.push(Some("max".to_string()));
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let names = ChunkedArray::<Utf8Type>::from_slice_options("descriptor", &labels).into_series();
|
||||
|
||||
let head = std::iter::once(names);
|
||||
|
||||
let tail = df
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.filter(|col| col.dtype() != &DataType::Object("object"))
|
||||
.map(|col| {
|
||||
let count = col.len() as f64;
|
||||
|
||||
let sum = col
|
||||
.sum_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let mean = match col.mean_as_series().get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let median = match col.median_as_series().get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let std = match col.std_as_series(0).get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let min = col
|
||||
.min_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let mut quantiles = quantiles
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|q| {
|
||||
col.quantile_as_series(q, QuantileInterpolOptions::default())
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<Option<f64>>>();
|
||||
|
||||
let max = col
|
||||
.max_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
|
||||
descriptors.append(&mut quantiles);
|
||||
descriptors.push(max);
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
|
||||
DataFrame::new(res)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Dataframe Error".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(Summary {})])
|
||||
}
|
||||
}
|
153
crates/nu-cmd-dataframe/src/dataframe/eager/take.rs
Normal file
153
crates/nu-cmd-dataframe/src/dataframe/eager/take.rs
Normal file
@ -0,0 +1,153 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::DataType;
|
||||
|
||||
use crate::dataframe::values::Column;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TakeDF;
|
||||
|
||||
impl Command for TakeDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr take"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe using the given indices."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"indices",
|
||||
SyntaxShape::Any,
|
||||
"list of indices used to take data",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes selected rows from dataframe",
|
||||
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dfr into-df);
|
||||
let indices = ([0 2] | dfr into-df);
|
||||
$df | dfr take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes selected rows from series",
|
||||
example: r#"let series = ([4 1 5 2 4 3] | dfr into-df);
|
||||
let indices = ([0 2] | dfr into-df);
|
||||
$series | dfr take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(5)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let index_value: Value = call.req(engine_state, stack, 0)?;
|
||||
let index_span = index_value.span()?;
|
||||
let index = NuDataFrame::try_from_value(index_value)?.as_series(index_span)?;
|
||||
|
||||
let casted = match index.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => {
|
||||
index.cast(&DataType::UInt32).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting index list".into(),
|
||||
e.to_string(),
|
||||
Some(index_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
}
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Incorrect type".into(),
|
||||
"Series with incorrect type".into(),
|
||||
Some(call.head),
|
||||
Some("Consider using a Series with type int type".into()),
|
||||
Vec::new(),
|
||||
)),
|
||||
}?;
|
||||
|
||||
let indices = casted.u32().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting index list".into(),
|
||||
e.to_string(),
|
||||
Some(index_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
NuDataFrame::try_from_pipeline(input, call.head).and_then(|df| {
|
||||
df.as_ref()
|
||||
.take(indices)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error taking values".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(TakeDF {})])
|
||||
}
|
||||
}
|
94
crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs
Normal file
94
crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::{IpcWriter, SerWriter};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToArrow;
|
||||
|
||||
impl Command for ToArrow {
|
||||
fn name(&self) -> &str {
|
||||
"dfr to-arrow"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to arrow file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to arrow file",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-arrow test.arrow",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error with file name".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
IpcWriter::new(&mut file).finish(df.as_mut()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error saving file".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let file_value = Value::String {
|
||||
val: format!("saved {:?}", &file_name.item),
|
||||
span: file_name.span,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::List {
|
||||
vals: vec![file_value],
|
||||
span: call.head,
|
||||
},
|
||||
None,
|
||||
))
|
||||
}
|
140
crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs
Normal file
140
crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs
Normal file
@ -0,0 +1,140 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::{CsvWriter, SerWriter};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToCSV;
|
||||
|
||||
impl Command for ToCSV {
|
||||
fn name(&self) -> &str {
|
||||
"dfr to-csv"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to CSV file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.named(
|
||||
"delimiter",
|
||||
SyntaxShape::String,
|
||||
"file delimiter character",
|
||||
Some('d'),
|
||||
)
|
||||
.switch("no-header", "Indicates if file doesn't have header", None)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Saves dataframe to CSV file",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Saves dataframe to CSV file using other delimiter",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv -d '|'",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header");
|
||||
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error with file name".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let writer = CsvWriter::new(&mut file);
|
||||
|
||||
let writer = if no_header {
|
||||
writer.has_header(false)
|
||||
} else {
|
||||
writer.has_header(true)
|
||||
};
|
||||
|
||||
let mut writer = match delimiter {
|
||||
None => writer,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"Incorrect delimiter".into(),
|
||||
"Delimiter has to be one char".into(),
|
||||
Some(d.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
|
||||
writer.with_delimiter(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
writer.finish(df.as_mut()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error writing to file".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let file_value = Value::String {
|
||||
val: format!("saved {:?}", &file_name.item),
|
||||
span: file_name.span,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::List {
|
||||
vals: vec![file_value],
|
||||
span: call.head,
|
||||
},
|
||||
None,
|
||||
))
|
||||
}
|
130
crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs
Normal file
130
crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs
Normal file
@ -0,0 +1,130 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
|
||||
impl Command for ToDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dfr into-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a list, table or record into a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Any)
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
|
||||
),
|
||||
Column::new(
|
||||
"1".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"2".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[true true false] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
NuDataFrame::try_from_iter(input.into_iter())
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ToDataFrame {})])
|
||||
}
|
||||
}
|
124
crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs
Normal file
124
crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs
Normal file
@ -0,0 +1,124 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToNu;
|
||||
|
||||
impl Command for ToNu {
|
||||
fn name(&self) -> &str {
|
||||
"dfr into-nu"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a section of the dataframe into nushell Table."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"rows",
|
||||
SyntaxShape::Number,
|
||||
"number of rows to be shown",
|
||||
Some('n'),
|
||||
)
|
||||
.switch("tail", "shows tail rows", Some('t'))
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
let cols = vec!["index".into(), "a".into(), "b".into()];
|
||||
let rec_1 = Value::Record {
|
||||
cols: cols.clone(),
|
||||
vals: vec![Value::test_int(0), Value::test_int(1), Value::test_int(2)],
|
||||
span: Span::test_data(),
|
||||
};
|
||||
let rec_2 = Value::Record {
|
||||
cols: cols.clone(),
|
||||
vals: vec![Value::test_int(1), Value::test_int(3), Value::test_int(4)],
|
||||
span: Span::test_data(),
|
||||
};
|
||||
let rec_3 = Value::Record {
|
||||
cols,
|
||||
vals: vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
|
||||
span: Span::test_data(),
|
||||
};
|
||||
|
||||
vec![
|
||||
Example {
|
||||
description: "Shows head rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr into-nu",
|
||||
result: Some(Value::List {
|
||||
vals: vec![rec_1, rec_2],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Shows tail rows from dataframe",
|
||||
example: "[[a b]; [1 2] [5 6] [3 4]] | dfr into-df | dfr into-nu -t -n 1",
|
||||
result: Some(Value::List {
|
||||
vals: vec![rec_3],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.get_flag(engine_state, stack, "rows")?;
|
||||
let tail: bool = call.has_flag("tail");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let values = if tail {
|
||||
df.tail(rows, call.head)?
|
||||
} else {
|
||||
// if rows is specified, return those rows, otherwise return everything
|
||||
if rows.is_some() {
|
||||
df.head(rows, call.head)?
|
||||
} else {
|
||||
df.head(Some(df.height()), call.head)?
|
||||
}
|
||||
};
|
||||
|
||||
let value = Value::List {
|
||||
vals: values,
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ToNu {})])
|
||||
}
|
||||
}
|
94
crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs
Normal file
94
crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::ParquetWriter;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToParquet;
|
||||
|
||||
impl Command for ToParquet {
|
||||
fn name(&self) -> &str {
|
||||
"dfr to-parquet"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to parquet file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to parquet file",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-parquet test.parquet",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error with file name".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
ParquetWriter::new(file).finish(df.as_mut()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error saving file".into(),
|
||||
e.to_string(),
|
||||
Some(file_name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let file_value = Value::String {
|
||||
val: format!("saved {:?}", &file_name.item),
|
||||
span: file_name.span,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::List {
|
||||
vals: vec![file_value],
|
||||
span: call.head,
|
||||
},
|
||||
None,
|
||||
))
|
||||
}
|
209
crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs
Normal file
209
crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs
Normal file
@ -0,0 +1,209 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WithColumn;
|
||||
|
||||
impl Command for WithColumn {
|
||||
fn name(&self) -> &str {
|
||||
"dfr with-column"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Adds a series to the dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named("name", SyntaxShape::String, "new column name", Some('n'))
|
||||
.rest(
|
||||
"series or expressions",
|
||||
SyntaxShape::Any,
|
||||
"series to be added or expressions used to define the new columns",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr into-df
|
||||
| dfr with-column ([5 6] | dfr into-df) --name c"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr into-lazy
|
||||
| dfr with-column [
|
||||
((dfr col a) * 2 | dfr as "c")
|
||||
((dfr col a) * 3 | dfr as "d")
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(9)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert {
|
||||
to_type: "lazy or eager dataframe".into(),
|
||||
from_type: value.get_type().to_string(),
|
||||
span: value.span()?,
|
||||
help: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
mut df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let new_column: Value = call.req(engine_state, stack, 0)?;
|
||||
let column_span = new_column.span()?;
|
||||
|
||||
if NuExpression::can_downcast(&new_column) {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
let lazy = NuLazyFrame::new(true, df.lazy().with_columns(&expressions));
|
||||
|
||||
let df = lazy.collect(call.head)?;
|
||||
|
||||
Ok(PipelineData::Value(df.into_value(call.head), None))
|
||||
} else {
|
||||
let mut other = NuDataFrame::try_from_value(new_column)?.as_series(column_span)?;
|
||||
|
||||
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
|
||||
Some(name) => name,
|
||||
None => other.name().to_string(),
|
||||
};
|
||||
|
||||
let series = other.rename(&name).clone();
|
||||
|
||||
df.as_mut()
|
||||
.with_column(series)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error adding column to dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(column_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| {
|
||||
PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(df.clone(), call.head),
|
||||
None,
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::expressions::ExprAlias;
|
||||
use crate::dataframe::expressions::ExprCol;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![
|
||||
Box::new(WithColumn {}),
|
||||
Box::new(ExprAlias {}),
|
||||
Box::new(ExprCol {}),
|
||||
])
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user