The ability to specify a schema when using dfr open and dfr into-df (#11634)

# Description

There are times where explicitly specifying a schema for a dataframe is
needed such as:
- Opening CSV and JSON lines files and needing to provide more information
to polars to keep it from failing, or wanting to override the default
type conversion
- When converting a nushell value to a dataframe and wanting to override
the default conversion behaviors.

This pull request provides:
- A flag to allow specifying a schema when using dfr into-df
- A flag to allow specifying a schema when using dfr open that works for
CSV and JSON types
- A new command `dfr schema` which displays schema information and can
also display the supported schema dtypes

A schema is specified by creating a record that maps each key (column
name) to its dtype. Example usages:

```
{a:1, b:{a:2}} | dfr into-df -s {a: u8, b: {a: i32}} | dfr schema
{a: 1, b: {a: [1 2 3]}, c: [a b c]} | dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>} | dfr schema
 dfr open -s {pid: i32, ppid: i32, name: str, status: str, cpu: f64, mem: i64, virtual: i64} /tmp/ps.jsonl  | dfr schema
```

Supported dtypes:
null                                                   
bool                                                   
u8                                                     
u16                                                    
u32                                                    
u64                                                    
i8                                                     
i16                                                    
i32                                                    
i64                                                    
f32                                                    
f64                                                    
str                                                    
binary                                                 
date                                                   
datetime<time_unit: (ms, us, ns) timezone (optional)>
duration<time_unit: (ms, us, ns)>
time                                                   
object                                                 
unknown                                                
list<dtype>


Structs are also supported, but are specified via a nested record:
{a: u8, b: {d: str}}

Another feature of the `dfr schema` command is that it returns the data
in a format that is itself a valid schema, so its output can be passed
back in as the schema argument:

<img width="638" alt="Screenshot 2024-01-29 at 10 23 58"
src="https://github.com/nushell/nushell/assets/56345/b49c3bff-5cda-4c86-975a-dfd91d991373">

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2024-01-29 11:26:04 -08:00 committed by GitHub
parent d03ad6a257
commit f879c00f9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
90 changed files with 2408 additions and 1277 deletions

View File

@ -37,7 +37,8 @@ impl Command for AppendDF {
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
$a | dfr append $a"#, $a | dfr append $a"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -54,7 +55,9 @@ impl Command for AppendDF {
"b_x".to_string(), "b_x".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -64,7 +67,8 @@ impl Command for AppendDF {
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
$a | dfr append $a --col"#, $a | dfr append $a --col"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![ vec![
@ -83,7 +87,9 @@ impl Command for AppendDF {
Value::test_int(4), Value::test_int(4),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for DropDF {
description: "drop column a", description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,7 +46,8 @@ impl Command for DropDuplicates {
description: "drop duplicates", description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates", example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(3), Value::test_int(1)], vec![Value::test_int(3), Value::test_int(1)],
@ -55,7 +56,9 @@ impl Command for DropDuplicates {
"b".to_string(), "b".to_string(),
vec![Value::test_int(4), Value::test_int(2)], vec![Value::test_int(4), Value::test_int(2)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -43,7 +43,8 @@ impl Command for DropNulls {
let a = ($df | dfr with-column $res --name res); let a = ($df | dfr with-column $res --name res);
$a | dfr drop-nulls"#, $a | dfr drop-nulls"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(1)], vec![Value::test_int(1), Value::test_int(1)],
@ -56,7 +57,9 @@ impl Command for DropNulls {
"res".to_string(), "res".to_string(),
vec![Value::test_int(1), Value::test_int(1)], vec![Value::test_int(1), Value::test_int(1)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -66,7 +69,8 @@ impl Command for DropNulls {
example: r#"let s = ([1 2 0 0 3 4] | dfr into-df); example: r#"let s = ([1 2 0 0 3 4] | dfr into-df);
($s / $s) | dfr drop-nulls"#, ($s / $s) | dfr drop-nulls"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"div_0_0".to_string(), "div_0_0".to_string(),
vec![ vec![
Value::test_int(1), Value::test_int(1),
@ -74,7 +78,9 @@ impl Command for DropNulls {
Value::test_int(1), Value::test_int(1),
Value::test_int(1), Value::test_int(1),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -31,7 +31,8 @@ impl Command for DataTypes {
description: "Dataframe dtypes", description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"column".to_string(), "column".to_string(),
vec![Value::test_string("a"), Value::test_string("b")], vec![Value::test_string("a"), Value::test_string("b")],
@ -40,7 +41,9 @@ impl Command for DataTypes {
"dtype".to_string(), "dtype".to_string(),
vec![Value::test_string("i64"), Value::test_string("i64")], vec![Value::test_string("i64"), Value::test_string("i64")],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -79,6 +82,7 @@ fn command(
.dtype(); .dtype();
let dtype_str = dtype.to_string(); let dtype_str = dtype.to_string();
dtypes.push(Value::string(dtype_str, call.head)); dtypes.push(Value::string(dtype_str, call.head));
Value::string(*v, call.head) Value::string(*v, call.head)
@ -88,7 +92,7 @@ fn command(
let names_col = Column::new("column".to_string(), names); let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes); let dtypes_col = Column::new("dtype".to_string(), dtypes);
NuDataFrame::try_from_columns(vec![names_col, dtypes_col]) NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)
.map(|df| PipelineData::Value(df.into_value(call.head), None)) .map(|df| PipelineData::Value(df.into_value(call.head), None))
} }

View File

@ -43,10 +43,13 @@ impl Command for FilterWith {
example: r#"let mask = ([true false] | dfr into-df); example: r#"let mask = ([true false] | dfr into-df);
[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#, [[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]), Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]), Column::new("b".to_string(), vec![Value::test_int(2)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -55,10 +58,13 @@ impl Command for FilterWith {
description: "Filter dataframe using an expression", description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]), Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]), Column::new("b".to_string(), vec![Value::test_int(4)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -44,10 +44,13 @@ impl Command for FirstDF {
description: "Return the first row of a dataframe", description: "Return the first row of a dataframe",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]), Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]), Column::new("b".to_string(), vec![Value::test_int(2)]),
]) ],
None,
)
.expect("should not fail") .expect("should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -56,7 +59,8 @@ impl Command for FirstDF {
description: "Return the first two rows of a dataframe", description: "Return the first two rows of a dataframe",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -65,7 +69,9 @@ impl Command for FirstDF {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("should not fail") .expect("should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -36,10 +36,13 @@ impl Command for GetDF {
description: "Returns the selected column", description: "Returns the selected column",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -40,10 +40,13 @@ impl Command for LastDF {
description: "Create new dataframe with last rows", description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]), Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]), Column::new("b".to_string(), vec![Value::test_int(4)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -106,7 +106,7 @@ impl Command for MeltDF {
Value::test_string("c"), Value::test_string("c"),
], ],
), ),
]) ], None)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -15,6 +15,7 @@ mod open;
mod query_df; mod query_df;
mod rename; mod rename;
mod sample; mod sample;
mod schema;
mod shape; mod shape;
mod slice; mod slice;
mod sql_context; mod sql_context;
@ -49,6 +50,7 @@ pub use melt::MeltDF;
pub use query_df::QueryDf; pub use query_df::QueryDf;
pub use rename::RenameDF; pub use rename::RenameDF;
pub use sample::SampleDF; pub use sample::SampleDF;
pub use schema::SchemaDF;
pub use shape::ShapeDF; pub use shape::ShapeDF;
pub use slice::SliceDF; pub use slice::SliceDF;
pub use sql_context::SQLContext; pub use sql_context::SQLContext;
@ -93,6 +95,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
QueryDf, QueryDf,
RenameDF, RenameDF,
SampleDF, SampleDF,
SchemaDF,
ShapeDF, ShapeDF,
SliceDF, SliceDF,
TakeDF, TakeDF,

View File

@ -1,3 +1,5 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{NuDataFrame, NuLazyFrame}; use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
@ -70,6 +72,12 @@ impl Command for OpenDataFrame {
"Columns to be selected from csv file. CSV and Parquet file", "Columns to be selected from csv file. CSV and Parquet file",
None, None,
) )
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s')
)
.input_output_type(Type::Any, Type::Custom("dataframe".into())) .input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -305,10 +313,19 @@ fn from_json(
help: None, help: None,
inner: vec![], inner: vec![],
})?; })?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let buf_reader = BufReader::new(file); let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader); let reader = JsonReader::new(buf_reader);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader let df: NuDataFrame = reader
.finish() .finish()
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
@ -329,6 +346,10 @@ fn from_jsonl(
call: &Call, call: &Call,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?; let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?; let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError { let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(), error: "Error opening file".into(),
@ -343,6 +364,11 @@ fn from_jsonl(
.with_json_format(JsonFormat::JsonLines) .with_json_format(JsonFormat::JsonLines)
.infer_schema_len(infer_schema); .infer_schema_len(infer_schema);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader let df: NuDataFrame = reader
.finish() .finish()
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
@ -368,6 +394,11 @@ fn from_csv(
let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?; let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?; let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if call.has_flag(engine_state, stack, "lazy")? { if call.has_flag(engine_state, stack, "lazy")? {
let file: String = call.req(engine_state, stack, 0)?; let file: String = call.req(engine_state, stack, 0)?;
let csv_reader = LazyCsvReader::new(file); let csv_reader = LazyCsvReader::new(file);
@ -395,6 +426,11 @@ fn from_csv(
let csv_reader = csv_reader.has_header(!no_header); let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema { let csv_reader = match infer_schema {
None => csv_reader, None => csv_reader,
Some(r) => csv_reader.with_infer_schema_length(Some(r)), Some(r) => csv_reader.with_infer_schema_length(Some(r)),
@ -452,6 +488,11 @@ fn from_csv(
let csv_reader = csv_reader.has_header(!no_header); let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema { let csv_reader = match infer_schema {
None => csv_reader, None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)), Some(r) => csv_reader.infer_schema(Some(r)),

View File

@ -44,10 +44,13 @@ impl Command for QueryDf {
description: "Query dataframe using SQL", description: "Query dataframe using SQL",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,7 +46,8 @@ impl Command for RenameDF {
description: "Renames a series", description: "Renames a series",
example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name", example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"new_name".to_string(), "new_name".to_string(),
vec![ vec![
Value::test_int(5), Value::test_int(5),
@ -54,7 +55,9 @@ impl Command for RenameDF {
Value::test_int(7), Value::test_int(7),
Value::test_int(8), Value::test_int(8),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -63,7 +66,8 @@ impl Command for RenameDF {
description: "Renames a dataframe column", description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a_new".to_string(), "a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -72,7 +76,9 @@ impl Command for RenameDF {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -81,7 +87,8 @@ impl Command for RenameDF {
description: "Renames two dataframe columns", description: "Renames two dataframe columns",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a_new".to_string(), "a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -90,7 +97,9 @@ impl Command for RenameDF {
"b_new".to_string(), "b_new".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -0,0 +1,119 @@
use super::super::values::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, Record, ShellError, Signature, Span, Type, Value,
};
/// Command type for `dfr schema`, which shows the schema of a dataframe.
///
/// The type carries no state; all behavior lives in its `Command` impl.
#[derive(Clone)]
pub struct SchemaDF;
impl Command for SchemaDF {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "dfr schema"
    }

    /// Short help text shown for the command.
    fn usage(&self) -> &str {
        "Show schema for a dataframe."
    }

    /// Declares the `--datatype-list` / `-l` switch and the command's
    /// category and input/output types.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // The previous help text ("creates a lazy dataframe") was a
            // copy-paste leftover; this switch makes `run` return the table
            // of supported dtypes instead of a dataframe's schema.
            .switch(
                "datatype-list",
                "list the supported schema data types instead of showing a dataframe schema",
                Some('l'),
            )
            // NOTE(review): the declared output type is a dataframe, but `run`
            // produces a plain value (see the record in `examples`) — confirm
            // whether this should be a record/list type instead.
            .input_output_type(
                Type::Custom("dataframe".into()),
                Type::Custom("dataframe".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Example: the schema of a two-column dataframe is a record mapping each
    /// column name to its dtype name.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Dataframe schema",
            example: r#"[[a b]; [1 "foo"] [3 "bar"]] | dfr into-df | dfr schema"#,
            result: Some(Value::record(
                Record::from_raw_cols_vals(
                    vec!["a".to_string(), "b".to_string()],
                    vec![
                        Value::string("i64", Span::test_data()),
                        Value::string("str", Span::test_data()),
                    ],
                ),
                Span::test_data(),
            )),
        }]
    }

    /// Dispatches on the `datatype-list` switch: with the switch set, returns
    /// the dtype table; otherwise returns the schema of the piped dataframe.
    ///
    /// Errors from flag evaluation or from `command` are propagated.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        if call.has_flag(engine_state, stack, "datatype-list")? {
            // Tag the produced values with the call site rather than an
            // unknown span, matching how `command` uses `call.head`.
            Ok(PipelineData::Value(datatype_list(call.head), None))
        } else {
            command(engine_state, stack, call, input)
        }
    }
}
/// Reads a dataframe from the pipeline input and returns its schema
/// converted into a `Value`.
///
/// `_engine_state` and `_stack` are unused. Fails with the error produced by
/// `NuDataFrame::try_from_pipeline` when the input cannot be read as a
/// dataframe.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let frame = NuDataFrame::try_from_pipeline(input, call.head)?;
    let schema_value: Value = frame.schema().into();

    Ok(PipelineData::Value(schema_value, None))
}
/// Builds the table of dtype names returned by `dfr schema --datatype-list`.
///
/// The result is a list value with one record per dtype; each record has a
/// `dtype` column (the dtype spelling) and a `note` column (extra usage
/// information, empty for most entries). Every produced value carries `span`.
fn datatype_list(span: Span) -> Value {
    // (dtype spelling, explanatory note) pairs, in display order.
    const DTYPE_NOTES: &[(&str, &str)] = &[
        ("null", ""),
        ("bool", ""),
        ("u8", ""),
        ("u16", ""),
        ("u32", ""),
        ("u64", ""),
        ("i8", ""),
        ("i16", ""),
        ("i32", ""),
        ("i64", ""),
        ("f32", ""),
        ("f64", ""),
        ("str", ""),
        ("binary", ""),
        ("date", ""),
        (
            "datetime<time_unit: (ms, us, ns) timezone (optional)>",
            "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles.",
        ),
        (
            "duration<time_unit: (ms, us, ns)>",
            "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns.",
        ),
        ("time", ""),
        ("object", ""),
        ("unknown", ""),
        ("list<dtype>", ""),
    ];

    let mut rows: Vec<Value> = Vec::with_capacity(DTYPE_NOTES.len());
    for &(dtype, note) in DTYPE_NOTES {
        let columns = vec!["dtype".to_string(), "note".to_string()];
        let cells = vec![Value::string(dtype, span), Value::string(note, span)];
        rows.push(Value::record(
            Record::from_raw_cols_vals(columns, cells),
            span,
        ));
    }

    Value::list(rows, span)
}
#[cfg(test)]
mod test {
    use super::*;
    use super::super::super::test_dataframe::test_dataframe;

    /// Passes `SchemaDF` to the shared `test_dataframe` helper.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SchemaDF)])
    }
}

View File

@ -34,10 +34,13 @@ impl Command for ShapeDF {
description: "Shows row and column shape", description: "Shows row and column shape",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("rows".to_string(), vec![Value::test_int(2)]), Column::new("rows".to_string(), vec![Value::test_int(2)]),
Column::new("columns".to_string(), vec![Value::test_int(2)]), Column::new("columns".to_string(), vec![Value::test_int(2)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -70,7 +73,7 @@ fn command(
let rows_col = Column::new("rows".to_string(), vec![rows]); let rows_col = Column::new("rows".to_string(), vec![rows]);
let cols_col = Column::new("columns".to_string(), vec![cols]); let cols_col = Column::new("columns".to_string(), vec![cols]);
NuDataFrame::try_from_columns(vec![rows_col, cols_col]) NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)
.map(|df| PipelineData::Value(df.into_value(call.head), None)) .map(|df| PipelineData::Value(df.into_value(call.head), None))
} }

View File

@ -37,10 +37,13 @@ impl Command for SliceDF {
description: "Create new dataframe from a slice of the rows", description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]), Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]), Column::new("b".to_string(), vec![Value::test_int(2)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,7 +46,8 @@ impl Command for Summary {
description: "list dataframe descriptives", description: "list dataframe descriptives",
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary", example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"descriptor".to_string(), "descriptor".to_string(),
vec![ vec![
@ -92,7 +93,9 @@ impl Command for Summary {
Value::test_float(1.0), Value::test_float(1.0),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -44,7 +44,8 @@ impl Command for TakeDF {
let indices = ([0 2] | dfr into-df); let indices = ([0 2] | dfr into-df);
$df | dfr take $indices"#, $df | dfr take $indices"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(4), Value::test_int(4)], vec![Value::test_int(4), Value::test_int(4)],
@ -53,7 +54,9 @@ impl Command for TakeDF {
"b".to_string(), "b".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -64,10 +67,13 @@ impl Command for TakeDF {
let indices = ([0 2] | dfr into-df); let indices = ([0 2] | dfr into-df);
$series | dfr take $indices"#, $series | dfr take $indices"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(4), Value::test_int(5)], vec![Value::test_int(4), Value::test_int(5)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -1,10 +1,14 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
}; };
use polars::prelude::*;
#[derive(Clone)] #[derive(Clone)]
pub struct ToDataFrame; pub struct ToDataFrame;
@ -20,6 +24,12 @@ impl Command for ToDataFrame {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into())) .input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -30,7 +40,8 @@ impl Command for ToDataFrame {
description: "Takes a dictionary and creates a dataframe", description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | dfr into-df", example: "[[a b];[1 2] [3 4]] | dfr into-df",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -39,7 +50,9 @@ impl Command for ToDataFrame {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -48,7 +61,8 @@ impl Command for ToDataFrame {
description: "Takes a list of tables and creates a dataframe", description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df", example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
@ -65,7 +79,9 @@ impl Command for ToDataFrame {
Value::test_string("c"), Value::test_string("c"),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -74,14 +90,17 @@ impl Command for ToDataFrame {
description: "Takes a list and creates a dataframe", description: "Takes a list and creates a dataframe",
example: "[a b c] | dfr into-df", example: "[a b c] | dfr into-df",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("a"), Value::test_string("a"),
Value::test_string("b"), Value::test_string("b"),
Value::test_string("c"), Value::test_string("c"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -90,14 +109,41 @@ impl Command for ToDataFrame {
description: "Takes a list of booleans and creates a dataframe", description: "Takes a list of booleans and creates a dataframe",
example: "[true true false] | dfr into-df", example: "[true true false] | dfr into-df",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema",
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
result: Some(
NuDataFrame::try_from_series(vec![
Series::new("a", &[1u8]),
{
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
let vals = vec![AnyValue::StructOwned(
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
.expect("Struct series should not fail")
},
{
let dtype = DataType::List(Box::new(DataType::String));
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
.expect("List series should not fail")
}
], Span::test_data())
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -107,12 +153,17 @@ impl Command for ToDataFrame {
fn run( fn run(
&self, &self,
_engine_state: &EngineState, engine_state: &EngineState,
_stack: &mut Stack, stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
NuDataFrame::try_from_iter(input.into_iter()) let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }
} }

View File

@ -42,7 +42,8 @@ impl Command for WithColumn {
| dfr into-df | dfr into-df
| dfr with-column ([5 6] | dfr into-df) --name c"#, | dfr with-column ([5 6] | dfr into-df) --name c"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -55,7 +56,9 @@ impl Command for WithColumn {
"c".to_string(), "c".to_string(),
vec![Value::test_int(5), Value::test_int(6)], vec![Value::test_int(5), Value::test_int(6)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -70,7 +73,8 @@ impl Command for WithColumn {
] ]
| dfr collect"#, | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -87,7 +91,9 @@ impl Command for WithColumn {
"d".to_string(), "d".to_string(),
vec![Value::test_int(3), Value::test_int(9)], vec![Value::test_int(3), Value::test_int(9)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -32,10 +32,13 @@ impl Command for ExprArgWhere {
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
$df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)", $df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"b_arg".to_string(), "b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -41,7 +41,8 @@ impl Command for ExprConcatStr {
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df); example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df);
$df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#, $df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("three")], vec![Value::test_string("one"), Value::test_string("three")],
@ -61,7 +62,9 @@ impl Command for ExprConcatStr {
Value::test_string("three-four-4"), Value::test_string("three-four-4"),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -52,10 +52,13 @@ impl Command for ExprDatePart {
description: "Creates an expression to capture the year date part", description: "Creates an expression to capture the year date part",
example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#, example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("datetime".to_string(), vec![Value::test_date(dt)]), Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]), Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -407,10 +407,13 @@ lazy_expr_command!(
description: "Max value from columns in a dataframe", description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(6)],), Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],), Column::new("b".to_string(), vec![Value::test_int(4)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -422,7 +425,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr max)"#, | dfr agg (dfr col b | dfr max)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -431,7 +435,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_int(4), Value::test_int(1)], vec![Value::test_int(4), Value::test_int(1)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -452,10 +458,13 @@ lazy_expr_command!(
description: "Min value from columns in a dataframe", description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)],), Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],), Column::new("b".to_string(), vec![Value::test_int(1)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -467,7 +476,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr min)"#, | dfr agg (dfr col b | dfr min)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -476,7 +486,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(1)], vec![Value::test_int(2), Value::test_int(1)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -497,10 +509,13 @@ lazy_expr_command!(
description: "Sums all columns in a dataframe", description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(11)],), Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],), Column::new("b".to_string(), vec![Value::test_int(7)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -512,7 +527,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr sum)"#, | dfr agg (dfr col b | dfr sum)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -521,7 +537,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_int(6), Value::test_int(1)], vec![Value::test_int(6), Value::test_int(1)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -542,10 +560,13 @@ lazy_expr_command!(
description: "Mean value from columns in a dataframe", description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],), Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],), Column::new("b".to_string(), vec![Value::test_float(2.0)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -557,7 +578,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr mean)"#, | dfr agg (dfr col b | dfr mean)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -566,7 +588,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)], vec![Value::test_float(3.0), Value::test_float(1.0)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -589,7 +613,8 @@ expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr median)"#, | dfr agg (dfr col b | dfr median)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -598,7 +623,9 @@ expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)], vec![Value::test_float(3.0), Value::test_float(1.0)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -618,10 +645,13 @@ lazy_expr_command!(
description: "Std value from columns in a dataframe", description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],), Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],), Column::new("b".to_string(), vec![Value::test_float(0.0)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -633,7 +663,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr std)"#, | dfr agg (dfr col b | dfr std)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -642,7 +673,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)], vec![Value::test_float(0.0), Value::test_float(0.0)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -665,10 +698,13 @@ lazy_expr_command!(
"Var value from columns in a dataframe or aggregates columns to their var value", "Var value from columns in a dataframe or aggregates columns to their var value",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],), Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],), Column::new("b".to_string(), vec![Value::test_float(0.0)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -680,7 +716,8 @@ lazy_expr_command!(
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr var)"#, | dfr agg (dfr col b | dfr var)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -689,7 +726,9 @@ lazy_expr_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)], vec![Value::test_float(0.0), Value::test_float(0.0)],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -39,7 +39,8 @@ impl Command for ExprIsIn {
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
$df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#, $df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![ vec![
@ -60,7 +61,9 @@ impl Command for ExprIsIn {
Value::test_bool(false), Value::test_bool(false),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -81,7 +84,8 @@ impl Command for ExprIsIn {
let list: Vec<Value> = call.req(engine_state, stack, 0)?; let list: Vec<Value> = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_pipeline(input, call.head)?; let expr = NuExpression::try_from_pipeline(input, call.head)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?; let values =
NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
let list = values.as_series(call.head)?; let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) { if matches!(list.dtype(), DataType::Object(..)) {

View File

@ -54,7 +54,8 @@ impl Command for ExprOtherwise {
) )
| dfr collect"#, | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
@ -71,7 +72,9 @@ impl Command for ExprOtherwise {
"d".to_string(), "d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -41,7 +41,8 @@ impl Command for ExprQuantile {
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr quantile 0.5)"#, | dfr agg (dfr col b | dfr quantile 0.5)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
@ -50,7 +51,9 @@ impl Command for ExprQuantile {
"b".to_string(), "b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)], vec![Value::test_float(4.0), Value::test_float(1.0)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -62,7 +62,8 @@ impl Command for ExprWhen {
) )
| dfr collect"#, | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
@ -79,7 +80,9 @@ impl Command for ExprWhen {
"d".to_string(), "d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -47,7 +47,8 @@ impl Command for LazyAggregate {
(dfr col b | dfr sum | dfr as "b_sum") (dfr col b | dfr sum | dfr as "b_sum")
]"#, ]"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2)],
@ -64,7 +65,9 @@ impl Command for LazyAggregate {
"b_sum".to_string(), "b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)], vec![Value::test_int(6), Value::test_int(10)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -81,7 +84,8 @@ impl Command for LazyAggregate {
] ]
| dfr collect"#, | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2)],
@ -98,7 +102,9 @@ impl Command for LazyAggregate {
"b_sum".to_string(), "b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)], vec![Value::test_int(6), Value::test_int(10)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -33,7 +33,8 @@ impl Command for LazyCollect {
description: "drop duplicates", description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect", example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(3)],
@ -42,7 +43,9 @@ impl Command for LazyCollect {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(4)], vec![Value::test_int(2), Value::test_int(4)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -69,7 +69,7 @@ impl Command for LazyExplode {
Value::test_string("Skiing"), Value::test_string("Skiing"),
Value::test_string("Football"), Value::test_string("Football"),
]), ]),
]).expect("simple df for test should not fail") ], None).expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
) )
}, },
@ -86,7 +86,7 @@ impl Command for LazyExplode {
Value::test_string("Skiing"), Value::test_string("Skiing"),
Value::test_string("Football"), Value::test_string("Football"),
]), ]),
]).expect("simple df for test should not fail") ], None).expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}, },

View File

@ -38,7 +38,8 @@ impl Command for LazyFetch {
description: "Fetch a rows from the dataframe", description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(6), Value::test_int(4)], vec![Value::test_int(6), Value::test_int(4)],
@ -47,7 +48,9 @@ impl Command for LazyFetch {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(2)], vec![Value::test_int(2), Value::test_int(2)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -38,7 +38,8 @@ impl Command for LazyFillNA {
description: "Fills the NaN values with 0", description: "Fills the NaN values with 0",
example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0", example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_int(1), Value::test_int(1),
@ -47,7 +48,9 @@ impl Command for LazyFillNA {
Value::test_int(3), Value::test_int(3),
Value::test_int(0), Value::test_int(0),
], ],
)]) )],
None,
)
.expect("Df for test should not fail") .expect("Df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -56,7 +59,8 @@ impl Command for LazyFillNA {
description: "Fills the NaN values of a whole dataframe", description: "Fills the NaN values of a whole dataframe",
example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0", example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_float(0.2), Value::test_float(0.1)], vec![Value::test_float(0.2), Value::test_float(0.1)],
@ -65,7 +69,9 @@ impl Command for LazyFillNA {
"b".to_string(), "b".to_string(),
vec![Value::test_int(1), Value::test_int(0)], vec![Value::test_int(1), Value::test_int(0)],
), ),
]) ],
None,
)
.expect("Df for test should not fail") .expect("Df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -123,7 +129,7 @@ impl Command for LazyFillNA {
}) })
.collect::<Vec<Column>>(); .collect::<Vec<Column>>();
Ok(PipelineData::Value( Ok(PipelineData::Value(
NuDataFrame::try_from_columns(dataframe)?.into_value(call.head), NuDataFrame::try_from_columns(dataframe, None)?.into_value(call.head),
None, None,
)) ))
} }

View File

@ -37,7 +37,8 @@ impl Command for LazyFillNull {
description: "Fills the null values by 0", description: "Fills the null values by 0",
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0", example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_int(0), Value::test_int(0),
@ -46,7 +47,9 @@ impl Command for LazyFillNull {
Value::test_int(2), Value::test_int(2),
Value::test_int(2), Value::test_int(2),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -38,7 +38,8 @@ impl Command for LazyFilter {
description: "Filter dataframe using an expression", description: "Filter dataframe using an expression",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(6), Value::test_int(4)], vec![Value::test_int(6), Value::test_int(4)],
@ -47,7 +48,9 @@ impl Command for LazyFilter {
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(2)], vec![Value::test_int(2), Value::test_int(2)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -71,7 +71,7 @@ Example {
Value::test_string("Skiing"), Value::test_string("Skiing"),
Value::test_string("Football"), Value::test_string("Football"),
]), ]),
]).expect("simple df for test should not fail") ], None).expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
) )
}, },
@ -88,7 +88,7 @@ Example {
Value::test_string("Skiing"), Value::test_string("Skiing"),
Value::test_string("Football"), Value::test_string("Football"),
]), ]),
]).expect("simple df for test should not fail") ], None).expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}, },

View File

@ -46,7 +46,8 @@ impl Command for ToLazyGroupBy {
(dfr col b | dfr sum | dfr as "b_sum") (dfr col b | dfr sum | dfr as "b_sum")
]"#, ]"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2)],
@ -63,7 +64,9 @@ impl Command for ToLazyGroupBy {
"b_sum".to_string(), "b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)], vec![Value::test_int(6), Value::test_int(10)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -80,7 +83,8 @@ impl Command for ToLazyGroupBy {
] ]
| dfr collect"#, | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(1), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2)],
@ -97,7 +101,9 @@ impl Command for ToLazyGroupBy {
"b_sum".to_string(), "b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)], vec![Value::test_int(6), Value::test_int(10)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -53,7 +53,8 @@ impl Command for LazyJoin {
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
$df_a | dfr join $df_b a foo | dfr collect"#, $df_a | dfr join $df_b a foo | dfr collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![ vec![
@ -99,7 +100,9 @@ impl Command for LazyJoin {
Value::test_string("let"), Value::test_string("let"),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -110,7 +113,8 @@ impl Command for LazyJoin {
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
$df_a | dfr join $df_b a foo"#, $df_a | dfr join $df_b a foo"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![ vec![
@ -156,7 +160,9 @@ impl Command for LazyJoin {
Value::test_string("let"), Value::test_string("let"),
], ],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -188,7 +188,8 @@ lazy_command!(
description: "Reverses the dataframe.", description: "Reverses the dataframe.",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
@ -197,7 +198,9 @@ lazy_command!(
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
), ),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -231,10 +234,13 @@ lazy_command!(
description: "Median value from columns in a dataframe", description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],), Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],), Column::new("b".to_string(), vec![Value::test_float(2.0)],),
]) ],
None
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -38,10 +38,13 @@ impl Command for LazyQuantile {
description: "quantile value from columns in a dataframe", description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]), Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]), Column::new("b".to_string(), vec![Value::test_float(2.0)]),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,10 +37,13 @@ impl Command for LazySelect {
description: "Select a column from the dataframe", description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -60,7 +60,7 @@ impl Command for LazySortBy {
"b".to_string(), "b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)], vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
), ),
]) ], None)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -89,7 +89,7 @@ impl Command for LazySortBy {
Value::test_int(2), Value::test_int(2),
], ],
), ),
]) ], None)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -1,9 +1,12 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{NuDataFrame, NuLazyFrame}; use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Type, Value, Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value,
}; };
#[derive(Clone)] #[derive(Clone)]
@ -20,6 +23,12 @@ impl Command for ToLazyFrame {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into())) .input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into())) .category(Category::Custom("lazyframe".into()))
} }
@ -34,12 +43,17 @@ impl Command for ToLazyFrame {
fn run( fn run(
&self, &self,
_engine_state: &EngineState, engine_state: &EngineState,
_stack: &mut Stack, stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?; let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df); let lazy = NuLazyFrame::from_dataframe(df);
let value = Value::custom_value(Box::new(lazy), call.head); let value = Value::custom_value(Box::new(lazy), call.head);

View File

@ -33,10 +33,13 @@ impl Command for AllFalse {
description: "Returns true if all values are false", description: "Returns true if all values are false",
example: "[false false false] | dfr into-df | dfr all-false", example: "[false false false] | dfr into-df | dfr all-false",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(), "all_false".to_string(),
vec![Value::test_bool(true)], vec![Value::test_bool(true)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -47,10 +50,13 @@ impl Command for AllFalse {
let res = ($s > 9); let res = ($s > 9);
$res | dfr all-false"#, $res | dfr all-false"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(), "all_false".to_string(),
vec![Value::test_bool(false)], vec![Value::test_bool(false)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -88,7 +94,10 @@ fn command(
let value = Value::bool(!bool.any(), call.head); let value = Value::bool(!bool.any(), call.head);
NuDataFrame::try_from_columns(vec![Column::new("all_false".to_string(), vec![value])]) NuDataFrame::try_from_columns(
vec![Column::new("all_false".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View File

@ -33,10 +33,13 @@ impl Command for AllTrue {
description: "Returns true if all values are true", description: "Returns true if all values are true",
example: "[true true true] | dfr into-df | dfr all-true", example: "[true true true] | dfr into-df | dfr all-true",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(), "all_true".to_string(),
vec![Value::test_bool(true)], vec![Value::test_bool(true)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -47,10 +50,13 @@ impl Command for AllTrue {
let res = ($s > 9); let res = ($s > 9);
$res | dfr all-true"#, $res | dfr all-true"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(), "all_true".to_string(),
vec![Value::test_bool(false)], vec![Value::test_bool(false)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -88,7 +94,7 @@ fn command(
let value = Value::bool(bool.all(), call.head); let value = Value::bool(bool.all(), call.head);
NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])]) NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])], None)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View File

@ -37,10 +37,10 @@ impl Command for ArgMax {
description: "Returns index for max value", description: "Returns index for max value",
example: "[1 3 2] | dfr into-df | dfr arg-max", example: "[1 3 2] | dfr into-df | dfr arg-max",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
"arg_max".to_string(), vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
vec![Value::test_int(1)], None,
)]) )
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,10 +37,10 @@ impl Command for ArgMin {
description: "Returns index for min value", description: "Returns index for min value",
example: "[1 3 2] | dfr into-df | dfr arg-min", example: "[1 3 2] | dfr into-df | dfr arg-min",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
"arg_min".to_string(), vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
vec![Value::test_int(0)], None,
)]) )
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -69,7 +69,8 @@ impl Command for Cumulative {
description: "Cumulative sum for a series", description: "Cumulative sum for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum", example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0_cumulative_sum".to_string(), "0_cumulative_sum".to_string(),
vec![ vec![
Value::test_int(1), Value::test_int(1),
@ -78,7 +79,9 @@ impl Command for Cumulative {
Value::test_int(10), Value::test_int(10),
Value::test_int(15), Value::test_int(15),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -53,7 +53,8 @@ impl Command for AsDateTime {
description: "Converts string to datetime", description: "Converts string to datetime",
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#, example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(), "datetime".to_string(),
vec![ vec![
Value::date( Value::date(
@ -73,7 +74,9 @@ impl Command for AsDateTime {
Span::test_data(), Span::test_data(),
), ),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -82,7 +85,8 @@ impl Command for AsDateTime {
description: "Converts string to datetime with high resolutions", description: "Converts string to datetime with high resolutions",
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#, example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(), "datetime".to_string(),
vec![ vec![
Value::date( Value::date(
@ -102,7 +106,9 @@ impl Command for AsDateTime {
Span::test_data(), Span::test_data(),
), ),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetDay {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-day"#, $df | dfr get-day"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(4), Value::test_int(4)], vec![Value::test_int(4), Value::test_int(4)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetHour {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-hour"#, $df | dfr get-hour"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(16), Value::test_int(16)], vec![Value::test_int(16), Value::test_int(16)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetMinute {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-minute"#, $df | dfr get-minute"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(39), Value::test_int(39)], vec![Value::test_int(39), Value::test_int(39)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetMonth {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-month"#, $df | dfr get-month"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(8), Value::test_int(8)], vec![Value::test_int(8), Value::test_int(8)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetNanosecond {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-nanosecond"#, $df | dfr get-nanosecond"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(0), Value::test_int(0)], vec![Value::test_int(0), Value::test_int(0)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetOrdinal {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-ordinal"#, $df | dfr get-ordinal"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(217), Value::test_int(217)], vec![Value::test_int(217), Value::test_int(217)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetSecond {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-second"#, $df | dfr get-second"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(18), Value::test_int(18)], vec![Value::test_int(18), Value::test_int(18)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetWeek {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-week"#, $df | dfr get-week"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(32), Value::test_int(32)], vec![Value::test_int(32), Value::test_int(32)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetWeekDay {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-weekday"#, $df | dfr get-weekday"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(2), Value::test_int(2)], vec![Value::test_int(2), Value::test_int(2)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -35,10 +35,13 @@ impl Command for GetYear {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr get-year"#, $df | dfr get-year"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(2020), Value::test_int(2020)], vec![Value::test_int(2020), Value::test_int(2020)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,7 +46,8 @@ impl Command for ArgSort {
description: "Returns indexes for a sorted series", description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort", example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(), "arg_sort".to_string(),
vec![ vec![
Value::test_int(0), Value::test_int(0),
@ -55,7 +56,9 @@ impl Command for ArgSort {
Value::test_int(3), Value::test_int(3),
Value::test_int(4), Value::test_int(4),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -64,7 +67,8 @@ impl Command for ArgSort {
description: "Returns indexes for a sorted series", description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse", example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(), "arg_sort".to_string(),
vec![ vec![
Value::test_int(3), Value::test_int(3),
@ -73,7 +77,9 @@ impl Command for ArgSort {
Value::test_int(2), Value::test_int(2),
Value::test_int(0), Value::test_int(0),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,10 +37,13 @@ impl Command for ArgTrue {
description: "Returns indexes where values are true", description: "Returns indexes where values are true",
example: "[false true false] | dfr into-df | dfr arg-true", example: "[false true false] | dfr into-df | dfr arg-true",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"arg_true".to_string(), "arg_true".to_string(),
vec![Value::test_int(1)], vec![Value::test_int(1)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,10 +37,13 @@ impl Command for ArgUnique {
description: "Returns indexes for unique values", description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique", example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"arg_unique".to_string(), "arg_unique".to_string(),
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)], vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -43,7 +43,8 @@ impl Command for SetWithIndex {
let indices = ([0 2] | dfr into-df); let indices = ([0 2] | dfr into-df);
$series | dfr set-with-idx 6 --indices $indices"#, $series | dfr set-with-idx 6 --indices $indices"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_int(6), Value::test_int(6),
@ -53,7 +54,9 @@ impl Command for SetWithIndex {
Value::test_int(4), Value::test_int(4),
Value::test_int(3), Value::test_int(3),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -34,7 +34,8 @@ impl Command for IsDuplicated {
description: "Create mask indicating duplicated values", description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated", example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(), "is_duplicated".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
@ -45,7 +46,9 @@ impl Command for IsDuplicated {
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true), Value::test_bool(true),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -55,7 +58,8 @@ impl Command for IsDuplicated {
example: example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated", "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(), "is_duplicated".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
@ -64,7 +68,9 @@ impl Command for IsDuplicated {
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -36,7 +36,8 @@ impl Command for IsIn {
example: r#"let other = ([1 3 6] | dfr into-df); example: r#"let other = ([1 3 6] | dfr into-df);
[5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#, [5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(), "is_in".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
@ -47,7 +48,9 @@ impl Command for IsIn {
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -41,7 +41,8 @@ impl Command for IsNotNull {
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-not-null"#, $res | dfr is-not-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_not_null".to_string(), "is_not_null".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
@ -49,7 +50,9 @@ impl Command for IsNotNull {
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -41,7 +41,8 @@ impl Command for IsNull {
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-null"#, $res | dfr is-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_null".to_string(), "is_null".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
@ -49,7 +50,9 @@ impl Command for IsNull {
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -34,7 +34,8 @@ impl Command for IsUnique {
description: "Create mask indicating unique values", description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique", example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(), "is_unique".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
@ -45,7 +46,9 @@ impl Command for IsUnique {
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -54,7 +57,8 @@ impl Command for IsUnique {
description: "Create mask indicating duplicated rows in a dataframe", description: "Create mask indicating duplicated rows in a dataframe",
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique", example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(), "is_unique".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
@ -63,7 +67,9 @@ impl Command for IsUnique {
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -34,14 +34,17 @@ impl Command for NotSeries {
description: "Inverts boolean mask", description: "Inverts boolean mask",
example: "[true false true] | dfr into-df | dfr not", example: "[true false true] | dfr into-df | dfr not",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -43,7 +43,8 @@ impl Command for SetSeries {
let mask = ($s | dfr is-null); let mask = ($s | dfr is-null);
$s | dfr set 0 --mask $mask"#, $s | dfr set 0 --mask $mask"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_int(0), Value::test_int(0),
@ -52,7 +53,9 @@ impl Command for SetSeries {
Value::test_int(2), Value::test_int(2),
Value::test_int(2), Value::test_int(2),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -33,10 +33,13 @@ impl Command for NNull {
example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df); example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df);
($s / $s) | dfr count-null"#, ($s / $s) | dfr count-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"count_null".to_string(), "count_null".to_string(),
vec![Value::test_int(2)], vec![Value::test_int(2)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -65,7 +68,10 @@ fn command(
let res = df.as_series(call.head)?.null_count(); let res = df.as_series(call.head)?.null_count();
let value = Value::int(res as i64, call.head); let value = Value::int(res as i64, call.head);
NuDataFrame::try_from_columns(vec![Column::new("count_null".to_string(), vec![value])]) NuDataFrame::try_from_columns(
vec![Column::new("count_null".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View File

@ -38,10 +38,13 @@ impl Command for NUnique {
description: "Counts unique values", description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"count_unique".to_string(), "count_unique".to_string(),
vec![Value::test_int(4)], vec![Value::test_int(4)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -96,7 +99,10 @@ fn command(
let value = Value::int(res as i64, call.head); let value = Value::int(res as i64, call.head);
NuDataFrame::try_from_columns(vec![Column::new("count_unique".to_string(), vec![value])]) NuDataFrame::try_from_columns(
vec![Column::new("count_unique".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View File

@ -72,7 +72,8 @@ impl Command for Rolling {
description: "Rolling sum for a series", description: "Rolling sum for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls", example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_sum".to_string(), "0_rolling_sum".to_string(),
vec![ vec![
Value::test_int(3), Value::test_int(3),
@ -80,7 +81,9 @@ impl Command for Rolling {
Value::test_int(7), Value::test_int(7),
Value::test_int(9), Value::test_int(9),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
@ -89,7 +92,8 @@ impl Command for Rolling {
description: "Rolling max for a series", description: "Rolling max for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls", example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_max".to_string(), "0_rolling_max".to_string(),
vec![ vec![
Value::test_int(2), Value::test_int(2),
@ -97,7 +101,9 @@ impl Command for Rolling {
Value::test_int(4), Value::test_int(4),
Value::test_int(5), Value::test_int(5),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -44,10 +44,13 @@ impl Command for Shift {
description: "Shifts the values by a given period", description: "Shifts the values by a given period",
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls", example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)], vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -40,14 +40,17 @@ impl Command for Concatenate {
example: r#"let other = ([za xs cd] | dfr into-df); example: r#"let other = ([za xs cd] | dfr into-df);
[abc abc abc] | dfr into-df | dfr concatenate $other"#, [abc abc abc] | dfr into-df | dfr concatenate $other"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("abcza"), Value::test_string("abcza"),
Value::test_string("abcxs"), Value::test_string("abcxs"),
Value::test_string("abccd"), Value::test_string("abccd"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -39,14 +39,17 @@ impl Command for Contains {
description: "Returns boolean indicating if pattern was found", description: "Returns boolean indicating if pattern was found",
example: "[abc acb acb] | dfr into-df | dfr contains ab", example: "[abc acb acb] | dfr into-df | dfr contains ab",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,14 +46,17 @@ impl Command for Replace {
description: "Replaces string", description: "Replaces string",
example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB", example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("ABc"), Value::test_string("ABc"),
Value::test_string("ABc"), Value::test_string("ABc"),
Value::test_string("ABc"), Value::test_string("ABc"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -46,14 +46,17 @@ impl Command for ReplaceAll {
description: "Replaces string", description: "Replaces string",
example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A", example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("AbAc"), Value::test_string("AbAc"),
Value::test_string("AbAc"), Value::test_string("AbAc"),
Value::test_string("AbAc"), Value::test_string("AbAc"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -33,10 +33,13 @@ impl Command for StrLengths {
description: "Returns string lengths", description: "Returns string lengths",
example: "[a ab abc] | dfr into-df | dfr str-lengths", example: "[a ab abc] | dfr into-df | dfr str-lengths",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -36,14 +36,17 @@ impl Command for StrSlice {
description: "Creates slices from the strings", description: "Creates slices from the strings",
example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2", example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("bc"), Value::test_string("bc"),
Value::test_string("bc"), Value::test_string("bc"),
Value::test_string("bc"), Value::test_string("bc"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,13 +37,16 @@ impl Command for StrFTime {
let df = ([$dt $dt] | dfr into-df); let df = ([$dt $dt] | dfr into-df);
$df | dfr strftime "%Y/%m/%d""#, $df | dfr strftime "%Y/%m/%d""#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("2020/08/04"), Value::test_string("2020/08/04"),
Value::test_string("2020/08/04"), Value::test_string("2020/08/04"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -33,14 +33,17 @@ impl Command for ToLowerCase {
description: "Modifies strings to lowercase", description: "Modifies strings to lowercase",
example: "[Abc aBc abC] | dfr into-df | dfr lowercase", example: "[Abc aBc abC] | dfr into-df | dfr lowercase",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("abc"), Value::test_string("abc"),
Value::test_string("abc"), Value::test_string("abc"),
Value::test_string("abc"), Value::test_string("abc"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -37,14 +37,17 @@ impl Command for ToUpperCase {
description: "Modifies strings to uppercase", description: "Modifies strings to uppercase",
example: "[Abc aBc abC] | dfr into-df | dfr uppercase", example: "[Abc aBc abC] | dfr into-df | dfr uppercase",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(), "0".to_string(),
vec![ vec![
Value::test_string("ABC"), Value::test_string("ABC"),
Value::test_string("ABC"), Value::test_string("ABC"),
Value::test_string("ABC"), Value::test_string("ABC"),
], ],
)]) )],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -53,10 +53,10 @@ impl Command for Unique {
description: "Returns unique values from a series", description: "Returns unique values from a series",
example: "[2 2 2 2 2] | dfr into-df | dfr unique", example: "[2 2 2 2 2] | dfr into-df | dfr unique",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(
"0".to_string(), vec![Column::new("0".to_string(), vec![Value::test_int(2)])],
vec![Value::test_int(2)], None,
)]) )
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -34,7 +34,8 @@ impl Command for ValueCount {
description: "Calculates value counts", description: "Calculates value counts",
example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts", example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(
vec![
Column::new( Column::new(
"0".to_string(), "0".to_string(),
vec![Value::test_int(5), Value::test_int(6)], vec![Value::test_int(5), Value::test_int(6)],
@ -43,7 +44,9 @@ impl Command for ValueCount {
"count".to_string(), "count".to_string(),
vec![Value::test_int(4), Value::test_int(2)], vec![Value::test_int(4), Value::test_int(2)],
), ),
]) ],
None,
)
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),

View File

@ -2,6 +2,7 @@ mod nu_dataframe;
mod nu_expression; mod nu_expression;
mod nu_lazyframe; mod nu_lazyframe;
mod nu_lazygroupby; mod nu_lazygroupby;
mod nu_schema;
mod nu_when; mod nu_when;
pub mod utils; pub mod utils;
@ -9,4 +10,5 @@ pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_expression::NuExpression; pub use nu_expression::NuExpression;
pub use nu_lazyframe::NuLazyFrame; pub use nu_lazyframe::NuLazyFrame;
pub use nu_lazygroupby::NuLazyGroupBy; pub use nu_lazygroupby::NuLazyGroupBy;
pub use nu_schema::NuSchema;
pub use nu_when::NuWhen; pub use nu_when::NuWhen;

View File

@ -9,14 +9,17 @@ use polars::chunked_array::ChunkedArray;
use polars::datatypes::AnyValue; use polars::datatypes::AnyValue;
use polars::export::arrow::Either; use polars::export::arrow::Either;
use polars::prelude::{ use polars::prelude::{
DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries, DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type,
ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait,
ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, ObjectType, Series, ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray,
TemporalMethods, TimeUnit, ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type,
UInt64Type, UInt8Type,
}; };
use nu_protocol::{Record, ShellError, Span, Value}; use nu_protocol::{Record, ShellError, Span, Value};
use crate::dataframe::values::NuSchema;
use super::{DataFrameValue, NuDataFrame}; use super::{DataFrameValue, NuDataFrame};
const NANOS_PER_DAY: i64 = 86_400_000_000_000; const NANOS_PER_DAY: i64 = 86_400_000_000_000;
@ -28,6 +31,39 @@ const NANOS_PER_DAY: i64 = 86_400_000_000_000;
// practical reasons (~ a few thousand rows). // practical reasons (~ a few thousand rows).
const VALUES_CAPACITY: usize = 10; const VALUES_CAPACITY: usize = 10;
// Converts a nushell value into the requested Rust primitive.
//
// `$value` must be an identifier bound to something that offers `as_i64()`
// and `as_f64()` returning `Result<_, ShellError>`. The second argument is
// the literal name of the target primitive (`u8`, `i32`, `f64`, ...).
//
// Integer targets narrower than (or differently signed from) `i64` are
// range-checked: a value that does not fit yields a `ShellError::GenericError`
// instead of being wrapped by an `as` cast (e.g. 300 -> 44 for `u8`, or
// -1 -> u64::MAX for `u64`). `f32` is still produced with a plain cast, since
// losing precision is the expected outcome of asking for a narrower float.
macro_rules! value_to_primitive {
    // Internal rule: read the value as i64, then narrow it with a range check.
    // The trait is spelled out in full so the macro does not depend on
    // `TryFrom` being in the caller's prelude.
    (@checked $value:ident, $int:ty) => {
        $value.as_i64().and_then(|raw| {
            <$int as ::std::convert::TryFrom<i64>>::try_from(raw).map_err(|_| {
                ShellError::GenericError {
                    error: format!(
                        "Value {} is out of range for type {}",
                        raw,
                        stringify!($int)
                    ),
                    msg: "".into(),
                    span: None,
                    help: None,
                    inner: vec![],
                }
            })
        })
    };
    ($value:ident, u8) => {
        value_to_primitive!(@checked $value, u8)
    };
    ($value:ident, u16) => {
        value_to_primitive!(@checked $value, u16)
    };
    ($value:ident, u32) => {
        value_to_primitive!(@checked $value, u32)
    };
    ($value:ident, u64) => {
        value_to_primitive!(@checked $value, u64)
    };
    ($value:ident, i8) => {
        value_to_primitive!(@checked $value, i8)
    };
    ($value:ident, i16) => {
        value_to_primitive!(@checked $value, i16)
    };
    ($value:ident, i32) => {
        value_to_primitive!(@checked $value, i32)
    };
    // i64 is the native integer representation; nothing to narrow.
    ($value:ident, i64) => {
        $value.as_i64()
    };
    ($value:ident, f32) => {
        $value.as_f64().map(|v| v as f32)
    };
    ($value:ident, f64) => {
        $value.as_f64()
    };
}
#[derive(Debug)] #[derive(Debug)]
pub struct Column { pub struct Column {
name: String, name: String,
@ -74,23 +110,10 @@ impl DerefMut for Column {
} }
} }
#[derive(Debug)]
pub enum InputType {
Integer,
Float,
String,
Boolean,
Object,
Date,
Duration,
Filesize,
List(Box<InputType>),
}
#[derive(Debug)] #[derive(Debug)]
pub struct TypedColumn { pub struct TypedColumn {
column: Column, column: Column,
column_type: Option<InputType>, column_type: Option<DataType>,
} }
impl TypedColumn { impl TypedColumn {
@ -144,9 +167,13 @@ pub fn add_separator(values: &mut Vec<Value>, df: &DataFrame, span: Span) {
} }
// Inserting the values found in a Value::List or Value::Record // Inserting the values found in a Value::List or Value::Record
pub fn insert_record(column_values: &mut ColumnMap, record: Record) -> Result<(), ShellError> { pub fn insert_record(
column_values: &mut ColumnMap,
record: Record,
maybe_schema: &Option<NuSchema>,
) -> Result<(), ShellError> {
for (col, value) in record { for (col, value) in record {
insert_value(value, col, column_values)?; insert_value(value, col, column_values, maybe_schema)?;
} }
Ok(()) Ok(())
@ -156,16 +183,26 @@ pub fn insert_value(
value: Value, value: Value,
key: String, key: String,
column_values: &mut ColumnMap, column_values: &mut ColumnMap,
maybe_schema: &Option<NuSchema>,
) -> Result<(), ShellError> { ) -> Result<(), ShellError> {
let col_val = match column_values.entry(key.clone()) { let col_val = match column_values.entry(key.clone()) {
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)), Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
Entry::Occupied(entry) => entry.into_mut(), Entry::Occupied(entry) => entry.into_mut(),
}; };
// Checking that the type for the value is the same // Checking that the type for the value is the same
// for the previous value in the column // for the previous value in the column
if col_val.values.is_empty() { if col_val.values.is_empty() {
col_val.column_type = Some(value_to_input_type(&value)); if let Some(schema) = maybe_schema {
if let Some(field) = schema.schema.get_field(&key) {
col_val.column_type = Some(field.data_type().clone());
}
}
if col_val.column_type.is_none() {
col_val.column_type = Some(value_to_data_type(&value));
}
col_val.values.push(value); col_val.values.push(value);
} else { } else {
let prev_value = &col_val.values[col_val.values.len() - 1]; let prev_value = &col_val.values[col_val.values.len() - 1];
@ -179,11 +216,11 @@ pub fn insert_value(
| (Value::Filesize { .. }, Value::Filesize { .. }) | (Value::Filesize { .. }, Value::Filesize { .. })
| (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value), | (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value),
(Value::List { .. }, _) => { (Value::List { .. }, _) => {
col_val.column_type = Some(value_to_input_type(&value)); col_val.column_type = Some(value_to_data_type(&value));
col_val.values.push(value); col_val.values.push(value);
} }
_ => { _ => {
col_val.column_type = Some(InputType::Object); col_val.column_type = Some(DataType::Object("Value", None));
col_val.values.push(value); col_val.values.push(value);
} }
} }
@ -192,15 +229,15 @@ pub fn insert_value(
Ok(()) Ok(())
} }
fn value_to_input_type(value: &Value) -> InputType { fn value_to_data_type(value: &Value) -> DataType {
match &value { match &value {
Value::Int { .. } => InputType::Integer, Value::Int { .. } => DataType::Int64,
Value::Float { .. } => InputType::Float, Value::Float { .. } => DataType::Float64,
Value::String { .. } => InputType::String, Value::String { .. } => DataType::String,
Value::Bool { .. } => InputType::Boolean, Value::Bool { .. } => DataType::Boolean,
Value::Date { .. } => InputType::Date, Value::Date { .. } => DataType::Date,
Value::Duration { .. } => InputType::Duration, Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds),
Value::Filesize { .. } => InputType::Filesize, Value::Filesize { .. } => DataType::Int64,
Value::List { vals, .. } => { Value::List { vals, .. } => {
// We need to determine the type inside of the list. // We need to determine the type inside of the list.
// Since Value::List does not have any kind of // Since Value::List does not have any kind of
@ -211,13 +248,213 @@ fn value_to_input_type(value: &Value) -> InputType {
let list_type = vals let list_type = vals
.iter() .iter()
.filter(|v| !matches!(v, Value::Nothing { .. })) .filter(|v| !matches!(v, Value::Nothing { .. }))
.map(value_to_input_type) .map(value_to_data_type)
.nth(1) .nth(1)
.unwrap_or(InputType::Object); .unwrap_or(DataType::Object("Value", None));
InputType::List(Box::new(list_type)) DataType::List(Box::new(list_type))
} }
_ => InputType::Object, _ => DataType::Object("Value", None),
}
}
fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, ShellError> {
if let Some(column_type) = &column.column_type {
match column_type {
DataType::Float32 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_f64().map(|v| v as f32))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::Float64 => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
Ok(Series::new(name, series_values?))
}
DataType::UInt8 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as u8))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::UInt16 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as u16))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::UInt32 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as u32))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::UInt64 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as u64))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::Int8 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as i8))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::Int16 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as i16))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::Int32 => {
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| v as i32))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::Int64 => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
Ok(Series::new(name, series_values?))
}
DataType::Boolean => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_bool()).collect();
Ok(Series::new(name, series_values?))
}
DataType::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
Ok(Series::new(name, series_values?))
}
DataType::Object(_, _) => value_to_series(name, &column.values),
DataType::Duration(time_unit) => {
//todo - finish type conversion
let series_values: Result<Vec<_>, _> = column
.values
.iter()
.map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit)))
.collect();
Ok(Series::new(name, series_values?))
}
DataType::List(list_type) => {
match input_type_list_to_series(name, list_type.as_ref(), &column.values) {
Ok(series) => Ok(series),
Err(_) => {
// An error case will occur when there are lists of mixed types.
// If this happens, fallback to object list
input_type_list_to_series(
name,
&DataType::Object("unknown", None),
&column.values,
)
}
}
}
DataType::Date => {
let it = column.values.iter().map(|v| {
if let Value::Date { val, .. } = &v {
Some(val.timestamp_nanos_opt().unwrap_or_default())
} else {
None
}
});
let res: DatetimeChunked = ChunkedArray::<Int64Type>::from_iter_options(name, it)
.into_datetime(TimeUnit::Nanoseconds, None);
Ok(res.into_series())
}
DataType::Datetime(tu, maybe_tz) => {
let dates = column
.values
.iter()
.map(|v| {
if let Value::Date { val, .. } = &v {
// If there is a timezone specified, make sure
// the value is converted to it
Ok(maybe_tz
.as_ref()
.map(|tz| tz.parse::<Tz>().map(|tz| val.with_timezone(&tz)))
.transpose()
.map_err(|e| ShellError::GenericError {
error: "Error parsing timezone".into(),
msg: "".into(),
span: None,
help: Some(e.to_string()),
inner: vec![],
})?
.and_then(|dt| dt.timestamp_nanos_opt())
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
} else {
Ok(None)
}
})
.collect::<Result<Vec<Option<i64>>, ShellError>>()?;
let res: DatetimeChunked =
ChunkedArray::<Int64Type>::from_iter_options(name, dates.into_iter())
.into_datetime(*tu, maybe_tz.clone());
Ok(res.into_series())
}
DataType::Struct(fields) => {
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
let mut structs: Vec<Series> = Vec::new();
for v in column.values.iter() {
let mut column_values: ColumnMap = IndexMap::new();
let record = v.as_record()?;
insert_record(&mut column_values, record.clone(), &schema)?;
let df = from_parsed_columns(column_values)?;
structs.push(df.as_series(Span::unknown())?);
}
let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
ShellError::GenericError {
error: format!("Error creating struct: {e}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
}
})?;
Ok(chunked.into_series())
}
_ => Err(ShellError::GenericError {
error: format!("Error creating dataframe: Unsupported type: {column_type:?}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
}),
}
} else {
Err(ShellError::GenericError {
error: "Passed a type column with no type".into(),
msg: "".into(),
span: None,
help: None,
inner: vec![],
})
} }
} }
@ -227,80 +464,22 @@ fn value_to_input_type(value: &Value) -> InputType {
pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, ShellError> { pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new(); let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values { for (name, column) in column_values {
if let Some(column_type) = &column.column_type { let series = typed_column_to_series(&name, column)?;
match column_type { df_series.push(series);
InputType::Float => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Integer | InputType::Filesize | InputType::Duration => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Boolean => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_bool()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Object => {
df_series.push(input_type_object_to_series(&name, &column.values)?)
}
InputType::List(list_type) => {
match input_type_list_to_series(&name, list_type.as_ref(), &column.values) {
Ok(series) => df_series.push(series),
Err(_) => {
// An error case will occur when there are lists of mixed types.
// If this happens, fallback to object list
df_series.push(input_type_list_to_series(
&name,
&InputType::Object,
&column.values,
)?)
}
}
}
InputType::Date => {
let it = column.values.iter().map(|v| {
if let Value::Date { val, .. } = &v {
Some(val.timestamp_nanos_opt().unwrap_or_default())
} else {
None
}
});
let res: DatetimeChunked =
ChunkedArray::<Int64Type>::from_iter_options(&name, it)
.into_datetime(TimeUnit::Nanoseconds, None);
df_series.push(res.into_series())
}
}
}
} }
DataFrame::new(df_series) DataFrame::new(df_series)
.map(|df| NuDataFrame::new(false, df)) .map(|df| NuDataFrame::new(false, df))
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "Error creating dataframe".into(), error: "Error creating dataframe".into(),
msg: "".into(), msg: e.to_string(),
span: None, span: None,
help: Some(e.to_string()), help: None,
inner: vec![], inner: vec![],
}) })
} }
fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> { fn value_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> {
let mut builder = ObjectChunkedBuilder::<DataFrameValue>::new(name, values.len()); let mut builder = ObjectChunkedBuilder::<DataFrameValue>::new(name, values.len());
for v in values { for v in values {
@ -313,21 +492,45 @@ fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, S
fn input_type_list_to_series( fn input_type_list_to_series(
name: &str, name: &str,
list_type: &InputType, data_type: &DataType,
values: &[Value], values: &[Value],
) -> Result<Series, ShellError> { ) -> Result<Series, ShellError> {
let inconsistent_error = |_| ShellError::GenericError { let inconsistent_error = |_| ShellError::GenericError {
error: format!( error: format!(
"column {name} contains a list with inconsistent types: Expecting: {list_type:?}" "column {name} contains a list with inconsistent types: Expecting: {data_type:?}"
), ),
msg: "".into(), msg: "".into(),
span: None, span: None,
help: None, help: None,
inner: vec![], inner: vec![],
}; };
match *list_type {
macro_rules! primitive_list_series {
($list_type:ty, $vec_type:tt) => {{
let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new(
name,
values.len(),
VALUES_CAPACITY,
data_type.clone(),
);
for v in values {
let value_list = v
.as_list()?
.iter()
.map(|v| value_to_primitive!(v, $vec_type))
.collect::<Result<Vec<$vec_type>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}};
}
match *data_type {
// list of boolean values // list of boolean values
InputType::Boolean => { DataType::Boolean => {
let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
for v in values { for v in values {
let value_list = v let value_list = v
@ -341,52 +544,18 @@ fn input_type_list_to_series(
let res = builder.finish(); let res = builder.finish();
Ok(res.into_series()) Ok(res.into_series())
} }
// list of values that reduce down to i64 DataType::Duration(_) => primitive_list_series!(Int64Type, i64),
InputType::Integer | InputType::Filesize | InputType::Duration => { DataType::UInt8 => primitive_list_series!(UInt8Type, u8),
let logical_type = match list_type { DataType::UInt16 => primitive_list_series!(UInt16Type, u16),
InputType::Duration => DataType::Duration(TimeUnit::Milliseconds), DataType::UInt32 => primitive_list_series!(UInt32Type, u32),
_ => DataType::Int64, DataType::UInt64 => primitive_list_series!(UInt64Type, u64),
}; DataType::Int8 => primitive_list_series!(Int8Type, i8),
DataType::Int16 => primitive_list_series!(Int16Type, i16),
let mut builder = ListPrimitiveChunkedBuilder::<Int64Type>::new( DataType::Int32 => primitive_list_series!(Int32Type, i32),
name, DataType::Int64 => primitive_list_series!(Int64Type, i64),
values.len(), DataType::Float32 => primitive_list_series!(Float32Type, f32),
VALUES_CAPACITY, DataType::Float64 => primitive_list_series!(Float64Type, f64),
logical_type, DataType::String => {
);
for v in values {
let value_list = v
.as_list()?
.iter()
.map(|v| v.as_i64())
.collect::<Result<Vec<i64>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}
InputType::Float => {
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
name,
values.len(),
VALUES_CAPACITY,
DataType::Float64,
);
for v in values {
let value_list = v
.as_list()?
.iter()
.map(|v| v.as_f64())
.collect::<Result<Vec<f64>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}
InputType::String => {
let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
for v in values { for v in values {
let value_list = v let value_list = v
@ -400,9 +569,7 @@ fn input_type_list_to_series(
let res = builder.finish(); let res = builder.finish();
Ok(res.into_series()) Ok(res.into_series())
} }
// Treat lists as objects at this depth as it is expensive to calculate the list type DataType::Date => {
// We can revisit this later if necessary
InputType::Date => {
let mut builder = AnonymousOwnedListBuilder::new( let mut builder = AnonymousOwnedListBuilder::new(
name, name,
values.len(), values.len(),
@ -434,11 +601,11 @@ fn input_type_list_to_series(
let res = builder.finish(); let res = builder.finish();
Ok(res.into_series()) Ok(res.into_series())
} }
InputType::List(ref sub_list_type) => { DataType::List(ref sub_list_type) => {
Ok(input_type_list_to_series(name, sub_list_type, values)?) Ok(input_type_list_to_series(name, sub_list_type, values)?)
} }
// treat everything else as an object // treat everything else as an object
_ => Ok(input_type_object_to_series(name, values)?), _ => Ok(value_to_series(name, values)?),
} }
} }
@ -1081,7 +1248,7 @@ mod tests {
}; };
let typed_column = TypedColumn { let typed_column = TypedColumn {
column, column,
column_type: Some(InputType::List(Box::new(InputType::String))), column_type: Some(DataType::List(Box::new(DataType::String))),
}; };
let column_map = indexmap!("foo".to_string() => typed_column); let column_map = indexmap!("foo".to_string() => typed_column);

View File

@ -13,7 +13,7 @@ use polars_utils::total_ord::TotalEq;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher}; use std::{cmp::Ordering, fmt::Display, hash::Hasher};
use super::{utils::DEFAULT_ROWS, NuLazyFrame}; use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
// DataFrameValue is an encapsulation of Nushell Value that can be used // DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to // to define the PolarsObject Trait. The polars object trait allows to
@ -141,7 +141,7 @@ impl NuDataFrame {
} }
} }
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError> pub fn try_from_iter<T>(iter: T, maybe_schema: Option<NuSchema>) -> Result<Self, ShellError>
where where
T: Iterator<Item = Value>, T: Iterator<Item = Value>,
{ {
@ -161,14 +161,15 @@ impl NuDataFrame {
conversion::insert_record( conversion::insert_record(
&mut column_values, &mut column_values,
Record::from_raw_cols_vals(cols, vals), Record::from_raw_cols_vals(cols, vals),
&maybe_schema,
)? )?
} }
Value::Record { val: record, .. } => { Value::Record { val: record, .. } => {
conversion::insert_record(&mut column_values, record)? conversion::insert_record(&mut column_values, record, &maybe_schema)?
} }
_ => { _ => {
let key = "0".to_string(); let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)? conversion::insert_value(value, key, &mut column_values, &maybe_schema)?
} }
} }
} }
@ -188,13 +189,16 @@ impl NuDataFrame {
Ok(Self::new(false, dataframe)) Ok(Self::new(false, dataframe))
} }
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> { pub fn try_from_columns(
columns: Vec<Column>,
maybe_schema: Option<NuSchema>,
) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new(); let mut column_values: ColumnMap = IndexMap::new();
for column in columns { for column in columns {
let name = column.name().to_string(); let name = column.name().to_string();
for value in column { for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?; conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?;
} }
} }
@ -503,4 +507,8 @@ impl NuDataFrame {
Some(Ordering::Equal) Some(Ordering::Equal)
} }
/// Returns the schema of the wrapped dataframe as a [`NuSchema`].
pub fn schema(&self) -> NuSchema {
    let polars_schema = self.df.schema();
    NuSchema::new(polars_schema)
}
} }

View File

@ -0,0 +1,397 @@
use std::sync::Arc;
use nu_protocol::{Record, ShellError, Span, Value};
use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit};
/// Wrapper around a polars schema so it can be converted to and from
/// nushell values (see the `TryFrom<&Value>` and `From<NuSchema>` impls
/// in this module).
#[derive(Debug, Clone)]
pub struct NuSchema {
/// Shared handle to the underlying polars schema.
pub schema: SchemaRef,
}
impl NuSchema {
    /// Builds a `NuSchema` by placing the given polars `Schema` behind an `Arc`.
    pub fn new(schema: Schema) -> Self {
        let schema = Arc::new(schema);
        Self { schema }
    }
}
/// Builds a schema from a nushell record whose values are dtype names
/// (or nested records for struct columns).
impl TryFrom<&Value> for NuSchema {
    type Error = ShellError;

    /// Fails with the `ShellError` produced by `value_to_schema` when the
    /// value cannot be interpreted as a schema description.
    fn try_from(value: &Value) -> Result<Self, Self::Error> {
        value_to_schema(value, Span::unknown()).map(Self::new)
    }
}
/// Renders a schema as a nushell record mapping column names to dtype descriptions.
impl From<NuSchema> for Value {
    fn from(schema: NuSchema) -> Self {
        let fields = schema.schema.iter_fields();
        fields_to_value(fields, Span::unknown())
    }
}
impl From<NuSchema> for SchemaRef {
fn from(val: NuSchema) -> Self {
Arc::clone(&val.schema)
}
}
/// Converts a sequence of polars fields into a nushell record.
///
/// Each field becomes one record entry: the key is the field name and the
/// value is whatever `dtype_to_value` produces for the field's data type
/// (a string for most dtypes, a nested record for struct dtypes).
///
/// `span` is attached to the returned record as well as to every value
/// inside it.
fn fields_to_value(fields: impl Iterator<Item = Field>, span: Span) -> Value {
    let (cols, vals) = fields
        .map(|field| {
            let val = dtype_to_value(field.data_type(), span);
            let col = field.name().to_string();
            (col, val)
        })
        .unzip();

    let record = Record::from_raw_cols_vals(cols, vals);
    // Use the caller's span rather than `Span::unknown()` so the record is
    // tagged consistently with the values it contains.
    Value::record(record, span)
}
/// Converts a polars dtype into its nushell representation.
///
/// Struct dtypes become nested records (one entry per struct field); every
/// other dtype becomes a string holding the dtype's `Display` text with
/// square brackets replaced by angle brackets.
fn dtype_to_value(dtype: &DataType, span: Span) -> Value {
    if let DataType::Struct(fields) = dtype {
        return fields_to_value(fields.iter().cloned(), span);
    }

    let rendered = dtype.to_string();
    let rendered = rendered.replace('[', "<").replace(']', ">");
    Value::string(rendered, span)
}
/// Builds a polars `Schema` from a nushell record describing column dtypes.
///
/// Propagates any error raised while converting the record into fields.
fn value_to_schema(value: &Value, span: Span) -> Result<Schema, ShellError> {
    Ok(Schema::from_iter(value_to_fields(value, span)?))
}
/// Converts a nushell record into a list of polars fields.
///
/// For every `(column, value)` entry of the record:
/// - a nested record is converted recursively and becomes a struct dtype;
/// - anything else is read as a string and parsed with `dtype_str_to_schema`.
///
/// Returns the first error encountered: `value` not being a record, an entry
/// that cannot be read as a string, or an unparsable dtype name.
fn value_to_fields(value: &Value, span: Span) -> Result<Vec<Field>, ShellError> {
    let mut fields = Vec::new();

    for (col, val) in value.as_record()? {
        let dtype = match val {
            Value::Record { .. } => DataType::Struct(value_to_fields(val, span)?),
            _ => dtype_str_to_schema(&val.as_string()?, span)?,
        };
        fields.push(Field::new(col, dtype));
    }

    Ok(fields)
}
/// Parses a textual dtype description into a polars `DataType`.
///
/// Accepted forms:
/// - simple names: `bool`, `u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`,
///   `i64`, `f32`, `f64`, `str`, `binary`, `date`, `time`, `null`,
///   `unknown`, `object`;
/// - `list<inner>`, where `inner` is itself any accepted form;
/// - `datetime<unit>` or `datetime<unit, tz>`, where `unit` is a time unit
///   understood by `str_to_time_unit`; a missing time zone or `*` means
///   "no time zone";
/// - `duration<unit>`.
///
/// The time zone is optional so that the angle-bracket form of a zone-less
/// datetime (e.g. `datetime<ms>`) is accepted instead of being rejected with
/// "Missing time zone".
///
/// # Errors
///
/// Returns `ShellError::GenericError` (tagged with `span`) when the name is
/// unknown, when a datetime/duration has no time unit, or when the time unit
/// is not recognised.
fn dtype_str_to_schema(dtype: &str, span: Span) -> Result<DataType, ShellError> {
    // Single place to build the error every failure path shares.
    let invalid = |msg: String| ShellError::GenericError {
        error: "Invalid polars data type".into(),
        msg,
        span: Some(span),
        help: None,
        inner: vec![],
    };

    match dtype {
        "bool" => Ok(DataType::Boolean),
        "u8" => Ok(DataType::UInt8),
        "u16" => Ok(DataType::UInt16),
        "u32" => Ok(DataType::UInt32),
        "u64" => Ok(DataType::UInt64),
        "i8" => Ok(DataType::Int8),
        "i16" => Ok(DataType::Int16),
        "i32" => Ok(DataType::Int32),
        "i64" => Ok(DataType::Int64),
        "f32" => Ok(DataType::Float32),
        "f64" => Ok(DataType::Float64),
        "str" => Ok(DataType::String),
        "binary" => Ok(DataType::Binary),
        "date" => Ok(DataType::Date),
        "time" => Ok(DataType::Time),
        "null" => Ok(DataType::Null),
        "unknown" => Ok(DataType::Unknown),
        "object" => Ok(DataType::Object("unknown", None)),
        _ if dtype.starts_with("list") => {
            // Trimming every trailing '>' also removes the closers of nested
            // generics; the inner parsers tolerate the missing closer.
            let inner = dtype
                .trim_start_matches("list")
                .trim_start_matches('<')
                .trim_end_matches('>')
                .trim();
            let inner = dtype_str_to_schema(inner, span)?;
            Ok(DataType::List(Box::new(inner)))
        }
        _ if dtype.starts_with("datetime") => {
            let args = dtype
                .trim_start_matches("datetime")
                .trim_start_matches('<')
                .trim_end_matches('>');
            let mut parts = args.split(',').map(str::trim);

            // `split` always yields at least one (possibly empty) item, so an
            // absent unit shows up as an empty string rather than `None`.
            let unit = parts
                .next()
                .filter(|unit| !unit.is_empty())
                .ok_or_else(|| invalid("Missing time unit".into()))?;
            let time_unit = str_to_time_unit(unit, span)?;

            // The time zone is optional; `*` is the explicit "none" marker.
            let timezone = match parts.next() {
                None | Some("*") => None,
                Some(tz) => Some(tz.to_string()),
            };
            Ok(DataType::Datetime(time_unit, timezone))
        }
        _ if dtype.starts_with("duration") => {
            let args = dtype.trim_start_matches("duration<").trim_end_matches('>');
            let unit = args
                .split(',')
                .next()
                .map(str::trim)
                .filter(|unit| !unit.is_empty())
                .ok_or_else(|| invalid("Missing time unit".into()))?;
            let time_unit = str_to_time_unit(unit, span)?;
            Ok(DataType::Duration(time_unit))
        }
        _ => Err(invalid(format!("Unknown type: {dtype}"))),
    }
}
/// Maps a time-unit abbreviation to a polars `TimeUnit`.
///
/// Recognised spellings are `ms`, `us` / `μs`, and `ns`; anything else
/// yields a `ShellError::GenericError` tagged with `span`.
fn str_to_time_unit(ts_string: &str, span: Span) -> Result<TimeUnit, ShellError> {
    let unit = match ts_string {
        "ms" => Some(TimeUnit::Milliseconds),
        "us" | "μs" => Some(TimeUnit::Microseconds),
        "ns" => Some(TimeUnit::Nanoseconds),
        _ => None,
    };

    unit.ok_or_else(|| ShellError::GenericError {
        error: "Invalid polars data type".into(),
        msg: "Invalid time unit".into(),
        span: Some(span),
        help: None,
        inner: vec![],
    })
}
#[cfg(test)]
mod test {
    use super::*;

    /// Parses `input` as a dtype string and asserts the result equals `expected`.
    fn assert_parses_to(input: &str, expected: DataType) {
        let parsed = dtype_str_to_schema(input, Span::unknown()).unwrap();
        assert_eq!(parsed, expected);
    }

    /// Builds a string value carrying the test span.
    fn dtype_name(name: &str) -> Value {
        Value::string(name.to_string(), Span::test_data())
    }

    #[test]
    fn test_value_to_schema() {
        // Nested record describing the `address` struct column.
        let address = Value::record(
            Record::from_raw_cols_vals(
                vec!["street".into(), "city".into()],
                vec![dtype_name("str"), dtype_name("str")],
            ),
            Span::test_data(),
        );
        let value = Value::record(
            Record::from_raw_cols_vals(
                vec!["name".into(), "age".into(), "address".into()],
                vec![dtype_name("str"), dtype_name("i32"), address],
            ),
            Span::test_data(),
        );

        let schema = value_to_schema(&value, Span::unknown()).unwrap();

        let address_dtype = DataType::Struct(vec![
            Field::new("street", DataType::String),
            Field::new("city", DataType::String),
        ]);
        let expected = Schema::from_iter(vec![
            Field::new("name", DataType::String),
            Field::new("age", DataType::Int32),
            Field::new("address", address_dtype),
        ]);
        assert_eq!(schema, expected);
    }

    #[test]
    fn test_dtype_str_to_schema_simple_types() {
        let cases = [
            ("bool", DataType::Boolean),
            ("u8", DataType::UInt8),
            ("u16", DataType::UInt16),
            ("u32", DataType::UInt32),
            ("u64", DataType::UInt64),
            ("i8", DataType::Int8),
            ("i16", DataType::Int16),
            ("i32", DataType::Int32),
            ("i64", DataType::Int64),
            ("str", DataType::String),
            ("binary", DataType::Binary),
            ("date", DataType::Date),
            ("time", DataType::Time),
            ("null", DataType::Null),
            ("unknown", DataType::Unknown),
            ("object", DataType::Object("unknown", None)),
        ];
        for (input, expected) in cases {
            assert_parses_to(input, expected);
        }
    }

    #[test]
    fn test_dtype_str_schema_datetime() {
        let cases = [
            (
                "datetime<ms, *>",
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                "datetime<us, *>",
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                "datetime<μs, *>",
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                "datetime<ns, *>",
                DataType::Datetime(TimeUnit::Nanoseconds, None),
            ),
            (
                "datetime<ms, UTC>",
                DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())),
            ),
        ];
        for (input, expected) in cases {
            assert_parses_to(input, expected);
        }

        // A name that matches no known dtype must be rejected.
        let rejected = dtype_str_to_schema("invalid", Span::unknown());
        assert!(rejected.is_err())
    }

    #[test]
    fn test_dtype_str_schema_duration() {
        let cases = [
            ("duration<ms>", DataType::Duration(TimeUnit::Milliseconds)),
            ("duration<us>", DataType::Duration(TimeUnit::Microseconds)),
            ("duration<μs>", DataType::Duration(TimeUnit::Microseconds)),
            ("duration<ns>", DataType::Duration(TimeUnit::Nanoseconds)),
        ];
        for (input, expected) in cases {
            assert_parses_to(input, expected);
        }
    }

    #[test]
    fn test_dtype_str_to_schema_list_types() {
        let cases = [
            ("list<i32>", DataType::List(Box::new(DataType::Int32))),
            (
                "list<duration<ms>>",
                DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds))),
            ),
            (
                "list<datetime<ms, *>>",
                DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None))),
            ),
        ];
        for (input, expected) in cases {
            assert_parses_to(input, expected);
        }
    }
}