The ability to specify a schema when using dfr open and dfr into-df (#11634)

# Description

There are times where explicitly specifying a schema for a dataframe is
needed such as:
- Opening CSV and JSON lines files and needing to provide more information
to polars to keep it from failing, or wanting to override the default
type conversion
- When converting a nushell value to a dataframe and wanting to override
the default conversion behaviors.

This pull request provides:
- A flag to allow specifying a schema when using dfr into-df
- A flag to allow specifying a schema when using dfr open that works for
CSV and JSON types
- A new command `dfr schema` which displays schema information and can
also list the supported schema dtypes

A schema is specified by creating a record that maps each column name (the
key) to its dtype. Example usages:

```
{a:1, b:{a:2}} | dfr into-df -s {a: u8, b: {a: i32}} | dfr schema
{a: 1, b: {a: [1 2 3]}, c: [a b c]} | dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>} | dfr schema
 dfr open -s {pid: i32, ppid: i32, name: str, status: str, cpu: f64, mem: i64, virtual: i64} /tmp/ps.jsonl  | dfr schema
```

Supported dtypes:
null                                                   
bool                                                   
u8                                                     
u16                                                    
u32                                                    
u64                                                    
i8                                                     
i16                                                    
i32                                                    
i64                                                    
f32                                                    
f64                                                    
str                                                    
binary                                                 
date                                                   
datetime<time_unit: (ms, us, ns) timezone (optional)>
duration<time_unit: (ms, us, ns)>
time                                                   
object                                                 
unknown                                                
list<dtype>


structs are also supported but are specified via another record:
{a: u8, b: {d: str}}

Another feature of the `dfr schema` command is that it returns the schema
in a format that can be passed straight back in as a valid schema
argument:

<img width="638" alt="Screenshot 2024-01-29 at 10 23 58"
src="https://github.com/nushell/nushell/assets/56345/b49c3bff-5cda-4c86-975a-dfd91d991373">

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2024-01-29 11:26:04 -08:00 committed by GitHub
parent d03ad6a257
commit f879c00f9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
90 changed files with 2408 additions and 1277 deletions

View File

@ -37,7 +37,8 @@ impl Command for AppendDF {
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
$a | dfr append $a"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -54,7 +55,9 @@ impl Command for AppendDF {
"b_x".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -64,7 +67,8 @@ impl Command for AppendDF {
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
$a | dfr append $a --col"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
@ -83,7 +87,9 @@ impl Command for AppendDF {
Value::test_int(4),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for DropDF {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,7 +46,8 @@ impl Command for DropDuplicates {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(3), Value::test_int(1)],
@ -55,7 +56,9 @@ impl Command for DropDuplicates {
"b".to_string(),
vec![Value::test_int(4), Value::test_int(2)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -43,7 +43,8 @@ impl Command for DropNulls {
let a = ($df | dfr with-column $res --name res);
$a | dfr drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
@ -56,7 +57,9 @@ impl Command for DropNulls {
"res".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -66,7 +69,8 @@ impl Command for DropNulls {
example: r#"let s = ([1 2 0 0 3 4] | dfr into-df);
($s / $s) | dfr drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"div_0_0".to_string(),
vec![
Value::test_int(1),
@ -74,7 +78,9 @@ impl Command for DropNulls {
Value::test_int(1),
Value::test_int(1),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -31,7 +31,8 @@ impl Command for DataTypes {
description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"column".to_string(),
vec![Value::test_string("a"), Value::test_string("b")],
@ -40,7 +41,9 @@ impl Command for DataTypes {
"dtype".to_string(),
vec![Value::test_string("i64"), Value::test_string("i64")],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -79,6 +82,7 @@ fn command(
.dtype();
let dtype_str = dtype.to_string();
dtypes.push(Value::string(dtype_str, call.head));
Value::string(*v, call.head)
@ -88,7 +92,7 @@ fn command(
let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes);
NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)
.map(|df| PipelineData::Value(df.into_value(call.head), None))
}

View File

@ -43,10 +43,13 @@ impl Command for FilterWith {
example: r#"let mask = ([true false] | dfr into-df);
[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -55,10 +58,13 @@ impl Command for FilterWith {
description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -44,10 +44,13 @@ impl Command for FirstDF {
description: "Return the first row of a dataframe",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
])
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),
@ -56,7 +59,8 @@ impl Command for FirstDF {
description: "Return the first two rows of a dataframe",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -65,7 +69,9 @@ impl Command for FirstDF {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),

View File

@ -36,10 +36,13 @@ impl Command for GetDF {
description: "Returns the selected column",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -40,10 +40,13 @@ impl Command for LastDF {
description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -106,7 +106,7 @@ impl Command for MeltDF {
Value::test_string("c"),
],
),
])
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -15,6 +15,7 @@ mod open;
mod query_df;
mod rename;
mod sample;
mod schema;
mod shape;
mod slice;
mod sql_context;
@ -49,6 +50,7 @@ pub use melt::MeltDF;
pub use query_df::QueryDf;
pub use rename::RenameDF;
pub use sample::SampleDF;
pub use schema::SchemaDF;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sql_context::SQLContext;
@ -93,6 +95,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
QueryDf,
RenameDF,
SampleDF,
SchemaDF,
ShapeDF,
SliceDF,
TakeDF,

View File

@ -1,3 +1,5 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
@ -70,6 +72,12 @@ impl Command for OpenDataFrame {
"Columns to be selected from csv file. CSV and Parquet file",
None,
)
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s')
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
@ -305,10 +313,19 @@ fn from_json(
help: None,
inner: vec![],
})?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
@ -329,6 +346,10 @@ fn from_jsonl(
call: &Call,
) -> Result<Value, ShellError> {
let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
@ -343,6 +364,11 @@ fn from_jsonl(
.with_json_format(JsonFormat::JsonLines)
.infer_schema_len(infer_schema);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
@ -368,6 +394,11 @@ fn from_csv(
let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if call.has_flag(engine_state, stack, "lazy")? {
let file: String = call.req(engine_state, stack, 0)?;
let csv_reader = LazyCsvReader::new(file);
@ -395,6 +426,11 @@ fn from_csv(
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
@ -452,6 +488,11 @@ fn from_csv(
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),

View File

@ -44,10 +44,13 @@ impl Command for QueryDf {
description: "Query dataframe using SQL",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,7 +46,8 @@ impl Command for RenameDF {
description: "Renames a series",
example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"new_name".to_string(),
vec![
Value::test_int(5),
@ -54,7 +55,9 @@ impl Command for RenameDF {
Value::test_int(7),
Value::test_int(8),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -63,7 +66,8 @@ impl Command for RenameDF {
description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -72,7 +76,9 @@ impl Command for RenameDF {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -81,7 +87,8 @@ impl Command for RenameDF {
description: "Renames two dataframe columns",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -90,7 +97,9 @@ impl Command for RenameDF {
"b_new".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -0,0 +1,119 @@
use super::super::values::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, Record, ShellError, Signature, Span, Type, Value,
};
/// The `dfr schema` command.
///
/// Without flags it reports the schema of the dataframe received on the
/// pipeline. With `--datatype-list` it ignores the input and returns the
/// table built by `datatype_list`.
#[derive(Clone)]
pub struct SchemaDF;

impl Command for SchemaDF {
    fn name(&self) -> &str {
        "dfr schema"
    }

    fn usage(&self) -> &str {
        "Show schema for a dataframe."
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // The help text previously read "creates a lazy dataframe", which
            // described a different command's flag.
            .switch(
                "datatype-list",
                "list the dtypes that can be used in a schema",
                Some('l'),
            )
            // NOTE(review): the declared output is a dataframe, but `run`
            // returns a plain value (the example below is a record, and
            // `datatype_list` yields a list) - confirm whether this should be
            // a record type instead.
            .input_output_type(
                Type::Custom("dataframe".into()),
                Type::Custom("dataframe".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Dataframe schema",
            example: r#"[[a b]; [1 "foo"] [3 "bar"]] | dfr into-df | dfr schema"#,
            result: Some(Value::record(
                Record::from_raw_cols_vals(
                    vec!["a".to_string(), "b".to_string()],
                    vec![
                        Value::string("i64", Span::test_data()),
                        Value::string("str", Span::test_data()),
                    ],
                ),
                Span::test_data(),
            )),
        }]
    }

    /// Dispatches on the `datatype-list` switch: either return the dtype
    /// table or compute the schema of the incoming dataframe.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        if call.has_flag(engine_state, stack, "datatype-list")? {
            // Tag the produced values with the call site instead of an
            // unknown span so later diagnostics can point at this command.
            Ok(PipelineData::Value(datatype_list(call.head), None))
        } else {
            command(engine_state, stack, call, input)
        }
    }
}
/// Reads a dataframe from the pipeline and returns its schema as a value.
///
/// `call.head` is used as the span when extracting the dataframe from
/// `input`; any error from that extraction is returned unchanged. The engine
/// state and stack are accepted but not used.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    NuDataFrame::try_from_pipeline(input, call.head).map(|frame| {
        let schema_value: Value = frame.schema().into();
        PipelineData::Value(schema_value, None)
    })
}
/// Builds the list returned by `dfr schema --datatype-list`.
///
/// Each entry is a record with two string columns, `dtype` and `note`; every
/// value created here (strings, records and the outer list) carries `span`.
fn datatype_list(span: Span) -> Value {
    // (dtype, note) pairs; an empty note means there is nothing to add.
    const DTYPES: &[(&str, &str)] = &[
        ("null", ""),
        ("bool", ""),
        ("u8", ""),
        ("u16", ""),
        ("u32", ""),
        ("u64", ""),
        ("i8", ""),
        ("i16", ""),
        ("i32", ""),
        ("i64", ""),
        ("f32", ""),
        ("f64", ""),
        ("str", ""),
        ("binary", ""),
        ("date", ""),
        ("datetime<time_unit: (ms, us, ns) timezone (optional)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."),
        ("duration<time_unit: (ms, us, ns)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."),
        ("time", ""),
        ("object", ""),
        ("unknown", ""),
        ("list<dtype>", ""),
    ];

    let mut rows: Vec<Value> = Vec::with_capacity(DTYPES.len());
    for &(dtype, note) in DTYPES {
        let row = Record::from_raw_cols_vals(
            vec!["dtype".to_string(), "note".to_string()],
            vec![Value::string(dtype, span), Value::string(note, span)],
        );
        rows.push(Value::record(row, span));
    }

    Value::list(rows, span)
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::SchemaDF;

    /// Hands `SchemaDF` to the shared `test_dataframe` helper.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SchemaDF)])
    }
}

View File

@ -34,10 +34,13 @@ impl Command for ShapeDF {
description: "Shows row and column shape",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("rows".to_string(), vec![Value::test_int(2)]),
Column::new("columns".to_string(), vec![Value::test_int(2)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -70,7 +73,7 @@ fn command(
let rows_col = Column::new("rows".to_string(), vec![rows]);
let cols_col = Column::new("columns".to_string(), vec![cols]);
NuDataFrame::try_from_columns(vec![rows_col, cols_col])
NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)
.map(|df| PipelineData::Value(df.into_value(call.head), None))
}

View File

@ -37,10 +37,13 @@ impl Command for SliceDF {
description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,7 +46,8 @@ impl Command for Summary {
description: "list dataframe descriptives",
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"descriptor".to_string(),
vec![
@ -92,7 +93,9 @@ impl Command for Summary {
Value::test_float(1.0),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -44,7 +44,8 @@ impl Command for TakeDF {
let indices = ([0 2] | dfr into-df);
$df | dfr take $indices"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(4), Value::test_int(4)],
@ -53,7 +54,9 @@ impl Command for TakeDF {
"b".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -64,10 +67,13 @@ impl Command for TakeDF {
let indices = ([0 2] | dfr into-df);
$series | dfr take $indices"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(4), Value::test_int(5)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -1,10 +1,14 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{Column, NuDataFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::*;
#[derive(Clone)]
pub struct ToDataFrame;
@ -20,6 +24,12 @@ impl Command for ToDataFrame {
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
@ -30,7 +40,8 @@ impl Command for ToDataFrame {
description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | dfr into-df",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -39,7 +50,9 @@ impl Command for ToDataFrame {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -48,7 +61,8 @@ impl Command for ToDataFrame {
description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
@ -65,7 +79,9 @@ impl Command for ToDataFrame {
Value::test_string("c"),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -74,14 +90,17 @@ impl Command for ToDataFrame {
description: "Takes a list and creates a dataframe",
example: "[a b c] | dfr into-df",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -90,14 +109,41 @@ impl Command for ToDataFrame {
description: "Takes a list of booleans and creates a dataframe",
example: "[true true false] | dfr into-df",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema",
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
result: Some(
NuDataFrame::try_from_series(vec![
Series::new("a", &[1u8]),
{
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
let vals = vec![AnyValue::StructOwned(
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
.expect("Struct series should not fail")
},
{
let dtype = DataType::List(Box::new(DataType::String));
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
.expect("List series should not fail")
}
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -107,12 +153,17 @@ impl Command for ToDataFrame {
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
NuDataFrame::try_from_iter(input.into_iter())
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}
}

View File

@ -42,7 +42,8 @@ impl Command for WithColumn {
| dfr into-df
| dfr with-column ([5 6] | dfr into-df) --name c"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -55,7 +56,9 @@ impl Command for WithColumn {
"c".to_string(),
vec![Value::test_int(5), Value::test_int(6)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -70,7 +73,8 @@ impl Command for WithColumn {
]
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -87,7 +91,9 @@ impl Command for WithColumn {
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -32,10 +32,13 @@ impl Command for ExprArgWhere {
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
$df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -41,7 +41,8 @@ impl Command for ExprConcatStr {
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df);
$df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("three")],
@ -61,7 +62,9 @@ impl Command for ExprConcatStr {
Value::test_string("three-four-4"),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -52,10 +52,13 @@ impl Command for ExprDatePart {
description: "Creates an expression to capture the year date part",
example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -407,10 +407,13 @@ lazy_expr_command!(
description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -422,7 +425,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr max)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -431,7 +435,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -452,10 +458,13 @@ lazy_expr_command!(
description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -467,7 +476,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr min)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -476,7 +486,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(1)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -497,10 +509,13 @@ lazy_expr_command!(
description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -512,7 +527,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr sum)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -521,7 +537,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_int(6), Value::test_int(1)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -542,10 +560,13 @@ lazy_expr_command!(
description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -557,7 +578,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr mean)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -566,7 +588,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -589,7 +613,8 @@ expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr median)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -598,7 +623,9 @@ expr_command!(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -618,10 +645,13 @@ lazy_expr_command!(
description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -633,7 +663,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr std)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -642,7 +673,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -665,10 +698,13 @@ lazy_expr_command!(
"Var value from columns in a dataframe or aggregates columns to their var value",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -680,7 +716,8 @@ lazy_expr_command!(
| dfr group-by a
| dfr agg (dfr col b | dfr var)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -689,7 +726,9 @@ lazy_expr_command!(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -39,7 +39,8 @@ impl Command for ExprIsIn {
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
$df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
@ -60,7 +61,9 @@ impl Command for ExprIsIn {
Value::test_bool(false),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -81,7 +84,8 @@ impl Command for ExprIsIn {
let list: Vec<Value> = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?;
let values =
NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) {

View File

@ -54,7 +54,8 @@ impl Command for ExprOtherwise {
)
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
@ -71,7 +72,9 @@ impl Command for ExprOtherwise {
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -41,7 +41,8 @@ impl Command for ExprQuantile {
| dfr group-by a
| dfr agg (dfr col b | dfr quantile 0.5)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
@ -50,7 +51,9 @@ impl Command for ExprQuantile {
"b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -62,7 +62,8 @@ impl Command for ExprWhen {
)
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
@ -79,7 +80,9 @@ impl Command for ExprWhen {
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -47,7 +47,8 @@ impl Command for LazyAggregate {
(dfr col b | dfr sum | dfr as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
@ -64,7 +65,9 @@ impl Command for LazyAggregate {
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -81,7 +84,8 @@ impl Command for LazyAggregate {
]
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
@ -98,7 +102,9 @@ impl Command for LazyAggregate {
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -33,7 +33,8 @@ impl Command for LazyCollect {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
@ -42,7 +43,9 @@ impl Command for LazyCollect {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -69,7 +69,7 @@ impl Command for LazyExplode {
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
]).expect("simple df for test should not fail")
], None).expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
@ -86,7 +86,7 @@ impl Command for LazyExplode {
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
]).expect("simple df for test should not fail")
], None).expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},

View File

@ -38,7 +38,8 @@ impl Command for LazyFetch {
description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
@ -47,7 +48,9 @@ impl Command for LazyFetch {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -38,7 +38,8 @@ impl Command for LazyFillNA {
description: "Fills the NaN values with 0",
example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(1),
@ -47,7 +48,9 @@ impl Command for LazyFillNA {
Value::test_int(3),
Value::test_int(0),
],
)])
)],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
@ -56,7 +59,8 @@ impl Command for LazyFillNA {
description: "Fills the NaN values of a whole dataframe",
example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_float(0.2), Value::test_float(0.1)],
@ -65,7 +69,9 @@ impl Command for LazyFillNA {
"b".to_string(),
vec![Value::test_int(1), Value::test_int(0)],
),
])
],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
@ -123,7 +129,7 @@ impl Command for LazyFillNA {
})
.collect::<Vec<Column>>();
Ok(PipelineData::Value(
NuDataFrame::try_from_columns(dataframe)?.into_value(call.head),
NuDataFrame::try_from_columns(dataframe, None)?.into_value(call.head),
None,
))
}

View File

@ -37,7 +37,8 @@ impl Command for LazyFillNull {
description: "Fills the null values by 0",
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
@ -46,7 +47,9 @@ impl Command for LazyFillNull {
Value::test_int(2),
Value::test_int(2),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -38,7 +38,8 @@ impl Command for LazyFilter {
description: "Filter dataframe using an expression",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
@ -47,7 +48,9 @@ impl Command for LazyFilter {
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -71,7 +71,7 @@ Example {
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
]).expect("simple df for test should not fail")
], None).expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
@ -88,7 +88,7 @@ Example {
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
]).expect("simple df for test should not fail")
], None).expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},

View File

@ -46,7 +46,8 @@ impl Command for ToLazyGroupBy {
(dfr col b | dfr sum | dfr as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
@ -63,7 +64,9 @@ impl Command for ToLazyGroupBy {
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -80,7 +83,8 @@ impl Command for ToLazyGroupBy {
]
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
@ -97,7 +101,9 @@ impl Command for ToLazyGroupBy {
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -53,7 +53,8 @@ impl Command for LazyJoin {
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
$df_a | dfr join $df_b a foo | dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
@ -99,7 +100,9 @@ impl Command for LazyJoin {
Value::test_string("let"),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -110,7 +113,8 @@ impl Command for LazyJoin {
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
$df_a | dfr join $df_b a foo"#,
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
@ -156,7 +160,9 @@ impl Command for LazyJoin {
Value::test_string("let"),
],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -188,7 +188,8 @@ lazy_command!(
description: "Reverses the dataframe.",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
@ -197,7 +198,9 @@ lazy_command!(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -231,10 +234,13 @@ lazy_command!(
description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -38,10 +38,13 @@ impl Command for LazyQuantile {
description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,10 +37,13 @@ impl Command for LazySelect {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -60,7 +60,7 @@ impl Command for LazySortBy {
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
),
])
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -89,7 +89,7 @@ impl Command for LazySortBy {
Value::test_int(2),
],
),
])
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -1,9 +1,12 @@
use crate::dataframe::values::NuSchema;
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Type, Value,
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value,
};
#[derive(Clone)]
@ -20,6 +23,12 @@ impl Command for ToLazyFrame {
/// Builds the command signature: an optional `--schema` / `-s` record flag,
/// any input type, a custom "dataframe" output type, and the "lazyframe"
/// category.
fn signature(&self) -> Signature {
    Signature::build(self.name())
        .named(
            "schema",
            // The schema is passed as a record, so the help text shows record syntax.
            SyntaxShape::Record(vec![]),
            // This command converts pipeline input rather than opening files,
            // so the help text does not mention file formats.
            r#"Polars Schema as a record in format {name: dtype}, e.g. {a: u8, b: str}"#,
            Some('s'),
        )
        .input_output_type(Type::Any, Type::Custom("dataframe".into()))
        .category(Category::Custom("lazyframe".into()))
}
@ -34,12 +43,17 @@ impl Command for ToLazyFrame {
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?;
let maybe_schema = call
.get_flag(engine_state, stack, "schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
let value = Value::custom_value(Box::new(lazy), call.head);

View File

@ -33,10 +33,13 @@ impl Command for AllFalse {
description: "Returns true if all values are false",
example: "[false false false] | dfr into-df | dfr all-false",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(true)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -47,10 +50,13 @@ impl Command for AllFalse {
let res = ($s > 9);
$res | dfr all-false"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(false)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -88,7 +94,10 @@ fn command(
let value = Value::bool(!bool.any(), call.head);
NuDataFrame::try_from_columns(vec![Column::new("all_false".to_string(), vec![value])])
NuDataFrame::try_from_columns(
vec![Column::new("all_false".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View File

@ -33,10 +33,13 @@ impl Command for AllTrue {
description: "Returns true if all values are true",
example: "[true true true] | dfr into-df | dfr all-true",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(true)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -47,10 +50,13 @@ impl Command for AllTrue {
let res = ($s > 9);
$res | dfr all-true"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(false)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -88,7 +94,7 @@ fn command(
let value = Value::bool(bool.all(), call.head);
NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])])
NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])], None)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View File

@ -37,10 +37,10 @@ impl Command for ArgMax {
description: "Returns index for max value",
example: "[1 3 2] | dfr into-df | dfr arg-max",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"arg_max".to_string(),
vec![Value::test_int(1)],
)])
NuDataFrame::try_from_columns(
vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,10 +37,10 @@ impl Command for ArgMin {
description: "Returns index for min value",
example: "[1 3 2] | dfr into-df | dfr arg-min",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"arg_min".to_string(),
vec![Value::test_int(0)],
)])
NuDataFrame::try_from_columns(
vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -69,7 +69,8 @@ impl Command for Cumulative {
description: "Cumulative sum for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_cumulative_sum".to_string(),
vec![
Value::test_int(1),
@ -78,7 +79,9 @@ impl Command for Cumulative {
Value::test_int(10),
Value::test_int(15),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -53,7 +53,8 @@ impl Command for AsDateTime {
description: "Converts string to datetime",
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
@ -73,7 +74,9 @@ impl Command for AsDateTime {
Span::test_data(),
),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -82,7 +85,8 @@ impl Command for AsDateTime {
description: "Converts string to datetime with high resolutions",
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
@ -102,7 +106,9 @@ impl Command for AsDateTime {
Span::test_data(),
),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetDay {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-day"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(4), Value::test_int(4)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetHour {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-hour"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(16), Value::test_int(16)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetMinute {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-minute"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(39), Value::test_int(39)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetMonth {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-month"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(8), Value::test_int(8)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetNanosecond {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-nanosecond"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(0), Value::test_int(0)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetOrdinal {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-ordinal"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(217), Value::test_int(217)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetSecond {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-second"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(18), Value::test_int(18)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetWeek {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-week"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(32), Value::test_int(32)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetWeekDay {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-weekday"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -35,10 +35,13 @@ impl Command for GetYear {
let df = ([$dt $dt] | dfr into-df);
$df | dfr get-year"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(2020), Value::test_int(2020)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,7 +46,8 @@ impl Command for ArgSort {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(0),
@ -55,7 +56,9 @@ impl Command for ArgSort {
Value::test_int(3),
Value::test_int(4),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -64,7 +67,8 @@ impl Command for ArgSort {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(3),
@ -73,7 +77,9 @@ impl Command for ArgSort {
Value::test_int(2),
Value::test_int(0),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,10 +37,13 @@ impl Command for ArgTrue {
description: "Returns indexes where values are true",
example: "[false true false] | dfr into-df | dfr arg-true",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_true".to_string(),
vec![Value::test_int(1)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,10 +37,13 @@ impl Command for ArgUnique {
description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_unique".to_string(),
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -43,7 +43,8 @@ impl Command for SetWithIndex {
let indices = ([0 2] | dfr into-df);
$series | dfr set-with-idx 6 --indices $indices"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(6),
@ -53,7 +54,9 @@ impl Command for SetWithIndex {
Value::test_int(4),
Value::test_int(3),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -34,7 +34,8 @@ impl Command for IsDuplicated {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(false),
@ -45,7 +46,9 @@ impl Command for IsDuplicated {
Value::test_bool(true),
Value::test_bool(true),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -55,7 +58,8 @@ impl Command for IsDuplicated {
example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(true),
@ -64,7 +68,9 @@ impl Command for IsDuplicated {
Value::test_bool(true),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -36,7 +36,8 @@ impl Command for IsIn {
example: r#"let other = ([1 3 6] | dfr into-df);
[5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(),
vec![
Value::test_bool(false),
@ -47,7 +48,9 @@ impl Command for IsIn {
Value::test_bool(false),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -41,7 +41,8 @@ impl Command for IsNotNull {
let res = ($s / $s);
$res | dfr is-not-null"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_not_null".to_string(),
vec![
Value::test_bool(true),
@ -49,7 +50,9 @@ impl Command for IsNotNull {
Value::test_bool(false),
Value::test_bool(true),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -41,7 +41,8 @@ impl Command for IsNull {
let res = ($s / $s);
$res | dfr is-null"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_null".to_string(),
vec![
Value::test_bool(false),
@ -49,7 +50,9 @@ impl Command for IsNull {
Value::test_bool(true),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -34,7 +34,8 @@ impl Command for IsUnique {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(true),
@ -45,7 +46,9 @@ impl Command for IsUnique {
Value::test_bool(false),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -54,7 +57,8 @@ impl Command for IsUnique {
description: "Create mask indicating duplicated rows in a dataframe",
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(false),
@ -63,7 +67,9 @@ impl Command for IsUnique {
Value::test_bool(false),
Value::test_bool(true),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -34,14 +34,17 @@ impl Command for NotSeries {
description: "Inverts boolean mask",
example: "[true false true] | dfr into-df | dfr not",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -43,7 +43,8 @@ impl Command for SetSeries {
let mask = ($s | dfr is-null);
$s | dfr set 0 --mask $mask"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
@ -52,7 +53,9 @@ impl Command for SetSeries {
Value::test_int(2),
Value::test_int(2),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -33,10 +33,13 @@ impl Command for NNull {
example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df);
($s / $s) | dfr count-null"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"count_null".to_string(),
vec![Value::test_int(2)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -65,7 +68,10 @@ fn command(
let res = df.as_series(call.head)?.null_count();
let value = Value::int(res as i64, call.head);
NuDataFrame::try_from_columns(vec![Column::new("count_null".to_string(), vec![value])])
NuDataFrame::try_from_columns(
vec![Column::new("count_null".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View File

@ -38,10 +38,13 @@ impl Command for NUnique {
description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"count_unique".to_string(),
vec![Value::test_int(4)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -96,7 +99,10 @@ fn command(
let value = Value::int(res as i64, call.head);
NuDataFrame::try_from_columns(vec![Column::new("count_unique".to_string(), vec![value])])
NuDataFrame::try_from_columns(
vec![Column::new("count_unique".to_string(), vec![value])],
None,
)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View File

@ -72,7 +72,8 @@ impl Command for Rolling {
description: "Rolling sum for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_sum".to_string(),
vec![
Value::test_int(3),
@ -80,7 +81,9 @@ impl Command for Rolling {
Value::test_int(7),
Value::test_int(9),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
@ -89,7 +92,8 @@ impl Command for Rolling {
description: "Rolling max for a series",
example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_max".to_string(),
vec![
Value::test_int(2),
@ -97,7 +101,9 @@ impl Command for Rolling {
Value::test_int(4),
Value::test_int(5),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -44,10 +44,13 @@ impl Command for Shift {
description: "Shifts the values by a given period",
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -40,14 +40,17 @@ impl Command for Concatenate {
example: r#"let other = ([za xs cd] | dfr into-df);
[abc abc abc] | dfr into-df | dfr concatenate $other"#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("abcza"),
Value::test_string("abcxs"),
Value::test_string("abccd"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -39,14 +39,17 @@ impl Command for Contains {
description: "Returns boolean indicating if pattern was found",
example: "[abc acb acb] | dfr into-df | dfr contains ab",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,14 +46,17 @@ impl Command for Replace {
description: "Replaces string",
example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("ABc"),
Value::test_string("ABc"),
Value::test_string("ABc"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -46,14 +46,17 @@ impl Command for ReplaceAll {
description: "Replaces string",
example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("AbAc"),
Value::test_string("AbAc"),
Value::test_string("AbAc"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -33,10 +33,13 @@ impl Command for StrLengths {
description: "Returns string lengths",
example: "[a ab abc] | dfr into-df | dfr str-lengths",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -36,14 +36,17 @@ impl Command for StrSlice {
description: "Creates slices from the strings",
example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("bc"),
Value::test_string("bc"),
Value::test_string("bc"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,13 +37,16 @@ impl Command for StrFTime {
let df = ([$dt $dt] | dfr into-df);
$df | dfr strftime "%Y/%m/%d""#,
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("2020/08/04"),
Value::test_string("2020/08/04"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -33,14 +33,17 @@ impl Command for ToLowerCase {
description: "Modifies strings to lowercase",
example: "[Abc aBc abC] | dfr into-df | dfr lowercase",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("abc"),
Value::test_string("abc"),
Value::test_string("abc"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -37,14 +37,17 @@ impl Command for ToUpperCase {
description: "Modifies strings to uppercase",
example: "[Abc aBc abC] | dfr into-df | dfr uppercase",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("ABC"),
Value::test_string("ABC"),
Value::test_string("ABC"),
],
)])
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -53,10 +53,10 @@ impl Command for Unique {
description: "Returns unique values from a series",
example: "[2 2 2 2 2] | dfr into-df | dfr unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![Value::test_int(2)],
)])
NuDataFrame::try_from_columns(
vec![Column::new("0".to_string(), vec![Value::test_int(2)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -34,7 +34,8 @@ impl Command for ValueCount {
description: "Calculates value counts",
example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts",
result: Some(
NuDataFrame::try_from_columns(vec![
NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![Value::test_int(5), Value::test_int(6)],
@ -43,7 +44,9 @@ impl Command for ValueCount {
"count".to_string(),
vec![Value::test_int(4), Value::test_int(2)],
),
])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),

View File

@ -2,6 +2,7 @@ mod nu_dataframe;
mod nu_expression;
mod nu_lazyframe;
mod nu_lazygroupby;
mod nu_schema;
mod nu_when;
pub mod utils;
@ -9,4 +10,5 @@ pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_expression::NuExpression;
pub use nu_lazyframe::NuLazyFrame;
pub use nu_lazygroupby::NuLazyGroupBy;
pub use nu_schema::NuSchema;
pub use nu_when::NuWhen;

View File

@ -9,14 +9,17 @@ use polars::chunked_array::ChunkedArray;
use polars::datatypes::AnyValue;
use polars::export::arrow::Either;
use polars::prelude::{
DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries,
ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder,
ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, ObjectType, Series,
TemporalMethods, TimeUnit,
DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type,
Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait,
ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray,
ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type,
UInt64Type, UInt8Type,
};
use nu_protocol::{Record, ShellError, Span, Value};
use crate::dataframe::values::NuSchema;
use super::{DataFrameValue, NuDataFrame};
const NANOS_PER_DAY: i64 = 86_400_000_000_000;
@ -28,6 +31,39 @@ const NANOS_PER_DAY: i64 = 86_400_000_000_000;
// practical reasons (~ a few thousand rows).
const VALUES_CAPACITY: usize = 10;
// Reads `$value` as the requested primitive type.
//
// `$value` must expose `as_i64()` / `as_f64()` returning a `Result`; the macro
// expands to that `Result` with the success value converted to the target type.
// Narrowing uses plain `as` casts, so out-of-range integers wrap/truncate
// instead of producing an error.
macro_rules! value_to_primitive {
    // Internal rule shared by every integer target that needs a cast from i64.
    (@from_i64 $value:ident, $target:ty) => {
        $value.as_i64().map(|raw| raw as $target)
    };
    ($value:ident, u8) => {
        value_to_primitive!(@from_i64 $value, u8)
    };
    ($value:ident, u16) => {
        value_to_primitive!(@from_i64 $value, u16)
    };
    ($value:ident, u32) => {
        value_to_primitive!(@from_i64 $value, u32)
    };
    ($value:ident, u64) => {
        value_to_primitive!(@from_i64 $value, u64)
    };
    ($value:ident, i8) => {
        value_to_primitive!(@from_i64 $value, i8)
    };
    ($value:ident, i16) => {
        value_to_primitive!(@from_i64 $value, i16)
    };
    ($value:ident, i32) => {
        value_to_primitive!(@from_i64 $value, i32)
    };
    // i64 and f64 are the native representations: no cast required.
    ($value:ident, i64) => {
        $value.as_i64()
    };
    ($value:ident, f32) => {
        $value.as_f64().map(|raw| raw as f32)
    };
    ($value:ident, f64) => {
        $value.as_f64()
    };
}
#[derive(Debug)]
pub struct Column {
name: String,
@ -74,23 +110,10 @@ impl DerefMut for Column {
}
}
#[derive(Debug)]
pub enum InputType {
Integer,
Float,
String,
Boolean,
Object,
Date,
Duration,
Filesize,
List(Box<InputType>),
}
#[derive(Debug)]
pub struct TypedColumn {
column: Column,
column_type: Option<InputType>,
column_type: Option<DataType>,
}
impl TypedColumn {
@ -144,9 +167,13 @@ pub fn add_separator(values: &mut Vec<Value>, df: &DataFrame, span: Span) {
}
// Inserting the values found in a Value::List or Value::Record
pub fn insert_record(column_values: &mut ColumnMap, record: Record) -> Result<(), ShellError> {
pub fn insert_record(
column_values: &mut ColumnMap,
record: Record,
maybe_schema: &Option<NuSchema>,
) -> Result<(), ShellError> {
for (col, value) in record {
insert_value(value, col, column_values)?;
insert_value(value, col, column_values, maybe_schema)?;
}
Ok(())
@ -156,16 +183,26 @@ pub fn insert_value(
value: Value,
key: String,
column_values: &mut ColumnMap,
maybe_schema: &Option<NuSchema>,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key.clone()) {
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
Entry::Occupied(entry) => entry.into_mut(),
};
// Checking that the type for the value is the same
// for the previous value in the column
if col_val.values.is_empty() {
col_val.column_type = Some(value_to_input_type(&value));
if let Some(schema) = maybe_schema {
if let Some(field) = schema.schema.get_field(&key) {
col_val.column_type = Some(field.data_type().clone());
}
}
if col_val.column_type.is_none() {
col_val.column_type = Some(value_to_data_type(&value));
}
col_val.values.push(value);
} else {
let prev_value = &col_val.values[col_val.values.len() - 1];
@ -179,11 +216,11 @@ pub fn insert_value(
| (Value::Filesize { .. }, Value::Filesize { .. })
| (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value),
(Value::List { .. }, _) => {
col_val.column_type = Some(value_to_input_type(&value));
col_val.column_type = Some(value_to_data_type(&value));
col_val.values.push(value);
}
_ => {
col_val.column_type = Some(InputType::Object);
col_val.column_type = Some(DataType::Object("Value", None));
col_val.values.push(value);
}
}
@ -192,15 +229,15 @@ pub fn insert_value(
Ok(())
}
fn value_to_input_type(value: &Value) -> InputType {
fn value_to_data_type(value: &Value) -> DataType {
match &value {
Value::Int { .. } => InputType::Integer,
Value::Float { .. } => InputType::Float,
Value::String { .. } => InputType::String,
Value::Bool { .. } => InputType::Boolean,
Value::Date { .. } => InputType::Date,
Value::Duration { .. } => InputType::Duration,
Value::Filesize { .. } => InputType::Filesize,
Value::Int { .. } => DataType::Int64,
Value::Float { .. } => DataType::Float64,
Value::String { .. } => DataType::String,
Value::Bool { .. } => DataType::Boolean,
Value::Date { .. } => DataType::Date,
Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds),
Value::Filesize { .. } => DataType::Int64,
Value::List { vals, .. } => {
// We need to determined the type inside of the list.
// Since Value::List does not have any kind of
@ -211,13 +248,213 @@ fn value_to_input_type(value: &Value) -> InputType {
let list_type = vals
.iter()
.filter(|v| !matches!(v, Value::Nothing { .. }))
.map(value_to_input_type)
.map(value_to_data_type)
.nth(1)
.unwrap_or(InputType::Object);
.unwrap_or(DataType::Object("Value", None));
InputType::List(Box::new(list_type))
DataType::List(Box::new(list_type))
}
_ => InputType::Object,
_ => DataType::Object("Value", None),
}
}
/// Converts a `TypedColumn` into a polars `Series` called `name`.
///
/// The conversion is selected by the column's recorded `column_type`:
/// * numeric types read each value with `as_i64` / `as_f64` and narrow it with
///   an `as` cast (out-of-range values wrap/truncate rather than error);
/// * `Boolean` and `String` read each value with `as_bool` / `as_string`;
/// * `Object` columns are stored through `value_to_series`;
/// * `List` columns go through `input_type_list_to_series`, falling back to a
///   list of objects if the typed conversion fails;
/// * `Date` and `Datetime` columns become datetime series; values that are not
///   `Value::Date` become nulls;
/// * `Struct` columns are built from one record per value.
///
/// # Errors
///
/// Returns a `ShellError` when a value cannot be read as the requested type,
/// when a `Datetime` time zone string cannot be parsed, when the struct series
/// cannot be created, when the data type is not one of the handled ones, or
/// when the column carries no type at all.
fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, ShellError> {
    if let Some(column_type) = &column.column_type {
        match column_type {
            DataType::Float32 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_f64().map(|v| v as f32))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Float64 => {
                let series_values: Result<Vec<_>, _> =
                    column.values.iter().map(|v| v.as_f64()).collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::UInt8 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as u8))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::UInt16 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as u16))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::UInt32 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as u32))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::UInt64 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as u64))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Int8 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as i8))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Int16 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as i16))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Int32 => {
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| v as i32))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Int64 => {
                let series_values: Result<Vec<_>, _> =
                    column.values.iter().map(|v| v.as_i64()).collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Boolean => {
                let series_values: Result<Vec<_>, _> =
                    column.values.iter().map(|v| v.as_bool()).collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::String => {
                let series_values: Result<Vec<_>, _> =
                    column.values.iter().map(|v| v.as_string()).collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::Object(_, _) => value_to_series(name, &column.values),
            DataType::Duration(time_unit) => {
                //todo - finish type conversion
                let series_values: Result<Vec<_>, _> = column
                    .values
                    .iter()
                    .map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit)))
                    .collect();
                Ok(Series::new(name, series_values?))
            }
            DataType::List(list_type) => {
                match input_type_list_to_series(name, list_type.as_ref(), &column.values) {
                    Ok(series) => Ok(series),
                    Err(_) => {
                        // An error case will occur when there are lists of mixed types.
                        // If this happens, fallback to object list
                        input_type_list_to_series(
                            name,
                            &DataType::Object("unknown", None),
                            &column.values,
                        )
                    }
                }
            }
            DataType::Date => {
                // Non-date values become nulls; dates are stored as nanosecond timestamps.
                let it = column.values.iter().map(|v| {
                    if let Value::Date { val, .. } = &v {
                        Some(val.timestamp_nanos_opt().unwrap_or_default())
                    } else {
                        None
                    }
                });

                let res: DatetimeChunked = ChunkedArray::<Int64Type>::from_iter_options(name, it)
                    .into_datetime(TimeUnit::Nanoseconds, None);

                Ok(res.into_series())
            }
            DataType::Datetime(tu, maybe_tz) => {
                let dates = column
                    .values
                    .iter()
                    .map(|v| {
                        if let Value::Date { val, .. } = &v {
                            let nanos = match maybe_tz {
                                // If there is a timezone specified, make sure
                                // the value is converted to it
                                Some(tz) => {
                                    let tz = tz.parse::<Tz>().map_err(|e| {
                                        ShellError::GenericError {
                                            error: "Error parsing timezone".into(),
                                            msg: "".into(),
                                            span: None,
                                            help: Some(e.to_string()),
                                            inner: vec![],
                                        }
                                    })?;
                                    val.with_timezone(&tz).timestamp_nanos_opt()
                                }
                                // No time zone in the schema: take the timestamp straight
                                // from the value. A missing zone must not turn an
                                // otherwise valid date into a null.
                                None => val.timestamp_nanos_opt(),
                            };
                            Ok(nanos.map(|nanos| nanos_from_timeunit(nanos, *tu)))
                        } else {
                            Ok(None)
                        }
                    })
                    .collect::<Result<Vec<Option<i64>>, ShellError>>()?;

                let res: DatetimeChunked =
                    ChunkedArray::<Int64Type>::from_iter_options(name, dates.into_iter())
                        .into_datetime(*tu, maybe_tz.clone());

                Ok(res.into_series())
            }
            DataType::Struct(fields) => {
                // The struct's own fields act as the schema for the nested records.
                let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
                let mut structs: Vec<Series> = Vec::new();

                for v in column.values.iter() {
                    let mut column_values: ColumnMap = IndexMap::new();
                    let record = v.as_record()?;
                    insert_record(&mut column_values, record.clone(), &schema)?;
                    let df = from_parsed_columns(column_values)?;
                    structs.push(df.as_series(Span::unknown())?);
                }

                let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
                    ShellError::GenericError {
                        error: format!("Error creating struct: {e}"),
                        msg: "".into(),
                        span: None,
                        help: None,
                        inner: vec![],
                    }
                })?;
                Ok(chunked.into_series())
            }
            _ => Err(ShellError::GenericError {
                error: format!("Error creating dataframe: Unsupported type: {column_type:?}"),
                msg: "".into(),
                span: None,
                help: None,
                inner: vec![],
            }),
        }
    } else {
        Err(ShellError::GenericError {
            error: "Passed a type column with no type".into(),
            msg: "".into(),
            span: None,
            help: None,
            inner: vec![],
        })
    }
}
@ -227,80 +464,22 @@ fn value_to_input_type(value: &Value) -> InputType {
pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
if let Some(column_type) = &column.column_type {
match column_type {
InputType::Float => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Integer | InputType::Filesize | InputType::Duration => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Boolean => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_bool()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Object => {
df_series.push(input_type_object_to_series(&name, &column.values)?)
}
InputType::List(list_type) => {
match input_type_list_to_series(&name, list_type.as_ref(), &column.values) {
Ok(series) => df_series.push(series),
Err(_) => {
// An error case will occur when there are lists of mixed types.
// If this happens, fallback to object list
df_series.push(input_type_list_to_series(
&name,
&InputType::Object,
&column.values,
)?)
}
}
}
InputType::Date => {
let it = column.values.iter().map(|v| {
if let Value::Date { val, .. } = &v {
Some(val.timestamp_nanos_opt().unwrap_or_default())
} else {
None
}
});
let res: DatetimeChunked =
ChunkedArray::<Int64Type>::from_iter_options(&name, it)
.into_datetime(TimeUnit::Nanoseconds, None);
df_series.push(res.into_series())
}
}
}
let series = typed_column_to_series(&name, column)?;
df_series.push(series);
}
DataFrame::new(df_series)
.map(|df| NuDataFrame::new(false, df))
.map_err(|e| ShellError::GenericError {
error: "Error creating dataframe".into(),
msg: "".into(),
msg: e.to_string(),
span: None,
help: Some(e.to_string()),
help: None,
inner: vec![],
})
}
fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> {
fn value_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> {
let mut builder = ObjectChunkedBuilder::<DataFrameValue>::new(name, values.len());
for v in values {
@ -313,21 +492,45 @@ fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, S
fn input_type_list_to_series(
name: &str,
list_type: &InputType,
data_type: &DataType,
values: &[Value],
) -> Result<Series, ShellError> {
let inconsistent_error = |_| ShellError::GenericError {
error: format!(
"column {name} contains a list with inconsistent types: Expecting: {list_type:?}"
"column {name} contains a list with inconsistent types: Expecting: {data_type:?}"
),
msg: "".into(),
span: None,
help: None,
inner: vec![],
};
match *list_type {
// Builds a list series whose inner values are primitives.
//
// `$list_type` is the polars type parameter handed to `ListPrimitiveChunkedBuilder`;
// `$vec_type` is the matching Rust primitive, forwarded to `value_to_primitive!`.
// The macro reads `name`, `values`, `data_type` and `inconsistent_error` from the
// surrounding function rather than taking them as arguments.
macro_rules! primitive_list_series {
($list_type:ty, $vec_type:tt) => {{
let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new(
name,
values.len(),
VALUES_CAPACITY,
data_type.clone(),
);
for v in values {
// A value that is not a list propagates its own error; an element that
// cannot be converted is reported through `inconsistent_error`.
let value_list = v
.as_list()?
.iter()
.map(|v| value_to_primitive!(v, $vec_type))
.collect::<Result<Vec<$vec_type>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}};
}
match *data_type {
// list of boolean values
InputType::Boolean => {
DataType::Boolean => {
let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
for v in values {
let value_list = v
@ -341,52 +544,18 @@ fn input_type_list_to_series(
let res = builder.finish();
Ok(res.into_series())
}
// list of values that reduce down to i64
InputType::Integer | InputType::Filesize | InputType::Duration => {
let logical_type = match list_type {
InputType::Duration => DataType::Duration(TimeUnit::Milliseconds),
_ => DataType::Int64,
};
let mut builder = ListPrimitiveChunkedBuilder::<Int64Type>::new(
name,
values.len(),
VALUES_CAPACITY,
logical_type,
);
for v in values {
let value_list = v
.as_list()?
.iter()
.map(|v| v.as_i64())
.collect::<Result<Vec<i64>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}
InputType::Float => {
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
name,
values.len(),
VALUES_CAPACITY,
DataType::Float64,
);
for v in values {
let value_list = v
.as_list()?
.iter()
.map(|v| v.as_f64())
.collect::<Result<Vec<f64>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
}
InputType::String => {
DataType::Duration(_) => primitive_list_series!(Int64Type, i64),
DataType::UInt8 => primitive_list_series!(UInt8Type, u8),
DataType::UInt16 => primitive_list_series!(UInt16Type, u16),
DataType::UInt32 => primitive_list_series!(UInt32Type, u32),
DataType::UInt64 => primitive_list_series!(UInt64Type, u64),
DataType::Int8 => primitive_list_series!(Int8Type, i8),
DataType::Int16 => primitive_list_series!(Int16Type, i16),
DataType::Int32 => primitive_list_series!(Int32Type, i32),
DataType::Int64 => primitive_list_series!(Int64Type, i64),
DataType::Float32 => primitive_list_series!(Float32Type, f32),
DataType::Float64 => primitive_list_series!(Float64Type, f64),
DataType::String => {
let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
for v in values {
let value_list = v
@ -400,9 +569,7 @@ fn input_type_list_to_series(
let res = builder.finish();
Ok(res.into_series())
}
// Treat lists as objects at this depth as it is expensive to calculate the list type
// We can revisit this later if necessary
InputType::Date => {
DataType::Date => {
let mut builder = AnonymousOwnedListBuilder::new(
name,
values.len(),
@ -434,11 +601,11 @@ fn input_type_list_to_series(
let res = builder.finish();
Ok(res.into_series())
}
InputType::List(ref sub_list_type) => {
DataType::List(ref sub_list_type) => {
Ok(input_type_list_to_series(name, sub_list_type, values)?)
}
// treat everything else as an object
_ => Ok(input_type_object_to_series(name, values)?),
_ => Ok(value_to_series(name, values)?),
}
}
@ -1081,7 +1248,7 @@ mod tests {
};
let typed_column = TypedColumn {
column,
column_type: Some(InputType::List(Box::new(InputType::String))),
column_type: Some(DataType::List(Box::new(DataType::String))),
};
let column_map = indexmap!("foo".to_string() => typed_column);

View File

@ -13,7 +13,7 @@ use polars_utils::total_ord::TotalEq;
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
use super::{utils::DEFAULT_ROWS, NuLazyFrame};
use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
@ -141,7 +141,7 @@ impl NuDataFrame {
}
}
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
pub fn try_from_iter<T>(iter: T, maybe_schema: Option<NuSchema>) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
@ -161,14 +161,15 @@ impl NuDataFrame {
conversion::insert_record(
&mut column_values,
Record::from_raw_cols_vals(cols, vals),
&maybe_schema,
)?
}
Value::Record { val: record, .. } => {
conversion::insert_record(&mut column_values, record)?
conversion::insert_record(&mut column_values, record, &maybe_schema)?
}
_ => {
let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)?
conversion::insert_value(value, key, &mut column_values, &maybe_schema)?
}
}
}
@ -188,13 +189,16 @@ impl NuDataFrame {
Ok(Self::new(false, dataframe))
}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
pub fn try_from_columns(
columns: Vec<Column>,
maybe_schema: Option<NuSchema>,
) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
let name = column.name().to_string();
for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?;
conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?;
}
}
@ -503,4 +507,8 @@ impl NuDataFrame {
Some(Ordering::Equal)
}
/// Returns the schema of the wrapped dataframe as a `NuSchema`.
pub fn schema(&self) -> NuSchema {
    let polars_schema = self.df.schema();
    NuSchema::new(polars_schema)
}
}

View File

@ -0,0 +1,397 @@
use std::sync::Arc;
use nu_protocol::{Record, ShellError, Span, Value};
use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit};
/// Wrapper around a polars schema, used to pass schema information between
/// nushell values and dataframes.
#[derive(Debug, Clone)]
pub struct NuSchema {
/// The wrapped polars schema, stored as a `SchemaRef`.
pub schema: SchemaRef,
}
impl NuSchema {
    /// Creates a `NuSchema` by placing `schema` behind an `Arc`.
    pub fn new(schema: Schema) -> Self {
        let schema = Arc::new(schema);
        Self { schema }
    }
}
impl TryFrom<&Value> for NuSchema {
    type Error = ShellError;

    /// Parses a nushell value describing a schema (see `value_to_schema`) and
    /// wraps the result; parse errors carry an unknown span.
    fn try_from(value: &Value) -> Result<Self, Self::Error> {
        value_to_schema(value, Span::unknown()).map(Self::new)
    }
}
impl From<NuSchema> for Value {
    /// Renders the schema's fields as a nushell value via `fields_to_value`,
    /// using an unknown span.
    fn from(schema: NuSchema) -> Self {
        let span = Span::unknown();
        let fields = schema.schema.iter_fields();
        fields_to_value(fields, span)
    }
}
impl From<NuSchema> for SchemaRef {
fn from(val: NuSchema) -> Self {
Arc::clone(&val.schema)
}
}
/// Builds a record value with one entry per field: the key is the field name
/// and the value is the field's data type as produced by `dtype_to_value`.
///
/// `span` is attached to the record itself as well as to every value inside it.
fn fields_to_value(fields: impl Iterator<Item = Field>, span: Span) -> Value {
    let (cols, vals) = fields
        .map(|field| {
            let col = field.name().to_string();
            let val = dtype_to_value(field.data_type(), span);
            (col, val)
        })
        .unzip();

    // Use the caller's span for the record too, so it matches its contents.
    Value::record(Record::from_raw_cols_vals(cols, vals), span)
}
/// Renders a polars data type as a nushell value.
///
/// Struct types become a nested record of their fields; every other type
/// becomes its display string with `[`/`]` rewritten to `<`/`>`.
fn dtype_to_value(dtype: &DataType, span: Span) -> Value {
    if let DataType::Struct(fields) = dtype {
        return fields_to_value(fields.iter().cloned(), span);
    }

    let rendered = dtype.to_string().replace('[', "<").replace(']', ">");
    Value::string(rendered, span)
}
/// Converts a record value describing a schema into a polars `Schema`.
///
/// Fails with whatever error `value_to_fields` reports.
fn value_to_schema(value: &Value, span: Span) -> Result<Schema, ShellError> {
    Ok(Schema::from_iter(value_to_fields(value, span)?))
}
/// Converts a record value into a list of polars fields.
///
/// A nested record becomes a `DataType::Struct` built recursively; any other
/// entry is read as a string and parsed with `dtype_str_to_schema`. The first
/// failing entry aborts the conversion.
fn value_to_fields(value: &Value, span: Span) -> Result<Vec<Field>, ShellError> {
    let mut fields = Vec::new();

    for (col, val) in value.as_record()?.into_iter() {
        let dtype = match val {
            Value::Record { .. } => DataType::Struct(value_to_fields(val, span)?),
            _ => dtype_str_to_schema(&val.as_string()?, span)?,
        };
        fields.push(Field::new(col, dtype));
    }

    Ok(fields)
}
/// Parses the textual form of a data type into a polars `DataType`.
///
/// Accepted forms:
/// * the plain names `bool`, `u8`..`u64`, `i8`..`i64`, `f32`, `f64`, `str`,
///   `binary`, `date`, `time`, `null`, `unknown` and `object`;
/// * `list<inner>`, where `inner` is parsed recursively;
/// * `datetime<unit>` or `datetime<unit, tz>`, where `unit` is a time unit
///   understood by `str_to_time_unit`; the time zone is optional and `*` also
///   means "no time zone";
/// * `duration<unit>`.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` carrying `span` for an unknown type
/// name or an invalid time unit.
fn dtype_str_to_schema(dtype: &str, span: Span) -> Result<DataType, ShellError> {
    match dtype {
        "bool" => Ok(DataType::Boolean),
        "u8" => Ok(DataType::UInt8),
        "u16" => Ok(DataType::UInt16),
        "u32" => Ok(DataType::UInt32),
        "u64" => Ok(DataType::UInt64),
        "i8" => Ok(DataType::Int8),
        "i16" => Ok(DataType::Int16),
        "i32" => Ok(DataType::Int32),
        "i64" => Ok(DataType::Int64),
        "f32" => Ok(DataType::Float32),
        "f64" => Ok(DataType::Float64),
        "str" => Ok(DataType::String),
        "binary" => Ok(DataType::Binary),
        "date" => Ok(DataType::Date),
        "time" => Ok(DataType::Time),
        "null" => Ok(DataType::Null),
        "unknown" => Ok(DataType::Unknown),
        "object" => Ok(DataType::Object("unknown", None)),
        _ if dtype.starts_with("list") => {
            let dtype = dtype
                .trim_start_matches("list")
                .trim_start_matches('<')
                .trim_end_matches('>')
                .trim();
            let dtype = dtype_str_to_schema(dtype, span)?;
            Ok(DataType::List(Box::new(dtype)))
        }
        _ if dtype.starts_with("datetime") => {
            let dtype = dtype
                .trim_start_matches("datetime")
                .trim_start_matches('<')
                .trim_end_matches('>');
            let mut split = dtype.split(',');
            let next = split
                .next()
                .ok_or_else(|| ShellError::GenericError {
                    error: "Invalid polars data type".into(),
                    msg: "Missing time unit".into(),
                    span: Some(span),
                    help: None,
                    inner: vec![],
                })?
                .trim();
            let time_unit = str_to_time_unit(next, span)?;
            // The time zone is optional: both an absent second component and
            // the `*` placeholder mean "no time zone".
            let timezone = match split.next().map(str::trim) {
                None | Some("*") => None,
                Some(tz) => Some(tz.to_string()),
            };
            Ok(DataType::Datetime(time_unit, timezone))
        }
        _ if dtype.starts_with("duration") => {
            let inner = dtype.trim_start_matches("duration<").trim_end_matches('>');
            let next = inner
                .split(',')
                .next()
                .ok_or_else(|| ShellError::GenericError {
                    error: "Invalid polars data type".into(),
                    msg: "Missing time unit".into(),
                    span: Some(span),
                    help: None,
                    inner: vec![],
                })?
                .trim();
            let time_unit = str_to_time_unit(next, span)?;
            Ok(DataType::Duration(time_unit))
        }
        _ => Err(ShellError::GenericError {
            error: "Invalid polars data type".into(),
            msg: format!("Unknown type: {dtype}"),
            span: Some(span),
            help: None,
            inner: vec![],
        }),
    }
}
/// Parses a time unit abbreviation: `ms`, `us` (or `μs`) or `ns`.
///
/// Any other string yields a `ShellError::GenericError` carrying `span`.
fn str_to_time_unit(ts_string: &str, span: Span) -> Result<TimeUnit, ShellError> {
    let unit = match ts_string {
        "ms" => TimeUnit::Milliseconds,
        "us" | "μs" => TimeUnit::Microseconds,
        "ns" => TimeUnit::Nanoseconds,
        _ => {
            return Err(ShellError::GenericError {
                error: "Invalid polars data type".into(),
                msg: "Invalid time unit".into(),
                span: Some(span),
                help: None,
                inner: vec![],
            })
        }
    };

    Ok(unit)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_value_to_schema() {
    // Small constructors so the fixture reads like the schema it describes.
    let text = |dtype: &str| Value::String {
        val: dtype.into(),
        internal_span: Span::test_data(),
    };
    let record = |cols: Vec<String>, vals: Vec<Value>| Value::Record {
        val: Record::from_raw_cols_vals(cols, vals),
        internal_span: Span::test_data(),
    };

    // Nested record: becomes a struct field in the resulting schema.
    let address = record(
        vec!["street".into(), "city".into()],
        vec![text("str"), text("str")],
    );
    let value = record(
        vec!["name".into(), "age".into(), "address".into()],
        vec![text("str"), text("i32"), address],
    );

    let schema = value_to_schema(&value, Span::unknown()).unwrap();

    let address_fields = vec![
        Field::new("street", DataType::String),
        Field::new("city", DataType::String),
    ];
    let expected = Schema::from_iter(vec![
        Field::new("name", DataType::String),
        Field::new("age", DataType::Int32),
        Field::new("address", DataType::Struct(address_fields)),
    ]);
    assert_eq!(schema, expected);
}
#[test]
fn test_dtype_str_to_schema_simple_types() {
    // Each entry pairs a dtype string with the `DataType` it is expected to parse to.
    let cases = [
        ("bool", DataType::Boolean),
        ("u8", DataType::UInt8),
        ("u16", DataType::UInt16),
        ("u32", DataType::UInt32),
        ("u64", DataType::UInt64),
        ("i8", DataType::Int8),
        ("i16", DataType::Int16),
        ("i32", DataType::Int32),
        ("i64", DataType::Int64),
        ("str", DataType::String),
        ("binary", DataType::Binary),
        ("date", DataType::Date),
        ("time", DataType::Time),
        ("null", DataType::Null),
        ("unknown", DataType::Unknown),
        ("object", DataType::Object("unknown", None)),
    ];

    for (dtype, expected) in cases {
        let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap();
        assert_eq!(schema, expected);
    }
}
#[test]
fn test_dtype_str_schema_datetime() {
    // Datetime strings paired with the expected parse result; the cases using
    // `*` in the timezone slot expect no timezone (`None`).
    let cases = [
        (
            "datetime<ms, *>",
            DataType::Datetime(TimeUnit::Milliseconds, None),
        ),
        (
            "datetime<us, *>",
            DataType::Datetime(TimeUnit::Microseconds, None),
        ),
        (
            "datetime<μs, *>",
            DataType::Datetime(TimeUnit::Microseconds, None),
        ),
        (
            "datetime<ns, *>",
            DataType::Datetime(TimeUnit::Nanoseconds, None),
        ),
        (
            "datetime<ms, UTC>",
            DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())),
        ),
    ];

    for (dtype, expected) in cases {
        let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap();
        assert_eq!(schema, expected);
    }

    // A string that is not a known dtype must be rejected rather than parsed.
    assert!(dtype_str_to_schema("invalid", Span::unknown()).is_err());
}
#[test]
fn test_dtype_str_schema_duration() {
    // Duration strings paired with the time unit they are expected to carry;
    // `us` and `μs` are both expected to yield microseconds.
    let cases = [
        ("duration<ms>", TimeUnit::Milliseconds),
        ("duration<us>", TimeUnit::Microseconds),
        ("duration<μs>", TimeUnit::Microseconds),
        ("duration<ns>", TimeUnit::Nanoseconds),
    ];

    for (dtype, unit) in cases {
        let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap();
        let expected = DataType::Duration(unit);
        assert_eq!(schema, expected);
    }
}
#[test]
fn test_dtype_str_to_schema_list_types() {
    // Parses a dtype string, panicking if the parser rejects it.
    let parse = |dtype: &str| dtype_str_to_schema(dtype, Span::unknown()).unwrap();

    // List of a simple type.
    assert_eq!(parse("list<i32>"), DataType::List(Box::new(DataType::Int32)));

    // List whose element type is itself parameterised.
    assert_eq!(
        parse("list<duration<ms>>"),
        DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds)))
    );
    assert_eq!(
        parse("list<datetime<ms, *>>"),
        DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None)))
    );
}
}