mirror of
https://github.com/nushell/nushell.git
synced 2025-01-11 08:48:23 +01:00
The ability to specify a schema when using dfr open
and dfr into-df
(#11634)
# Description There are times when explicitly specifying a schema for a dataframe is needed, such as: - Opening CSV and JSON lines files and needing to provide more information to polars to keep it from failing, or wanting to override the default type conversion - When converting a nushell value to a dataframe and wanting to override the default conversion behaviors. This pull request provides: - A flag to allow specifying a schema when using dfr into-df - A flag to allow specifying a schema when using dfr open that works for CSV and JSON types - A new command `dfr schema` which displays schema information and can also display the supported schema dtypes A schema is specified by creating a record whose keys are the column names and whose values are the dtypes. Example usages: ``` {a:1, b:{a:2}} | dfr into-df -s {a: u8, b: {a: i32}} | dfr schema {a: 1, b: {a: [1 2 3]}, c: [a b c]} | dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>} | dfr schema dfr open -s {pid: i32, ppid: i32, name: str, status: str, cpu: f64, mem: i64, virtual: i64} /tmp/ps.jsonl | dfr schema ``` Supported dtypes: null bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 str binary date datetime<time_unit: (ms, us, ns) timezone (optional)> duration<time_unit: (ms, us, ns)> time object unknown list<dtype> Structs are also supported but are specified via another record: {a: u8, b: {d: str}} Another feature of the dfr schema command is that it returns the data in a format that can be passed back in as a valid schema argument: <img width="638" alt="Screenshot 2024-01-29 at 10 23 58" src="https://github.com/nushell/nushell/assets/56345/b49c3bff-5cda-4c86-975a-dfd91d991373"> --------- Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
parent
d03ad6a257
commit
f879c00f9d
@ -37,7 +37,8 @@ impl Command for AppendDF {
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
|
||||
$a | dfr append $a"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -54,7 +55,9 @@ impl Command for AppendDF {
|
||||
"b_x".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -64,7 +67,8 @@ impl Command for AppendDF {
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df);
|
||||
$a | dfr append $a --col"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
@ -83,7 +87,9 @@ impl Command for AppendDF {
|
||||
Value::test_int(4),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for DropDF {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,7 +46,8 @@ impl Command for DropDuplicates {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(1)],
|
||||
@ -55,7 +56,9 @@ impl Command for DropDuplicates {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -43,7 +43,8 @@ impl Command for DropNulls {
|
||||
let a = ($df | dfr with-column $res --name res);
|
||||
$a | dfr drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
@ -56,7 +57,9 @@ impl Command for DropNulls {
|
||||
"res".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -66,7 +69,8 @@ impl Command for DropNulls {
|
||||
example: r#"let s = ([1 2 0 0 3 4] | dfr into-df);
|
||||
($s / $s) | dfr drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"div_0_0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
@ -74,7 +78,9 @@ impl Command for DropNulls {
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -31,7 +31,8 @@ impl Command for DataTypes {
|
||||
description: "Dataframe dtypes",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"column".to_string(),
|
||||
vec![Value::test_string("a"), Value::test_string("b")],
|
||||
@ -40,7 +41,9 @@ impl Command for DataTypes {
|
||||
"dtype".to_string(),
|
||||
vec![Value::test_string("i64"), Value::test_string("i64")],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -79,6 +82,7 @@ fn command(
|
||||
.dtype();
|
||||
|
||||
let dtype_str = dtype.to_string();
|
||||
|
||||
dtypes.push(Value::string(dtype_str, call.head));
|
||||
|
||||
Value::string(*v, call.head)
|
||||
@ -88,7 +92,7 @@ fn command(
|
||||
let names_col = Column::new("column".to_string(), names);
|
||||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
|
||||
NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)
|
||||
.map(|df| PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
|
@ -43,10 +43,13 @@ impl Command for FilterWith {
|
||||
example: r#"let mask = ([true false] | dfr into-df);
|
||||
[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -55,10 +58,13 @@ impl Command for FilterWith {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -44,10 +44,13 @@ impl Command for FirstDF {
|
||||
description: "Return the first row of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -56,7 +59,8 @@ impl Command for FirstDF {
|
||||
description: "Return the first two rows of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -65,7 +69,9 @@ impl Command for FirstDF {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -36,10 +36,13 @@ impl Command for GetDF {
|
||||
description: "Returns the selected column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -40,10 +40,13 @@ impl Command for LastDF {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -106,7 +106,7 @@ impl Command for MeltDF {
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
])
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -15,6 +15,7 @@ mod open;
|
||||
mod query_df;
|
||||
mod rename;
|
||||
mod sample;
|
||||
mod schema;
|
||||
mod shape;
|
||||
mod slice;
|
||||
mod sql_context;
|
||||
@ -49,6 +50,7 @@ pub use melt::MeltDF;
|
||||
pub use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
pub use sample::SampleDF;
|
||||
pub use schema::SchemaDF;
|
||||
pub use shape::ShapeDF;
|
||||
pub use slice::SliceDF;
|
||||
pub use sql_context::SQLContext;
|
||||
@ -93,6 +95,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
QueryDf,
|
||||
RenameDF,
|
||||
SampleDF,
|
||||
SchemaDF,
|
||||
ShapeDF,
|
||||
SliceDF,
|
||||
TakeDF,
|
||||
|
@ -1,3 +1,5 @@
|
||||
use crate::dataframe::values::NuSchema;
|
||||
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
@ -70,6 +72,12 @@ impl Command for OpenDataFrame {
|
||||
"Columns to be selected from csv file. CSV and Parquet file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s')
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
@ -305,10 +313,19 @@ fn from_json(
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let maybe_schema = call
|
||||
.get_flag(engine_state, stack, "schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let buf_reader = BufReader::new(file);
|
||||
let reader = JsonReader::new(buf_reader);
|
||||
|
||||
let reader = match maybe_schema {
|
||||
Some(schema) => reader.with_schema(schema.into()),
|
||||
None => reader,
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
@ -329,6 +346,10 @@ fn from_jsonl(
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
let infer_schema: Option<usize> = call.get_flag(engine_state, stack, "infer-schema")?;
|
||||
let maybe_schema = call
|
||||
.get_flag(engine_state, stack, "schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
@ -343,6 +364,11 @@ fn from_jsonl(
|
||||
.with_json_format(JsonFormat::JsonLines)
|
||||
.infer_schema_len(infer_schema);
|
||||
|
||||
let reader = match maybe_schema {
|
||||
Some(schema) => reader.with_schema(schema.into()),
|
||||
None => reader,
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
@ -368,6 +394,11 @@ fn from_csv(
|
||||
let skip_rows: Option<usize> = call.get_flag(engine_state, stack, "skip-rows")?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let maybe_schema = call
|
||||
.get_flag(engine_state, stack, "schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
if call.has_flag(engine_state, stack, "lazy")? {
|
||||
let file: String = call.req(engine_state, stack, 0)?;
|
||||
let csv_reader = LazyCsvReader::new(file);
|
||||
@ -395,6 +426,11 @@ fn from_csv(
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match maybe_schema {
|
||||
Some(schema) => csv_reader.with_schema(Some(schema.into())),
|
||||
None => csv_reader,
|
||||
};
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
|
||||
@ -452,6 +488,11 @@ fn from_csv(
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match maybe_schema {
|
||||
Some(schema) => csv_reader.with_schema(Some(schema.into())),
|
||||
None => csv_reader,
|
||||
};
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.infer_schema(Some(r)),
|
||||
|
@ -44,10 +44,13 @@ impl Command for QueryDf {
|
||||
description: "Query dataframe using SQL",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,7 +46,8 @@ impl Command for RenameDF {
|
||||
description: "Renames a series",
|
||||
example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"new_name".to_string(),
|
||||
vec![
|
||||
Value::test_int(5),
|
||||
@ -54,7 +55,9 @@ impl Command for RenameDF {
|
||||
Value::test_int(7),
|
||||
Value::test_int(8),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -63,7 +66,8 @@ impl Command for RenameDF {
|
||||
description: "Renames a dataframe column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -72,7 +76,9 @@ impl Command for RenameDF {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -81,7 +87,8 @@ impl Command for RenameDF {
|
||||
description: "Renames two dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -90,7 +97,9 @@ impl Command for RenameDF {
|
||||
"b_new".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
119
crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs
Normal file
119
crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, Record, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SchemaDF;
|
||||
|
||||
impl Command for SchemaDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr schema"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show schema for a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("datatype-list", "creates a lazy dataframe", Some('l'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe schema",
|
||||
example: r#"[[a b]; [1 "foo"] [3 "bar"]] | dfr into-df | dfr schema"#,
|
||||
result: Some(Value::record(
|
||||
Record::from_raw_cols_vals(
|
||||
vec!["a".to_string(), "b".to_string()],
|
||||
vec![
|
||||
Value::string("i64", Span::test_data()),
|
||||
Value::string("str", Span::test_data()),
|
||||
],
|
||||
),
|
||||
Span::test_data(),
|
||||
)),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
if call.has_flag(engine_state, stack, "datatype-list")? {
|
||||
Ok(PipelineData::Value(datatype_list(Span::unknown()), None))
|
||||
} else {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let schema = df.schema();
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
fn datatype_list(span: Span) -> Value {
|
||||
let types: Vec<Value> = [
|
||||
("null", ""),
|
||||
("bool", ""),
|
||||
("u8", ""),
|
||||
("u16", ""),
|
||||
("u32", ""),
|
||||
("u64", ""),
|
||||
("i8", ""),
|
||||
("i16", ""),
|
||||
("i32", ""),
|
||||
("i64", ""),
|
||||
("f32", ""),
|
||||
("f64", ""),
|
||||
("str", ""),
|
||||
("binary", ""),
|
||||
("date", ""),
|
||||
("datetime<time_unit: (ms, us, ns) timezone (optional)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."),
|
||||
("duration<time_unit: (ms, us, ns)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."),
|
||||
("time", ""),
|
||||
("object", ""),
|
||||
("unknown", ""),
|
||||
("list<dtype>", ""),
|
||||
]
|
||||
.iter()
|
||||
.map(|(dtype, note)| {
|
||||
Value::record(Record::from_raw_cols_vals(
|
||||
vec!["dtype".to_string(), "note".to_string()],
|
||||
vec![Value::string(*dtype, span), Value::string(*note, span)],
|
||||
),span)
|
||||
})
|
||||
.collect();
|
||||
Value::list(types, span)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::SchemaDF;

    /// Runs the examples declared by `SchemaDF` through the shared
    /// `test_dataframe` helper.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SchemaDF)])
    }
}
|
@ -34,10 +34,13 @@ impl Command for ShapeDF {
|
||||
description: "Shows row and column shape",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("rows".to_string(), vec![Value::test_int(2)]),
|
||||
Column::new("columns".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -70,7 +73,7 @@ fn command(
|
||||
let rows_col = Column::new("rows".to_string(), vec![rows]);
|
||||
let cols_col = Column::new("columns".to_string(), vec![cols]);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![rows_col, cols_col])
|
||||
NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)
|
||||
.map(|df| PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
|
@ -37,10 +37,13 @@ impl Command for SliceDF {
|
||||
description: "Create new dataframe from a slice of the rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,7 +46,8 @@ impl Command for Summary {
|
||||
description: "list dataframe descriptives",
|
||||
example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"descriptor".to_string(),
|
||||
vec![
|
||||
@ -92,7 +93,9 @@ impl Command for Summary {
|
||||
Value::test_float(1.0),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -44,7 +44,8 @@ impl Command for TakeDF {
|
||||
let indices = ([0 2] | dfr into-df);
|
||||
$df | dfr take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(4)],
|
||||
@ -53,7 +54,9 @@ impl Command for TakeDF {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -64,10 +67,13 @@ impl Command for TakeDF {
|
||||
let indices = ([0 2] | dfr into-df);
|
||||
$series | dfr take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(5)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -1,10 +1,14 @@
|
||||
use crate::dataframe::values::NuSchema;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::*;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
@ -20,6 +24,12 @@ impl Command for ToDataFrame {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
@ -30,7 +40,8 @@ impl Command for ToDataFrame {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -39,7 +50,9 @@ impl Command for ToDataFrame {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -48,7 +61,8 @@ impl Command for ToDataFrame {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
|
||||
@ -65,7 +79,9 @@ impl Command for ToDataFrame {
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -74,14 +90,17 @@ impl Command for ToDataFrame {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -90,14 +109,41 @@ impl Command for ToDataFrame {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[true true false] | dfr into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Convert to a dataframe and provide a schema",
|
||||
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| dfr into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(vec![
|
||||
Series::new("a", &[1u8]),
|
||||
{
|
||||
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
|
||||
let vals = vec![AnyValue::StructOwned(
|
||||
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
|
||||
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
|
||||
.expect("Struct series should not fail")
|
||||
},
|
||||
{
|
||||
let dtype = DataType::List(Box::new(DataType::String));
|
||||
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
|
||||
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
|
||||
.expect("List series should not fail")
|
||||
}
|
||||
], Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -107,12 +153,17 @@ impl Command for ToDataFrame {
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
NuDataFrame::try_from_iter(input.into_iter())
|
||||
let maybe_schema = call
|
||||
.get_flag(engine_state, stack, "schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,8 @@ impl Command for WithColumn {
|
||||
| dfr into-df
|
||||
| dfr with-column ([5 6] | dfr into-df) --name c"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -55,7 +56,9 @@ impl Command for WithColumn {
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -70,7 +73,8 @@ impl Command for WithColumn {
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -87,7 +91,9 @@ impl Command for WithColumn {
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(9)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -32,10 +32,13 @@ impl Command for ExprArgWhere {
|
||||
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
|
||||
$df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"b_arg".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -41,7 +41,8 @@ impl Command for ExprConcatStr {
|
||||
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df);
|
||||
$df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("three")],
|
||||
@ -61,7 +62,9 @@ impl Command for ExprConcatStr {
|
||||
Value::test_string("three-four-4"),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -52,10 +52,13 @@ impl Command for ExprDatePart {
|
||||
description: "Creates an expression to capture the year date part",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
|
||||
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -407,10 +407,13 @@ lazy_expr_command!(
|
||||
description: "Max value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(6)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -422,7 +425,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr max)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -431,7 +435,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -452,10 +458,13 @@ lazy_expr_command!(
|
||||
description: "Min value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(1)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -467,7 +476,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr min)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -476,7 +486,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -497,10 +509,13 @@ lazy_expr_command!(
|
||||
description: "Sums all columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(11)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(7)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -512,7 +527,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr sum)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -521,7 +537,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -542,10 +560,13 @@ lazy_expr_command!(
|
||||
description: "Mean value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -557,7 +578,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr mean)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -566,7 +588,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(3.0), Value::test_float(1.0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -589,7 +613,8 @@ expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr median)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -598,7 +623,9 @@ expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(3.0), Value::test_float(1.0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -618,10 +645,13 @@ lazy_expr_command!(
|
||||
description: "Std value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -633,7 +663,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr std)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -642,7 +673,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(0.0), Value::test_float(0.0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -665,10 +698,13 @@ lazy_expr_command!(
|
||||
"Var value from columns in a dataframe or aggregates columns to their var value",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -680,7 +716,8 @@ lazy_expr_command!(
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr var)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -689,7 +726,9 @@ lazy_expr_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(0.0), Value::test_float(0.0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -39,7 +39,8 @@ impl Command for ExprIsIn {
|
||||
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df);
|
||||
$df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
@ -60,7 +61,9 @@ impl Command for ExprIsIn {
|
||||
Value::test_bool(false),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -81,7 +84,8 @@ impl Command for ExprIsIn {
|
||||
let list: Vec<Value> = call.req(engine_state, stack, 0)?;
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?;
|
||||
let values =
|
||||
NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
|
||||
let list = values.as_series(call.head)?;
|
||||
|
||||
if matches!(list.dtype(), DataType::Object(..)) {
|
||||
|
@ -54,7 +54,8 @@ impl Command for ExprOtherwise {
|
||||
)
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
|
||||
@ -71,7 +72,9 @@ impl Command for ExprOtherwise {
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -41,7 +41,8 @@ impl Command for ExprQuantile {
|
||||
| dfr group-by a
|
||||
| dfr agg (dfr col b | dfr quantile 0.5)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
@ -50,7 +51,9 @@ impl Command for ExprQuantile {
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(4.0), Value::test_float(1.0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -62,7 +62,8 @@ impl Command for ExprWhen {
|
||||
)
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
|
||||
@ -79,7 +80,9 @@ impl Command for ExprWhen {
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -47,7 +47,8 @@ impl Command for LazyAggregate {
|
||||
(dfr col b | dfr sum | dfr as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
@ -64,7 +65,9 @@ impl Command for LazyAggregate {
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -81,7 +84,8 @@ impl Command for LazyAggregate {
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
@ -98,7 +102,9 @@ impl Command for LazyAggregate {
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -33,7 +33,8 @@ impl Command for LazyCollect {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
@ -42,7 +43,9 @@ impl Command for LazyCollect {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -69,7 +69,7 @@ impl Command for LazyExplode {
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
], None).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
@ -86,7 +86,7 @@ impl Command for LazyExplode {
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
], None).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
|
@ -38,7 +38,8 @@ impl Command for LazyFetch {
|
||||
description: "Fetch a rows from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
@ -47,7 +48,9 @@ impl Command for LazyFetch {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -38,7 +38,8 @@ impl Command for LazyFillNA {
|
||||
description: "Fills the NaN values with 0",
|
||||
example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
@ -47,7 +48,9 @@ impl Command for LazyFillNA {
|
||||
Value::test_int(3),
|
||||
Value::test_int(0),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -56,7 +59,8 @@ impl Command for LazyFillNA {
|
||||
description: "Fills the NaN values of a whole dataframe",
|
||||
example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_float(0.2), Value::test_float(0.1)],
|
||||
@ -65,7 +69,9 @@ impl Command for LazyFillNA {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(0)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -123,7 +129,7 @@ impl Command for LazyFillNA {
|
||||
})
|
||||
.collect::<Vec<Column>>();
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::try_from_columns(dataframe)?.into_value(call.head),
|
||||
NuDataFrame::try_from_columns(dataframe, None)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
@ -37,7 +37,8 @@ impl Command for LazyFillNull {
|
||||
description: "Fills the null values by 0",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
@ -46,7 +47,9 @@ impl Command for LazyFillNull {
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -38,7 +38,8 @@ impl Command for LazyFilter {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
@ -47,7 +48,9 @@ impl Command for LazyFilter {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -71,7 +71,7 @@ Example {
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
], None).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
@ -88,7 +88,7 @@ Example {
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
], None).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
|
@ -46,7 +46,8 @@ impl Command for ToLazyGroupBy {
|
||||
(dfr col b | dfr sum | dfr as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
@ -63,7 +64,9 @@ impl Command for ToLazyGroupBy {
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -80,7 +83,8 @@ impl Command for ToLazyGroupBy {
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
@ -97,7 +101,9 @@ impl Command for ToLazyGroupBy {
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -53,7 +53,8 @@ impl Command for LazyJoin {
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
|
||||
$df_a | dfr join $df_b a foo | dfr collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
@ -99,7 +100,9 @@ impl Command for LazyJoin {
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -110,7 +113,8 @@ impl Command for LazyJoin {
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy);
|
||||
$df_a | dfr join $df_b a foo"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
@ -156,7 +160,9 @@ impl Command for LazyJoin {
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -188,7 +188,8 @@ lazy_command!(
|
||||
description: "Reverses the dataframe.",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
|
||||
@ -197,7 +198,9 @@ lazy_command!(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
|
||||
),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -231,10 +234,13 @@ lazy_command!(
|
||||
description: "Median value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
|
||||
])
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -38,10 +38,13 @@ impl Command for LazyQuantile {
|
||||
description: "quantile value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,10 +37,13 @@ impl Command for LazySelect {
|
||||
description: "Select a column from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -60,7 +60,7 @@ impl Command for LazySortBy {
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -89,7 +89,7 @@ impl Command for LazySortBy {
|
||||
Value::test_int(2),
|
||||
],
|
||||
),
|
||||
])
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -1,9 +1,12 @@
|
||||
use crate::dataframe::values::NuSchema;
|
||||
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Type, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -20,6 +23,12 @@ impl Command for ToLazyFrame {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
@ -34,12 +43,17 @@ impl Command for ToLazyFrame {
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
let maybe_schema = call
|
||||
.get_flag(engine_state, stack, "schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?;
|
||||
let lazy = NuLazyFrame::from_dataframe(df);
|
||||
let value = Value::custom_value(Box::new(lazy), call.head);
|
||||
|
||||
|
@ -33,10 +33,13 @@ impl Command for AllFalse {
|
||||
description: "Returns true if all values are false",
|
||||
example: "[false false false] | dfr into-df | dfr all-false",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![Value::test_bool(true)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -47,10 +50,13 @@ impl Command for AllFalse {
|
||||
let res = ($s > 9);
|
||||
$res | dfr all-false"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![Value::test_bool(false)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -88,7 +94,10 @@ fn command(
|
||||
|
||||
let value = Value::bool(!bool.any(), call.head);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![Column::new("all_false".to_string(), vec![value])])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("all_false".to_string(), vec![value])],
|
||||
None,
|
||||
)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -33,10 +33,13 @@ impl Command for AllTrue {
|
||||
description: "Returns true if all values are true",
|
||||
example: "[true true true] | dfr into-df | dfr all-true",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![Value::test_bool(true)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -47,10 +50,13 @@ impl Command for AllTrue {
|
||||
let res = ($s > 9);
|
||||
$res | dfr all-true"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![Value::test_bool(false)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -88,7 +94,7 @@ fn command(
|
||||
|
||||
let value = Value::bool(bool.all(), call.head);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])])
|
||||
NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])], None)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -37,10 +37,10 @@ impl Command for ArgMax {
|
||||
description: "Returns index for max value",
|
||||
example: "[1 3 2] | dfr into-df | dfr arg-max",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"arg_max".to_string(),
|
||||
vec![Value::test_int(1)],
|
||||
)])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,10 +37,10 @@ impl Command for ArgMin {
|
||||
description: "Returns index for min value",
|
||||
example: "[1 3 2] | dfr into-df | dfr arg-min",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"arg_min".to_string(),
|
||||
vec![Value::test_int(0)],
|
||||
)])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -69,7 +69,8 @@ impl Command for Cumulative {
|
||||
description: "Cumulative sum for a series",
|
||||
example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0_cumulative_sum".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
@ -78,7 +79,9 @@ impl Command for Cumulative {
|
||||
Value::test_int(10),
|
||||
Value::test_int(15),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -53,7 +53,8 @@ impl Command for AsDateTime {
|
||||
description: "Converts string to datetime",
|
||||
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
@ -73,7 +74,9 @@ impl Command for AsDateTime {
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -82,7 +85,8 @@ impl Command for AsDateTime {
|
||||
description: "Converts string to datetime with high resolutions",
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
@ -102,7 +106,9 @@ impl Command for AsDateTime {
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetDay {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-day"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(4)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetHour {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-hour"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(16), Value::test_int(16)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetMinute {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-minute"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(39), Value::test_int(39)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetMonth {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-month"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(8), Value::test_int(8)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetNanosecond {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-nanosecond"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(0), Value::test_int(0)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetOrdinal {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-ordinal"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(217), Value::test_int(217)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetSecond {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-second"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(18), Value::test_int(18)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetWeek {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-week"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(32), Value::test_int(32)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetWeekDay {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-weekday"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -35,10 +35,13 @@ impl Command for GetYear {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr get-year"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(2020), Value::test_int(2020)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,7 +46,8 @@ impl Command for ArgSort {
|
||||
description: "Returns indexes for a sorted series",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_sort".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
@ -55,7 +56,9 @@ impl Command for ArgSort {
|
||||
Value::test_int(3),
|
||||
Value::test_int(4),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -64,7 +67,8 @@ impl Command for ArgSort {
|
||||
description: "Returns indexes for a sorted series",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_sort".to_string(),
|
||||
vec![
|
||||
Value::test_int(3),
|
||||
@ -73,7 +77,9 @@ impl Command for ArgSort {
|
||||
Value::test_int(2),
|
||||
Value::test_int(0),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,10 +37,13 @@ impl Command for ArgTrue {
|
||||
description: "Returns indexes where values are true",
|
||||
example: "[false true false] | dfr into-df | dfr arg-true",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_true".to_string(),
|
||||
vec![Value::test_int(1)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,10 +37,13 @@ impl Command for ArgUnique {
|
||||
description: "Returns indexes for unique values",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_unique".to_string(),
|
||||
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -43,7 +43,8 @@ impl Command for SetWithIndex {
|
||||
let indices = ([0 2] | dfr into-df);
|
||||
$series | dfr set-with-idx 6 --indices $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(6),
|
||||
@ -53,7 +54,9 @@ impl Command for SetWithIndex {
|
||||
Value::test_int(4),
|
||||
Value::test_int(3),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -34,7 +34,8 @@ impl Command for IsDuplicated {
|
||||
description: "Create mask indicating duplicated values",
|
||||
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
@ -45,7 +46,9 @@ impl Command for IsDuplicated {
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -55,7 +58,8 @@ impl Command for IsDuplicated {
|
||||
example:
|
||||
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
@ -64,7 +68,9 @@ impl Command for IsDuplicated {
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -36,7 +36,8 @@ impl Command for IsIn {
|
||||
example: r#"let other = ([1 3 6] | dfr into-df);
|
||||
[5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_in".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
@ -47,7 +48,9 @@ impl Command for IsIn {
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -41,7 +41,8 @@ impl Command for IsNotNull {
|
||||
let res = ($s / $s);
|
||||
$res | dfr is-not-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_not_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
@ -49,7 +50,9 @@ impl Command for IsNotNull {
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -41,7 +41,8 @@ impl Command for IsNull {
|
||||
let res = ($s / $s);
|
||||
$res | dfr is-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
@ -49,7 +50,9 @@ impl Command for IsNull {
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -34,7 +34,8 @@ impl Command for IsUnique {
|
||||
description: "Create mask indicating unique values",
|
||||
example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
@ -45,7 +46,9 @@ impl Command for IsUnique {
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -54,7 +57,8 @@ impl Command for IsUnique {
|
||||
description: "Create mask indicating duplicated rows in a dataframe",
|
||||
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
@ -63,7 +67,9 @@ impl Command for IsUnique {
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -34,14 +34,17 @@ impl Command for NotSeries {
|
||||
description: "Inverts boolean mask",
|
||||
example: "[true false true] | dfr into-df | dfr not",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -43,7 +43,8 @@ impl Command for SetSeries {
|
||||
let mask = ($s | dfr is-null);
|
||||
$s | dfr set 0 --mask $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
@ -52,7 +53,9 @@ impl Command for SetSeries {
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -33,10 +33,13 @@ impl Command for NNull {
|
||||
example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df);
|
||||
($s / $s) | dfr count-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"count_null".to_string(),
|
||||
vec![Value::test_int(2)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -65,7 +68,10 @@ fn command(
|
||||
let res = df.as_series(call.head)?.null_count();
|
||||
let value = Value::int(res as i64, call.head);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![Column::new("count_null".to_string(), vec![value])])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("count_null".to_string(), vec![value])],
|
||||
None,
|
||||
)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -38,10 +38,13 @@ impl Command for NUnique {
|
||||
description: "Counts unique values",
|
||||
example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"count_unique".to_string(),
|
||||
vec![Value::test_int(4)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -96,7 +99,10 @@ fn command(
|
||||
|
||||
let value = Value::int(res as i64, call.head);
|
||||
|
||||
NuDataFrame::try_from_columns(vec![Column::new("count_unique".to_string(), vec![value])])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("count_unique".to_string(), vec![value])],
|
||||
None,
|
||||
)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,8 @@ impl Command for Rolling {
|
||||
description: "Rolling sum for a series",
|
||||
example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0_rolling_sum".to_string(),
|
||||
vec![
|
||||
Value::test_int(3),
|
||||
@ -80,7 +81,9 @@ impl Command for Rolling {
|
||||
Value::test_int(7),
|
||||
Value::test_int(9),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
@ -89,7 +92,8 @@ impl Command for Rolling {
|
||||
description: "Rolling max for a series",
|
||||
example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0_rolling_max".to_string(),
|
||||
vec![
|
||||
Value::test_int(2),
|
||||
@ -97,7 +101,9 @@ impl Command for Rolling {
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -44,10 +44,13 @@ impl Command for Shift {
|
||||
description: "Shifts the values by a given period",
|
||||
example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -40,14 +40,17 @@ impl Command for Concatenate {
|
||||
example: r#"let other = ([za xs cd] | dfr into-df);
|
||||
[abc abc abc] | dfr into-df | dfr concatenate $other"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("abcza"),
|
||||
Value::test_string("abcxs"),
|
||||
Value::test_string("abccd"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -39,14 +39,17 @@ impl Command for Contains {
|
||||
description: "Returns boolean indicating if pattern was found",
|
||||
example: "[abc acb acb] | dfr into-df | dfr contains ab",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,14 +46,17 @@ impl Command for Replace {
|
||||
description: "Replaces string",
|
||||
example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("ABc"),
|
||||
Value::test_string("ABc"),
|
||||
Value::test_string("ABc"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -46,14 +46,17 @@ impl Command for ReplaceAll {
|
||||
description: "Replaces string",
|
||||
example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("AbAc"),
|
||||
Value::test_string("AbAc"),
|
||||
Value::test_string("AbAc"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -33,10 +33,13 @@ impl Command for StrLengths {
|
||||
description: "Returns string lengths",
|
||||
example: "[a ab abc] | dfr into-df | dfr str-lengths",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -36,14 +36,17 @@ impl Command for StrSlice {
|
||||
description: "Creates slices from the strings",
|
||||
example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("bc"),
|
||||
Value::test_string("bc"),
|
||||
Value::test_string("bc"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,13 +37,16 @@ impl Command for StrFTime {
|
||||
let df = ([$dt $dt] | dfr into-df);
|
||||
$df | dfr strftime "%Y/%m/%d""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("2020/08/04"),
|
||||
Value::test_string("2020/08/04"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -33,14 +33,17 @@ impl Command for ToLowerCase {
|
||||
description: "Modifies strings to lowercase",
|
||||
example: "[Abc aBc abC] | dfr into-df | dfr lowercase",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("abc"),
|
||||
Value::test_string("abc"),
|
||||
Value::test_string("abc"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -37,14 +37,17 @@ impl Command for ToUpperCase {
|
||||
description: "Modifies strings to uppercase",
|
||||
example: "[Abc aBc abC] | dfr into-df | dfr uppercase",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("ABC"),
|
||||
Value::test_string("ABC"),
|
||||
Value::test_string("ABC"),
|
||||
],
|
||||
)])
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -53,10 +53,10 @@ impl Command for Unique {
|
||||
description: "Returns unique values from a series",
|
||||
example: "[2 2 2 2 2] | dfr into-df | dfr unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(2)],
|
||||
)])
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("0".to_string(), vec![Value::test_int(2)])],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -34,7 +34,8 @@ impl Command for ValueCount {
|
||||
description: "Calculates value counts",
|
||||
example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
@ -43,7 +44,9 @@ impl Command for ValueCount {
|
||||
"count".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
|
@ -2,6 +2,7 @@ mod nu_dataframe;
|
||||
mod nu_expression;
|
||||
mod nu_lazyframe;
|
||||
mod nu_lazygroupby;
|
||||
mod nu_schema;
|
||||
mod nu_when;
|
||||
pub mod utils;
|
||||
|
||||
@ -9,4 +10,5 @@ pub use nu_dataframe::{Axis, Column, NuDataFrame};
|
||||
pub use nu_expression::NuExpression;
|
||||
pub use nu_lazyframe::NuLazyFrame;
|
||||
pub use nu_lazygroupby::NuLazyGroupBy;
|
||||
pub use nu_schema::NuSchema;
|
||||
pub use nu_when::NuWhen;
|
||||
|
@ -9,14 +9,17 @@ use polars::chunked_array::ChunkedArray;
|
||||
use polars::datatypes::AnyValue;
|
||||
use polars::export::arrow::Either;
|
||||
use polars::prelude::{
|
||||
DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries,
|
||||
ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder,
|
||||
ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, ObjectType, Series,
|
||||
TemporalMethods, TimeUnit,
|
||||
DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type,
|
||||
Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait,
|
||||
ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray,
|
||||
ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type,
|
||||
UInt64Type, UInt8Type,
|
||||
};
|
||||
|
||||
use nu_protocol::{Record, ShellError, Span, Value};
|
||||
|
||||
use crate::dataframe::values::NuSchema;
|
||||
|
||||
use super::{DataFrameValue, NuDataFrame};
|
||||
|
||||
const NANOS_PER_DAY: i64 = 86_400_000_000_000;
|
||||
@ -28,6 +31,39 @@ const NANOS_PER_DAY: i64 = 86_400_000_000_000;
|
||||
// practical reasons (~ a few thousand rows).
|
||||
const VALUES_CAPACITY: usize = 10;
|
||||
|
||||
// Expands to an expression that reads `$value` as the primitive type named by
// the second token. Integer targets go through `$value.as_i64()` and float
// targets through `$value.as_f64()`; when the target is narrower than the
// accessor's type, the result is converted with an `as` cast inside `.map`.
// The expansion keeps whatever wrapper `as_i64()` / `as_f64()` return
// (presumably a `Result` -- TODO confirm against the accessor definitions).
//
// NOTE(review): `as` casts do not check range. An out-of-range or negative
// i64 is silently truncated/wrapped for the narrower or unsigned targets
// instead of producing an error.
macro_rules! value_to_primitive {
|
||||
// Unsigned targets: i64 -> uN via an unchecked `as` cast.
($value:ident, u8) => {
|
||||
$value.as_i64().map(|v| v as u8)
|
||||
};
|
||||
($value:ident, u16) => {
|
||||
$value.as_i64().map(|v| v as u16)
|
||||
};
|
||||
($value:ident, u32) => {
|
||||
$value.as_i64().map(|v| v as u32)
|
||||
};
|
||||
// Same width as i64, so negative inputs become large positive values.
($value:ident, u64) => {
|
||||
$value.as_i64().map(|v| v as u64)
|
||||
};
|
||||
// Narrower signed targets: i64 -> iN via an unchecked `as` cast.
($value:ident, i8) => {
|
||||
$value.as_i64().map(|v| v as i8)
|
||||
};
|
||||
($value:ident, i16) => {
|
||||
$value.as_i64().map(|v| v as i16)
|
||||
};
|
||||
($value:ident, i32) => {
|
||||
$value.as_i64().map(|v| v as i32)
|
||||
};
|
||||
// i64 is the accessor's own type, so no cast is applied.
($value:ident, i64) => {
|
||||
$value.as_i64()
|
||||
};
|
||||
// f64 -> f32 narrows the value and may lose precision.
($value:ident, f32) => {
|
||||
$value.as_f64().map(|v| v as f32)
|
||||
};
|
||||
// f64 is the accessor's own type, so no cast is applied.
($value:ident, f64) => {
|
||||
$value.as_f64()
|
||||
};
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Column {
|
||||
name: String,
|
||||
@ -74,23 +110,10 @@ impl DerefMut for Column {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum InputType {
|
||||
Integer,
|
||||
Float,
|
||||
String,
|
||||
Boolean,
|
||||
Object,
|
||||
Date,
|
||||
Duration,
|
||||
Filesize,
|
||||
List(Box<InputType>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TypedColumn {
|
||||
column: Column,
|
||||
column_type: Option<InputType>,
|
||||
column_type: Option<DataType>,
|
||||
}
|
||||
|
||||
impl TypedColumn {
|
||||
@ -144,9 +167,13 @@ pub fn add_separator(values: &mut Vec<Value>, df: &DataFrame, span: Span) {
|
||||
}
|
||||
|
||||
// Inserting the values found in a Value::List or Value::Record
|
||||
pub fn insert_record(column_values: &mut ColumnMap, record: Record) -> Result<(), ShellError> {
|
||||
pub fn insert_record(
|
||||
column_values: &mut ColumnMap,
|
||||
record: Record,
|
||||
maybe_schema: &Option<NuSchema>,
|
||||
) -> Result<(), ShellError> {
|
||||
for (col, value) in record {
|
||||
insert_value(value, col, column_values)?;
|
||||
insert_value(value, col, column_values, maybe_schema)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -156,16 +183,26 @@ pub fn insert_value(
|
||||
value: Value,
|
||||
key: String,
|
||||
column_values: &mut ColumnMap,
|
||||
maybe_schema: &Option<NuSchema>,
|
||||
) -> Result<(), ShellError> {
|
||||
let col_val = match column_values.entry(key.clone()) {
|
||||
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
|
||||
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
|
||||
Entry::Occupied(entry) => entry.into_mut(),
|
||||
};
|
||||
|
||||
// Checking that the type for the value is the same
|
||||
// for the previous value in the column
|
||||
if col_val.values.is_empty() {
|
||||
col_val.column_type = Some(value_to_input_type(&value));
|
||||
if let Some(schema) = maybe_schema {
|
||||
if let Some(field) = schema.schema.get_field(&key) {
|
||||
col_val.column_type = Some(field.data_type().clone());
|
||||
}
|
||||
}
|
||||
|
||||
if col_val.column_type.is_none() {
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
}
|
||||
|
||||
col_val.values.push(value);
|
||||
} else {
|
||||
let prev_value = &col_val.values[col_val.values.len() - 1];
|
||||
@ -179,11 +216,11 @@ pub fn insert_value(
|
||||
| (Value::Filesize { .. }, Value::Filesize { .. })
|
||||
| (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value),
|
||||
(Value::List { .. }, _) => {
|
||||
col_val.column_type = Some(value_to_input_type(&value));
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
col_val.values.push(value);
|
||||
}
|
||||
_ => {
|
||||
col_val.column_type = Some(InputType::Object);
|
||||
col_val.column_type = Some(DataType::Object("Value", None));
|
||||
col_val.values.push(value);
|
||||
}
|
||||
}
|
||||
@ -192,15 +229,15 @@ pub fn insert_value(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn value_to_input_type(value: &Value) -> InputType {
|
||||
fn value_to_data_type(value: &Value) -> DataType {
|
||||
match &value {
|
||||
Value::Int { .. } => InputType::Integer,
|
||||
Value::Float { .. } => InputType::Float,
|
||||
Value::String { .. } => InputType::String,
|
||||
Value::Bool { .. } => InputType::Boolean,
|
||||
Value::Date { .. } => InputType::Date,
|
||||
Value::Duration { .. } => InputType::Duration,
|
||||
Value::Filesize { .. } => InputType::Filesize,
|
||||
Value::Int { .. } => DataType::Int64,
|
||||
Value::Float { .. } => DataType::Float64,
|
||||
Value::String { .. } => DataType::String,
|
||||
Value::Bool { .. } => DataType::Boolean,
|
||||
Value::Date { .. } => DataType::Date,
|
||||
Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds),
|
||||
Value::Filesize { .. } => DataType::Int64,
|
||||
Value::List { vals, .. } => {
|
||||
// We need to determine the type inside of the list.
|
||||
// Since Value::List does not have any kind of
|
||||
@ -211,13 +248,213 @@ fn value_to_input_type(value: &Value) -> InputType {
|
||||
let list_type = vals
|
||||
.iter()
|
||||
.filter(|v| !matches!(v, Value::Nothing { .. }))
|
||||
.map(value_to_input_type)
|
||||
.map(value_to_data_type)
|
||||
.nth(1)
|
||||
.unwrap_or(InputType::Object);
|
||||
.unwrap_or(DataType::Object("Value", None));
|
||||
|
||||
InputType::List(Box::new(list_type))
|
||||
DataType::List(Box::new(list_type))
|
||||
}
|
||||
_ => InputType::Object,
|
||||
_ => DataType::Object("Value", None),
|
||||
}
|
||||
}
|
||||
|
||||
fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, ShellError> {
|
||||
if let Some(column_type) = &column.column_type {
|
||||
match column_type {
|
||||
DataType::Float32 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_f64().map(|v| v as f32))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_f64()).collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::UInt8 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u8))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::UInt16 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u16))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::UInt32 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u32))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::UInt64 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u64))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Int8 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i8))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Int16 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i16))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Int32 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i32))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_i64()).collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Boolean => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_bool()).collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::String => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_string()).collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Object(_, _) => value_to_series(name, &column.values),
|
||||
DataType::Duration(time_unit) => {
|
||||
//todo - finish type conversion
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::List(list_type) => {
|
||||
match input_type_list_to_series(name, list_type.as_ref(), &column.values) {
|
||||
Ok(series) => Ok(series),
|
||||
Err(_) => {
|
||||
// An error case will occur when there are lists of mixed types.
|
||||
// If this happens, fallback to object list
|
||||
input_type_list_to_series(
|
||||
name,
|
||||
&DataType::Object("unknown", None),
|
||||
&column.values,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
DataType::Date => {
|
||||
let it = column.values.iter().map(|v| {
|
||||
if let Value::Date { val, .. } = &v {
|
||||
Some(val.timestamp_nanos_opt().unwrap_or_default())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let res: DatetimeChunked = ChunkedArray::<Int64Type>::from_iter_options(name, it)
|
||||
.into_datetime(TimeUnit::Nanoseconds, None);
|
||||
|
||||
Ok(res.into_series())
|
||||
}
|
||||
DataType::Datetime(tu, maybe_tz) => {
|
||||
let dates = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::Date { val, .. } = &v {
|
||||
// If there is a timezone specified, make sure
|
||||
// the value is converted to it
|
||||
Ok(maybe_tz
|
||||
.as_ref()
|
||||
.map(|tz| tz.parse::<Tz>().map(|tz| val.with_timezone(&tz)))
|
||||
.transpose()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error parsing timezone".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
inner: vec![],
|
||||
})?
|
||||
.and_then(|dt| dt.timestamp_nanos_opt())
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<Option<i64>>, ShellError>>()?;
|
||||
|
||||
let res: DatetimeChunked =
|
||||
ChunkedArray::<Int64Type>::from_iter_options(name, dates.into_iter())
|
||||
.into_datetime(*tu, maybe_tz.clone());
|
||||
|
||||
Ok(res.into_series())
|
||||
}
|
||||
DataType::Struct(fields) => {
|
||||
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
|
||||
let mut structs: Vec<Series> = Vec::new();
|
||||
|
||||
for v in column.values.iter() {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
let record = v.as_record()?;
|
||||
insert_record(&mut column_values, record.clone(), &schema)?;
|
||||
let df = from_parsed_columns(column_values)?;
|
||||
structs.push(df.as_series(Span::unknown())?);
|
||||
}
|
||||
|
||||
let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: format!("Error creating struct: {e}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
Ok(chunked.into_series())
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: format!("Error creating dataframe: Unsupported type: {column_type:?}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
} else {
|
||||
Err(ShellError::GenericError {
|
||||
error: "Passed a type column with no type".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -227,80 +464,22 @@ fn value_to_input_type(value: &Value) -> InputType {
|
||||
pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, ShellError> {
|
||||
let mut df_series: Vec<Series> = Vec::new();
|
||||
for (name, column) in column_values {
|
||||
if let Some(column_type) = &column.column_type {
|
||||
match column_type {
|
||||
InputType::Float => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_f64()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputType::Integer | InputType::Filesize | InputType::Duration => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_i64()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputType::String => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_string()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputType::Boolean => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_bool()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputType::Object => {
|
||||
df_series.push(input_type_object_to_series(&name, &column.values)?)
|
||||
}
|
||||
InputType::List(list_type) => {
|
||||
match input_type_list_to_series(&name, list_type.as_ref(), &column.values) {
|
||||
Ok(series) => df_series.push(series),
|
||||
Err(_) => {
|
||||
// An error case will occur when there are lists of mixed types.
|
||||
// If this happens, fallback to object list
|
||||
df_series.push(input_type_list_to_series(
|
||||
&name,
|
||||
&InputType::Object,
|
||||
&column.values,
|
||||
)?)
|
||||
}
|
||||
}
|
||||
}
|
||||
InputType::Date => {
|
||||
let it = column.values.iter().map(|v| {
|
||||
if let Value::Date { val, .. } = &v {
|
||||
Some(val.timestamp_nanos_opt().unwrap_or_default())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let res: DatetimeChunked =
|
||||
ChunkedArray::<Int64Type>::from_iter_options(&name, it)
|
||||
.into_datetime(TimeUnit::Nanoseconds, None);
|
||||
|
||||
df_series.push(res.into_series())
|
||||
}
|
||||
}
|
||||
}
|
||||
let series = typed_column_to_series(&name, column)?;
|
||||
df_series.push(series);
|
||||
}
|
||||
|
||||
DataFrame::new(df_series)
|
||||
.map(|df| NuDataFrame::new(false, df))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating dataframe".into(),
|
||||
msg: "".into(),
|
||||
msg: e.to_string(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> {
|
||||
fn value_to_series(name: &str, values: &[Value]) -> Result<Series, ShellError> {
|
||||
let mut builder = ObjectChunkedBuilder::<DataFrameValue>::new(name, values.len());
|
||||
|
||||
for v in values {
|
||||
@ -313,21 +492,45 @@ fn input_type_object_to_series(name: &str, values: &[Value]) -> Result<Series, S
|
||||
|
||||
fn input_type_list_to_series(
|
||||
name: &str,
|
||||
list_type: &InputType,
|
||||
data_type: &DataType,
|
||||
values: &[Value],
|
||||
) -> Result<Series, ShellError> {
|
||||
let inconsistent_error = |_| ShellError::GenericError {
|
||||
error: format!(
|
||||
"column {name} contains a list with inconsistent types: Expecting: {list_type:?}"
|
||||
"column {name} contains a list with inconsistent types: Expecting: {data_type:?}"
|
||||
),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
};
|
||||
match *list_type {
|
||||
|
||||
macro_rules! primitive_list_series {
|
||||
($list_type:ty, $vec_type:tt) => {{
|
||||
let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new(
|
||||
name,
|
||||
values.len(),
|
||||
VALUES_CAPACITY,
|
||||
data_type.clone(),
|
||||
);
|
||||
|
||||
for v in values {
|
||||
let value_list = v
|
||||
.as_list()?
|
||||
.iter()
|
||||
.map(|v| value_to_primitive!(v, $vec_type))
|
||||
.collect::<Result<Vec<$vec_type>, _>>()
|
||||
.map_err(inconsistent_error)?;
|
||||
builder.append_iter_values(value_list.iter().copied());
|
||||
}
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}};
|
||||
}
|
||||
|
||||
match *data_type {
|
||||
// list of boolean values
|
||||
InputType::Boolean => {
|
||||
DataType::Boolean => {
|
||||
let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
|
||||
for v in values {
|
||||
let value_list = v
|
||||
@ -341,52 +544,18 @@ fn input_type_list_to_series(
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}
|
||||
// list of values that reduce down to i64
|
||||
InputType::Integer | InputType::Filesize | InputType::Duration => {
|
||||
let logical_type = match list_type {
|
||||
InputType::Duration => DataType::Duration(TimeUnit::Milliseconds),
|
||||
_ => DataType::Int64,
|
||||
};
|
||||
|
||||
let mut builder = ListPrimitiveChunkedBuilder::<Int64Type>::new(
|
||||
name,
|
||||
values.len(),
|
||||
VALUES_CAPACITY,
|
||||
logical_type,
|
||||
);
|
||||
|
||||
for v in values {
|
||||
let value_list = v
|
||||
.as_list()?
|
||||
.iter()
|
||||
.map(|v| v.as_i64())
|
||||
.collect::<Result<Vec<i64>, _>>()
|
||||
.map_err(inconsistent_error)?;
|
||||
builder.append_iter_values(value_list.iter().copied());
|
||||
}
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}
|
||||
InputType::Float => {
|
||||
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
|
||||
name,
|
||||
values.len(),
|
||||
VALUES_CAPACITY,
|
||||
DataType::Float64,
|
||||
);
|
||||
for v in values {
|
||||
let value_list = v
|
||||
.as_list()?
|
||||
.iter()
|
||||
.map(|v| v.as_f64())
|
||||
.collect::<Result<Vec<f64>, _>>()
|
||||
.map_err(inconsistent_error)?;
|
||||
builder.append_iter_values(value_list.iter().copied());
|
||||
}
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}
|
||||
InputType::String => {
|
||||
DataType::Duration(_) => primitive_list_series!(Int64Type, i64),
|
||||
DataType::UInt8 => primitive_list_series!(UInt8Type, u8),
|
||||
DataType::UInt16 => primitive_list_series!(UInt16Type, u16),
|
||||
DataType::UInt32 => primitive_list_series!(UInt32Type, u32),
|
||||
DataType::UInt64 => primitive_list_series!(UInt64Type, u64),
|
||||
DataType::Int8 => primitive_list_series!(Int8Type, i8),
|
||||
DataType::Int16 => primitive_list_series!(Int16Type, i16),
|
||||
DataType::Int32 => primitive_list_series!(Int32Type, i32),
|
||||
DataType::Int64 => primitive_list_series!(Int64Type, i64),
|
||||
DataType::Float32 => primitive_list_series!(Float32Type, f32),
|
||||
DataType::Float64 => primitive_list_series!(Float64Type, f64),
|
||||
DataType::String => {
|
||||
let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY);
|
||||
for v in values {
|
||||
let value_list = v
|
||||
@ -400,9 +569,7 @@ fn input_type_list_to_series(
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}
|
||||
// Treat lists as objects at this depth as it is expensive to calculate the list type
|
||||
// We can revisit this later if necessary
|
||||
InputType::Date => {
|
||||
DataType::Date => {
|
||||
let mut builder = AnonymousOwnedListBuilder::new(
|
||||
name,
|
||||
values.len(),
|
||||
@ -434,11 +601,11 @@ fn input_type_list_to_series(
|
||||
let res = builder.finish();
|
||||
Ok(res.into_series())
|
||||
}
|
||||
InputType::List(ref sub_list_type) => {
|
||||
DataType::List(ref sub_list_type) => {
|
||||
Ok(input_type_list_to_series(name, sub_list_type, values)?)
|
||||
}
|
||||
// treat everything else as an object
|
||||
_ => Ok(input_type_object_to_series(name, values)?),
|
||||
_ => Ok(value_to_series(name, values)?),
|
||||
}
|
||||
}
|
||||
|
||||
@ -1081,7 +1248,7 @@ mod tests {
|
||||
};
|
||||
let typed_column = TypedColumn {
|
||||
column,
|
||||
column_type: Some(InputType::List(Box::new(InputType::String))),
|
||||
column_type: Some(DataType::List(Box::new(DataType::String))),
|
||||
};
|
||||
|
||||
let column_map = indexmap!("foo".to_string() => typed_column);
|
||||
|
@ -13,7 +13,7 @@ use polars_utils::total_ord::TotalEq;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
|
||||
use super::{utils::DEFAULT_ROWS, NuLazyFrame};
|
||||
use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
|
||||
|
||||
// DataFrameValue is an encapsulation of Nushell Value that can be used
|
||||
// to define the PolarsObject Trait. The polars object trait allows to
|
||||
@ -141,7 +141,7 @@ impl NuDataFrame {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
|
||||
pub fn try_from_iter<T>(iter: T, maybe_schema: Option<NuSchema>) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
@ -161,14 +161,15 @@ impl NuDataFrame {
|
||||
conversion::insert_record(
|
||||
&mut column_values,
|
||||
Record::from_raw_cols_vals(cols, vals),
|
||||
&maybe_schema,
|
||||
)?
|
||||
}
|
||||
Value::Record { val: record, .. } => {
|
||||
conversion::insert_record(&mut column_values, record)?
|
||||
conversion::insert_record(&mut column_values, record, &maybe_schema)?
|
||||
}
|
||||
_ => {
|
||||
let key = "0".to_string();
|
||||
conversion::insert_value(value, key, &mut column_values)?
|
||||
conversion::insert_value(value, key, &mut column_values, &maybe_schema)?
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -188,13 +189,16 @@ impl NuDataFrame {
|
||||
Ok(Self::new(false, dataframe))
|
||||
}
|
||||
|
||||
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
|
||||
pub fn try_from_columns(
|
||||
columns: Vec<Column>,
|
||||
maybe_schema: Option<NuSchema>,
|
||||
) -> Result<Self, ShellError> {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for column in columns {
|
||||
let name = column.name().to_string();
|
||||
for value in column {
|
||||
conversion::insert_value(value, name.clone(), &mut column_values)?;
|
||||
conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -503,4 +507,8 @@ impl NuDataFrame {
|
||||
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> NuSchema {
|
||||
NuSchema::new(self.df.schema())
|
||||
}
|
||||
}
|
||||
|
397
crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs
Normal file
397
crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs
Normal file
@ -0,0 +1,397 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use nu_protocol::{Record, ShellError, Span, Value};
|
||||
use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NuSchema {
|
||||
pub schema: SchemaRef,
|
||||
}
|
||||
|
||||
impl NuSchema {
|
||||
pub fn new(schema: Schema) -> Self {
|
||||
Self {
|
||||
schema: Arc::new(schema),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Value> for NuSchema {
|
||||
type Error = ShellError;
|
||||
fn try_from(value: &Value) -> Result<Self, Self::Error> {
|
||||
let schema = value_to_schema(value, Span::unknown())?;
|
||||
Ok(Self::new(schema))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NuSchema> for Value {
|
||||
fn from(schema: NuSchema) -> Self {
|
||||
fields_to_value(schema.schema.iter_fields(), Span::unknown())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NuSchema> for SchemaRef {
|
||||
fn from(val: NuSchema) -> Self {
|
||||
Arc::clone(&val.schema)
|
||||
}
|
||||
}
|
||||
|
||||
fn fields_to_value(fields: impl Iterator<Item = Field>, span: Span) -> Value {
|
||||
let (cols, vals) = fields
|
||||
.map(|field| {
|
||||
let val = dtype_to_value(field.data_type(), span);
|
||||
let col = field.name().to_string();
|
||||
(col, val)
|
||||
})
|
||||
.unzip();
|
||||
|
||||
let record = Record::from_raw_cols_vals(cols, vals);
|
||||
Value::record(record, Span::unknown())
|
||||
}
|
||||
|
||||
fn dtype_to_value(dtype: &DataType, span: Span) -> Value {
|
||||
match dtype {
|
||||
DataType::Struct(fields) => fields_to_value(fields.iter().cloned(), span),
|
||||
_ => Value::string(dtype.to_string().replace('[', "<").replace(']', ">"), span),
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_schema(value: &Value, span: Span) -> Result<Schema, ShellError> {
|
||||
let fields = value_to_fields(value, span)?;
|
||||
let schema = Schema::from_iter(fields);
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
fn value_to_fields(value: &Value, span: Span) -> Result<Vec<Field>, ShellError> {
|
||||
let fields = value
|
||||
.as_record()?
|
||||
.into_iter()
|
||||
.map(|(col, val)| match val {
|
||||
Value::Record { .. } => {
|
||||
let fields = value_to_fields(val, span)?;
|
||||
let dtype = DataType::Struct(fields);
|
||||
Ok(Field::new(col, dtype))
|
||||
}
|
||||
_ => {
|
||||
let dtype = dtype_str_to_schema(&val.as_string()?, span)?;
|
||||
Ok(Field::new(col, dtype))
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<Field>, ShellError>>()?;
|
||||
Ok(fields)
|
||||
}
|
||||
|
||||
fn dtype_str_to_schema(dtype: &str, span: Span) -> Result<DataType, ShellError> {
|
||||
match dtype {
|
||||
"bool" => Ok(DataType::Boolean),
|
||||
"u8" => Ok(DataType::UInt8),
|
||||
"u16" => Ok(DataType::UInt16),
|
||||
"u32" => Ok(DataType::UInt32),
|
||||
"u64" => Ok(DataType::UInt64),
|
||||
"i8" => Ok(DataType::Int8),
|
||||
"i16" => Ok(DataType::Int16),
|
||||
"i32" => Ok(DataType::Int32),
|
||||
"i64" => Ok(DataType::Int64),
|
||||
"f32" => Ok(DataType::Float32),
|
||||
"f64" => Ok(DataType::Float64),
|
||||
"str" => Ok(DataType::String),
|
||||
"binary" => Ok(DataType::Binary),
|
||||
"date" => Ok(DataType::Date),
|
||||
"time" => Ok(DataType::Time),
|
||||
"null" => Ok(DataType::Null),
|
||||
"unknown" => Ok(DataType::Unknown),
|
||||
"object" => Ok(DataType::Object("unknown", None)),
|
||||
_ if dtype.starts_with("list") => {
|
||||
let dtype = dtype
|
||||
.trim_start_matches("list")
|
||||
.trim_start_matches('<')
|
||||
.trim_end_matches('>')
|
||||
.trim();
|
||||
let dtype = dtype_str_to_schema(dtype, span)?;
|
||||
Ok(DataType::List(Box::new(dtype)))
|
||||
}
|
||||
_ if dtype.starts_with("datetime") => {
|
||||
let dtype = dtype
|
||||
.trim_start_matches("datetime")
|
||||
.trim_start_matches('<')
|
||||
.trim_end_matches('>');
|
||||
let mut split = dtype.split(',');
|
||||
let next = split
|
||||
.next()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Missing time unit".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.trim();
|
||||
let time_unit = str_to_time_unit(next, span)?;
|
||||
let next = split
|
||||
.next()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Missing time zone".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.trim();
|
||||
let timezone = if "*" == next {
|
||||
None
|
||||
} else {
|
||||
Some(next.to_string())
|
||||
};
|
||||
Ok(DataType::Datetime(time_unit, timezone))
|
||||
}
|
||||
_ if dtype.starts_with("duration") => {
|
||||
let inner = dtype.trim_start_matches("duration<").trim_end_matches('>');
|
||||
let next = inner
|
||||
.split(',')
|
||||
.next()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Missing time unit".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.trim();
|
||||
let time_unit = str_to_time_unit(next, span)?;
|
||||
Ok(DataType::Duration(time_unit))
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: format!("Unknown type: {dtype}"),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn str_to_time_unit(ts_string: &str, span: Span) -> Result<TimeUnit, ShellError> {
|
||||
match ts_string {
|
||||
"ms" => Ok(TimeUnit::Milliseconds),
|
||||
"us" | "μs" => Ok(TimeUnit::Microseconds),
|
||||
"ns" => Ok(TimeUnit::Nanoseconds),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Invalid time unit".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {

    use super::*;

    /// Parses `dtype` and checks that the result equals `expected`.
    fn assert_dtype(dtype: &str, expected: DataType) {
        let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap();
        assert_eq!(schema, expected);
    }

    /// Builds a string value carrying the test span.
    fn string_value(val: &str) -> Value {
        Value::String {
            val: val.into(),
            internal_span: Span::test_data(),
        }
    }

    #[test]
    fn test_value_to_schema() {
        // Nested record describing the `address` struct column.
        let address = Value::Record {
            val: Record::from_raw_cols_vals(
                vec!["street".into(), "city".into()],
                vec![string_value("str"), string_value("str")],
            ),
            internal_span: Span::test_data(),
        };
        let value = Value::Record {
            val: Record::from_raw_cols_vals(
                vec!["name".into(), "age".into(), "address".into()],
                vec![string_value("str"), string_value("i32"), address],
            ),
            internal_span: Span::test_data(),
        };

        let schema = value_to_schema(&value, Span::unknown()).unwrap();
        let expected = Schema::from_iter(vec![
            Field::new("name", DataType::String),
            Field::new("age", DataType::Int32),
            Field::new(
                "address",
                DataType::Struct(vec![
                    Field::new("street", DataType::String),
                    Field::new("city", DataType::String),
                ]),
            ),
        ]);
        assert_eq!(schema, expected);
    }

    #[test]
    fn test_dtype_str_to_schema_simple_types() {
        let cases = [
            ("bool", DataType::Boolean),
            ("u8", DataType::UInt8),
            ("u16", DataType::UInt16),
            ("u32", DataType::UInt32),
            ("u64", DataType::UInt64),
            ("i8", DataType::Int8),
            ("i16", DataType::Int16),
            ("i32", DataType::Int32),
            ("i64", DataType::Int64),
            ("str", DataType::String),
            ("binary", DataType::Binary),
            ("date", DataType::Date),
            ("time", DataType::Time),
            ("null", DataType::Null),
            ("unknown", DataType::Unknown),
            ("object", DataType::Object("unknown", None)),
        ];

        for (dtype, expected) in cases {
            assert_dtype(dtype, expected);
        }
    }

    #[test]
    fn test_dtype_str_schema_datetime() {
        let cases = [
            (
                "datetime<ms, *>",
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                "datetime<us, *>",
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                "datetime<μs, *>",
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                "datetime<ns, *>",
                DataType::Datetime(TimeUnit::Nanoseconds, None),
            ),
            (
                "datetime<ms, UTC>",
                DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())),
            ),
        ];

        for (dtype, expected) in cases {
            assert_dtype(dtype, expected);
        }

        // An unrecognised type name must be rejected.
        let schema = dtype_str_to_schema("invalid", Span::unknown());
        assert!(schema.is_err())
    }

    #[test]
    fn test_dtype_str_schema_duration() {
        let cases = [
            ("duration<ms>", DataType::Duration(TimeUnit::Milliseconds)),
            ("duration<us>", DataType::Duration(TimeUnit::Microseconds)),
            ("duration<μs>", DataType::Duration(TimeUnit::Microseconds)),
            ("duration<ns>", DataType::Duration(TimeUnit::Nanoseconds)),
        ];

        for (dtype, expected) in cases {
            assert_dtype(dtype, expected);
        }
    }

    #[test]
    fn test_dtype_str_to_schema_list_types() {
        let cases = [
            ("list<i32>", DataType::List(Box::new(DataType::Int32))),
            (
                "list<duration<ms>>",
                DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds))),
            ),
            (
                "list<datetime<ms, *>>",
                DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None))),
            ),
        ];

        for (dtype, expected) in cases {
            assert_dtype(dtype, expected);
        }
    }
}
|
Loading…
Reference in New Issue
Block a user