mirror of
https://github.com/nushell/nushell.git
synced 2025-06-20 09:58:15 +02:00
`polars into-df`/`polars into-lazy`: `--schema` will not throw error if only some columns are defined (#15473)
# Description

The current implementation of `polars into-df` and `polars into-lazy` will throw an error if `--schema` is provided but not all columns are defined. This PR seeks to remove this requirement so that when a partial `--schema` is provided, the types on the defined columns are overridden while the remaining columns take on their default types.

**Current Implementation**

```
$ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str} | polars schema
Error:   × Schema does not contain column: b
   ╭─[entry #88:1:12]
 1 │ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str} | polars schema
   ·            ─────
   ╰────
```

**New Implementation (no error thrown on partial schema definition)**

Column b is not defined in `--schema`

```
$ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df --schema {a: str} | polars schema
╭───┬─────╮
│ a │ str │
│ b │ str │
╰───┴─────╯
```

> NOTE(review): the diff in this commit appears to skip columns that are absent from `--schema` (see the early return added to `insert_value` and the new `polars into-df` example, whose expected result contains only column `a`). That differs from the output shown above, where column `b` is retained — confirm which behavior is intended.

# User-Facing Changes

Soft breaking change: The user's previous (erroneous) code that would have thrown an error would no longer throw an error. The user's previous working code will still work.

# Tests + Formatting

# After Submitting
This commit is contained in:
parent
12a1eefe73
commit
147009a161
@ -35,7 +35,7 @@ impl PluginCommand for ToDataFrame {
|
|||||||
.named(
|
.named(
|
||||||
"schema",
|
"schema",
|
||||||
SyntaxShape::Record(vec![]),
|
SyntaxShape::Record(vec![]),
|
||||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
r#"Polars Schema in format [{name: str}]."#,
|
||||||
Some('s'),
|
Some('s'),
|
||||||
)
|
)
|
||||||
.switch(
|
.switch(
|
||||||
@ -193,6 +193,16 @@ impl PluginCommand for ToDataFrame {
|
|||||||
.expect("simple df for test should not fail")
|
.expect("simple df for test should not fail")
|
||||||
.into_value(Span::test_data()),
|
.into_value(Span::test_data()),
|
||||||
),
|
),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "If a provided schema specifies a subset of columns, only those columns are selected",
|
||||||
|
example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str}"#,
|
||||||
|
result: Some(NuDataFrame::try_from_series_vec(vec![
|
||||||
|
Series::new("a".into(), ["1", "2"]),
|
||||||
|
], Span::test_data())
|
||||||
|
.expect("simple df for test should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,9 @@ use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable,
|
|||||||
use crate::values::{NuDataFrame, NuLazyFrame};
|
use crate::values::{NuDataFrame, NuLazyFrame};
|
||||||
|
|
||||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
|
use nu_protocol::{
|
||||||
|
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ToLazyFrame;
|
pub struct ToLazyFrame;
|
||||||
@ -24,7 +26,7 @@ impl PluginCommand for ToLazyFrame {
|
|||||||
.named(
|
.named(
|
||||||
"schema",
|
"schema",
|
||||||
SyntaxShape::Record(vec![]),
|
SyntaxShape::Record(vec![]),
|
||||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
r#"Polars Schema in format [{name: str}]."#,
|
||||||
Some('s'),
|
Some('s'),
|
||||||
)
|
)
|
||||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||||
@ -40,7 +42,7 @@ impl PluginCommand for ToLazyFrame {
|
|||||||
Example {
|
Example {
|
||||||
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
|
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
|
||||||
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
|
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
|
||||||
result: None
|
result: Some(Value::test_record(record! {"b" => Value::test_string("str")})),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -70,6 +72,7 @@ impl PluginCommand for ToLazyFrame {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::test::test_polars_plugin_command;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use nu_plugin_test_support::PluginTest;
|
use nu_plugin_test_support::PluginTest;
|
||||||
@ -87,4 +90,9 @@ mod tests {
|
|||||||
assert!(!df.from_eager);
|
assert!(!df.from_eager);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_examples() -> Result<(), ShellError> {
|
||||||
|
test_polars_plugin_command(&ToLazyFrame)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -205,6 +205,13 @@ pub fn insert_value(
|
|||||||
column_values: &mut ColumnMap,
|
column_values: &mut ColumnMap,
|
||||||
maybe_schema: &Option<NuSchema>,
|
maybe_schema: &Option<NuSchema>,
|
||||||
) -> Result<(), ShellError> {
|
) -> Result<(), ShellError> {
|
||||||
|
// If we have a schema but a key is not provided, do not create that column
|
||||||
|
if let Some(schema) = maybe_schema {
|
||||||
|
if !schema.schema.contains(&key) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let col_val = match column_values.entry(key.clone()) {
|
let col_val = match column_values.entry(key.clone()) {
|
||||||
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
|
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
|
||||||
Entry::Occupied(entry) => entry.into_mut(),
|
Entry::Occupied(entry) => entry.into_mut(),
|
||||||
@ -215,28 +222,22 @@ pub fn insert_value(
|
|||||||
if let Some(field) = schema.schema.get_field(&key) {
|
if let Some(field) = schema.schema.get_field(&key) {
|
||||||
col_val.column_type = Some(field.dtype().clone());
|
col_val.column_type = Some(field.dtype().clone());
|
||||||
col_val.values.push(value);
|
col_val.values.push(value);
|
||||||
Ok(())
|
return Ok(());
|
||||||
} else {
|
|
||||||
Err(ShellError::GenericError {
|
|
||||||
error: format!("Schema does not contain column: {key}"),
|
|
||||||
msg: "".into(),
|
|
||||||
span: Some(value.span()),
|
|
||||||
help: None,
|
|
||||||
inner: vec![],
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
let current_data_type = value_to_data_type(&value);
|
|
||||||
if col_val.column_type.is_none() {
|
|
||||||
col_val.column_type = value_to_data_type(&value);
|
|
||||||
} else if let Some(current_data_type) = current_data_type {
|
|
||||||
if col_val.column_type.as_ref() != Some(&current_data_type) {
|
|
||||||
col_val.column_type = Some(DataType::Object("Value", None));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
col_val.values.push(value);
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we do not have a schema, use defaults specified in `value_to_data_type`
|
||||||
|
let current_data_type = value_to_data_type(&value);
|
||||||
|
if col_val.column_type.is_none() {
|
||||||
|
col_val.column_type = value_to_data_type(&value);
|
||||||
|
} else if let Some(current_data_type) = current_data_type {
|
||||||
|
if col_val.column_type.as_ref() != Some(&current_data_type) {
|
||||||
|
col_val.column_type = Some(DataType::Object("Value", None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
col_val.values.push(value);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn value_to_data_type(value: &Value) -> Option<DataType> {
|
fn value_to_data_type(value: &Value) -> Option<DataType> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user