mirror of
https://github.com/nushell/nushell.git
synced 2025-06-20 09:58:15 +02:00
`polars into-df`/`polars into-lazy`: `--schema` will not throw error if only some columns are defined (#15473)
# Description

The current implementation of `polars into-df` and `polars into-lazy` will throw an error if `--schema` is provided but not all columns are defined. This PR seeks to remove this requirement so that when a partial `--schema` is provided, the types on the defined columns are overridden while the remaining columns take on their default types.

**Current Implementation**

```
$ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str} | polars schema
Error:   × Schema does not contain column: b
   ╭─[entry #88:1:12]
 1 │ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str} | polars schema
   ·            ─────
   ╰────
```

**New Implementation (no error thrown on partial schema definition)**

Column `b` is not defined in `--schema`:

```
$ [[a b]; [1 "foo"] [2 "bar"]] | polars into-df --schema {a: str} | polars schema
╭───┬─────╮
│ a │ str │
│ b │ str │
╰───┴─────╯
```

# User-Facing Changes

Soft breaking change: the user's previous (erroneous) code that would have thrown an error will no longer throw an error. The user's previous working code will still work.

# Tests + Formatting

# After Submitting
This commit is contained in:
parent
12a1eefe73
commit
147009a161
@ -35,7 +35,7 @@ impl PluginCommand for ToDataFrame {
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
r#"Polars Schema in format [{name: str}]."#,
|
||||
Some('s'),
|
||||
)
|
||||
.switch(
|
||||
@ -193,6 +193,16 @@ impl PluginCommand for ToDataFrame {
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "If a provided schema specifies a subset of columns, only those columns are selected",
|
||||
example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: str}"#,
|
||||
result: Some(NuDataFrame::try_from_series_vec(vec![
|
||||
Series::new("a".into(), ["1", "2"]),
|
||||
], Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -3,7 +3,9 @@ use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable,
|
||||
use crate::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyFrame;
|
||||
@ -24,7 +26,7 @@ impl PluginCommand for ToLazyFrame {
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
r#"Polars Schema in format [{name: str}]."#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
@ -40,7 +42,7 @@ impl PluginCommand for ToLazyFrame {
|
||||
Example {
|
||||
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
|
||||
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
|
||||
result: None
|
||||
result: Some(Value::test_record(record! {"b" => Value::test_string("str")})),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -70,6 +72,7 @@ impl PluginCommand for ToLazyFrame {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
use std::sync::Arc;
|
||||
|
||||
use nu_plugin_test_support::PluginTest;
|
||||
@ -87,4 +90,9 @@ mod tests {
|
||||
assert!(!df.from_eager);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ToLazyFrame)
|
||||
}
|
||||
}
|
||||
|
@ -205,6 +205,13 @@ pub fn insert_value(
|
||||
column_values: &mut ColumnMap,
|
||||
maybe_schema: &Option<NuSchema>,
|
||||
) -> Result<(), ShellError> {
|
||||
// If we have a schema but a key is not provided, do not create that column
|
||||
if let Some(schema) = maybe_schema {
|
||||
if !schema.schema.contains(&key) {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
let col_val = match column_values.entry(key.clone()) {
|
||||
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())),
|
||||
Entry::Occupied(entry) => entry.into_mut(),
|
||||
@ -215,17 +222,11 @@ pub fn insert_value(
|
||||
if let Some(field) = schema.schema.get_field(&key) {
|
||||
col_val.column_type = Some(field.dtype().clone());
|
||||
col_val.values.push(value);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ShellError::GenericError {
|
||||
error: format!("Schema does not contain column: {key}"),
|
||||
msg: "".into(),
|
||||
span: Some(value.span()),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
}
|
||||
|
||||
// If we do not have a schema, use defaults specified in `value_to_data_type`
|
||||
let current_data_type = value_to_data_type(&value);
|
||||
if col_val.column_type.is_none() {
|
||||
col_val.column_type = value_to_data_type(&value);
|
||||
@ -235,8 +236,8 @@ pub fn insert_value(
|
||||
}
|
||||
}
|
||||
col_val.values.push(value);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_data_type(value: &Value) -> Option<DataType> {
|
||||
|
Loading…
x
Reference in New Issue
Block a user