From b1c4fdfe1dde5d63fdf0ffcb15ec77cd5f036bb8 Mon Sep 17 00:00:00 2001 From: Jack Wright Date: Wed, 9 Apr 2025 13:18:30 -0700 Subject: [PATCH] Introducing `polars into-schema` --- .../src/dataframe/command/core/mod.rs | 2 + .../src/dataframe/command/core/open.rs | 4 +- .../src/dataframe/command/core/to_df.rs | 15 ++++- .../src/dataframe/command/core/to_lazy.rs | 39 ++++++++---- .../src/dataframe/command/core/to_schema.rs | 61 +++++++++++++++++++ 5 files changed, 105 insertions(+), 16 deletions(-) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/core/to_schema.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs index c2b3d26b18..a199e96d36 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs @@ -13,6 +13,7 @@ mod to_dtype; mod to_lazy; mod to_nu; mod to_repr; +mod to_schema; pub use self::open::OpenDataFrame; use crate::PolarsPlugin; @@ -42,5 +43,6 @@ pub(crate) fn core_commands() -> Vec Vec { - vec![Example { - description: "Takes a table and creates a lazyframe", - example: "[[a b];[1 2] [3 4]] | polars into-lazy", - result: None, - }, - Example { - description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema", - example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema", - result: Some(Value::test_record(record! {"b" => Value::test_string("str")})), - }, + vec![ + Example { + description: "Takes a table and creates a lazyframe", + example: "[[a b];[1 2] [3 4]] | polars into-lazy", + result: None, + }, + Example { + description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema", + example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema", + result: Some(Value::test_record(record! 
{"b" => Value::test_string("str")})), + }, + Example { + description: "Use a predefined schema", + example: r#"let schema = {a: str, b: str}; [[a b]; [1 "foo"] [2 "bar"]] | polars into-lazy -s {a: str, b: str}"#, + result: Some(NuDataFrame::try_from_series_vec(vec![ + Series::new("a".into(), ["1", "2"]), + Series::new("b".into(), ["foo", "bar"]), + ], Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, ] } diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/to_schema.rs b/crates/nu_plugin_polars/src/dataframe/command/core/to_schema.rs new file mode 100644 index 0000000000..7430008653 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/command/core/to_schema.rs @@ -0,0 +1,61 @@ +use nu_plugin::PluginCommand; +use nu_protocol::{record, Category, Example, ShellError, Signature, Span, Type, Value}; + +use crate::{ + values::{CustomValueSupport, NuSchema}, + PolarsPlugin, +}; + +pub struct ToSchema; + +impl PluginCommand for ToSchema { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars into-schema" + } + + fn description(&self) -> &str { + "Convert a value to a polars schema object" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type(Type::Any, Type::Custom("schema".into())) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec<Example> { + vec![Example { + description: "Convert a record into a schema", + example: r#"{a: str, b: u8} | polars into-schema"#, + result: Some(Value::record( + record! 
{ + "a" => Value::string("str", Span::test_data()), + "b" => Value::string("u8", Span::test_data()), + }, + Span::test_data(), + )), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, + input: nu_protocol::PipelineData, + ) -> Result<nu_protocol::PipelineData, nu_protocol::LabeledError> { + command(plugin, engine, call, input).map_err(nu_protocol::LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, + input: nu_protocol::PipelineData, +) -> Result<nu_protocol::PipelineData, ShellError> { + NuSchema::try_from_pipeline(plugin, input, call.head)? + .to_pipeline_data(plugin, engine, call.head) +}