Introducing polars into-schema (#15534)

# Description
Introduces `polars into-schema` which allows converting Values such as
records to a schema. This implicitly happens when passing records
into commands like `polars into-df` today. This allows you to convert to
a schema object ahead of time and reuse the schema object. This can be
useful for guaranteeing your schema object is correct.

```nu
> ❯ : let schema = ({name: str, type: str} | polars into-schema)

> ❯ : ls | select name type | polars into-lazy -s $schema | polars schema
╭──────┬─────╮
│ name │ str │
│ type │ str │
╰──────┴─────╯
```

# User-Facing Changes
- Introduces `polars into-schema` allowing records to be converted to
schema objects.
This commit is contained in:
Jack Wright 2025-04-10 16:07:44 -07:00 committed by GitHub
parent 7b57f132bb
commit f8ed4b45fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 171 additions and 43 deletions

View File

@ -13,6 +13,7 @@ mod to_dtype;
mod to_lazy; mod to_lazy;
mod to_nu; mod to_nu;
mod to_repr; mod to_repr;
mod to_schema;
pub use self::open::OpenDataFrame; pub use self::open::OpenDataFrame;
use crate::PolarsPlugin; use crate::PolarsPlugin;
@ -42,5 +43,6 @@ pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(ToLazyFrame), Box::new(ToLazyFrame),
Box::new(ToRepr), Box::new(ToRepr),
Box::new(to_dtype::ToDataType), Box::new(to_dtype::ToDataType),
Box::new(to_schema::ToSchema),
] ]
} }

View File

@ -86,7 +86,7 @@ impl PluginCommand for OpenDataFrame {
) )
.named( .named(
"schema", "schema",
SyntaxShape::Record(vec![]), SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s') Some('s')
) )
@ -103,7 +103,7 @@ impl PluginCommand for OpenDataFrame {
) )
.named( .named(
"hive-schema", "hive-schema",
SyntaxShape::Record(vec![]), SyntaxShape::Any,
r#"Hive schema in format [{name: str}]. Parquet and Arrow files"#, r#"Hive schema in format [{name: str}]. Parquet and Arrow files"#,
None, None,
) )

View File

@ -34,7 +34,7 @@ impl PluginCommand for ToDataFrame {
Signature::build(self.name()) Signature::build(self.name())
.named( .named(
"schema", "schema",
SyntaxShape::Record(vec![]), SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]."#, r#"Polars Schema in format [{name: str}]."#,
Some('s'), Some('s'),
) )
@ -203,7 +203,18 @@ impl PluginCommand for ToDataFrame {
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
} },
Example {
description: "Use a predefined schama",
example: r#"let schema = {a: str, b: str}; [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s $schema"#,
result: Some(NuDataFrame::try_from_series_vec(vec![
Series::new("a".into(), ["1", "2"]),
Series::new("b".into(), ["foo", "bar"]),
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
] ]
} }

View File

@ -27,8 +27,8 @@ impl PluginCommand for ToDataType {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "Convert a string to a specific datatype", description: "Convert a string to a specific datatype and back to a nu object",
example: r#""i64" | polars into-dtype"#, example: r#"'i64' | polars into-dtype | polars into-nu"#,
result: Some(Value::string("i64", Span::test_data())), result: Some(Value::string("i64", Span::test_data())),
}] }]
} }
@ -53,3 +53,16 @@ fn command(
NuDataType::try_from_pipeline(plugin, input, call.head)? NuDataType::try_from_pipeline(plugin, input, call.head)?
.to_pipeline_data(plugin, engine, call.head) .to_pipeline_data(plugin, engine, call.head)
} }
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Exercises `ToDataType` through the shared `test_polars_plugin_command` helper.
    #[test]
    fn test_into_dtype() -> Result<(), ShellError> {
        test_polars_plugin_command(&ToDataType)
    }
}

View File

@ -4,8 +4,11 @@ use crate::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value, record, Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type,
Value,
}; };
use polars::prelude::NamedFrom;
use polars::series::Series;
#[derive(Clone)] #[derive(Clone)]
pub struct ToLazyFrame; pub struct ToLazyFrame;
@ -25,7 +28,7 @@ impl PluginCommand for ToLazyFrame {
Signature::build(self.name()) Signature::build(self.name())
.named( .named(
"schema", "schema",
SyntaxShape::Record(vec![]), SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]."#, r#"Polars Schema in format [{name: str}]."#,
Some('s'), Some('s'),
) )
@ -34,7 +37,8 @@ impl PluginCommand for ToLazyFrame {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
Example {
description: "Takes a table and creates a lazyframe", description: "Takes a table and creates a lazyframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy", example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None, result: None,
@ -44,6 +48,17 @@ impl PluginCommand for ToLazyFrame {
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema", example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
result: Some(Value::test_record(record! {"b" => Value::test_string("str")})), result: Some(Value::test_record(record! {"b" => Value::test_string("str")})),
}, },
Example {
description: "Use a predefined schama",
example: r#"let schema = {a: str, b: str}; [[a b]; [1 "foo"] [2 "bar"]] | polars into-lazy -s $schema"#,
result: Some(NuDataFrame::try_from_series_vec(vec![
Series::new("a".into(), ["1", "2"]),
Series::new("b".into(), ["foo", "bar"]),
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
] ]
} }

View File

@ -5,8 +5,7 @@ use nu_protocol::{
}; };
use crate::{ use crate::{
dataframe::values::NuExpression, values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
values::{CustomValueSupport, NuLazyFrame},
PolarsPlugin, PolarsPlugin,
}; };
@ -39,6 +38,8 @@ impl PluginCommand for ToNu {
.input_output_types(vec![ .input_output_types(vec![
(Type::Custom("expression".into()), Type::Any), (Type::Custom("expression".into()), Type::Any),
(Type::Custom("dataframe".into()), Type::table()), (Type::Custom("dataframe".into()), Type::table()),
(Type::Custom("datatype".into()), Type::Any),
(Type::Custom("schema".into()), Type::Any),
]) ])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -86,31 +87,54 @@ impl PluginCommand for ToNu {
fn run( fn run(
&self, &self,
plugin: &Self::Plugin, plugin: &Self::Plugin,
_engine: &EngineInterface, engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?; command(plugin, engine, call, input).map_err(LabeledError::from)
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
dataframe_command(plugin, call, value)
} else {
expression_command(plugin, call, value)
}
.map_err(|e| e.into())
} }
} }
fn dataframe_command( fn command(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
input: Value, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => dataframe_command(call, df),
PolarsPluginObject::NuLazyFrame(lazy) => dataframe_command(call, lazy.collect(call.head)?),
PolarsPluginObject::NuExpression(expr) => {
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuDataType(dt) => {
let value = dt.base_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuSchema(schema) => {
let value = schema.base_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
PolarsPluginType::NuDataType,
PolarsPluginType::NuSchema,
],
)),
}
}
fn dataframe_command(call: &EvaluatedCall, df: NuDataFrame) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.get_flag("rows")?; let rows: Option<usize> = call.get_flag("rows")?;
let tail: bool = call.has_flag("tail")?; let tail: bool = call.has_flag("tail")?;
let index: bool = call.has_flag("index")?; let index: bool = call.has_flag("index")?;
let df = NuDataFrame::try_from_value_coerce(plugin, &input, call.head)?;
let values = if tail { let values = if tail {
df.tail(rows, index, call.head)? df.tail(rows, index, call.head)?
} else { } else {
@ -127,17 +151,6 @@ fn dataframe_command(
Ok(PipelineData::Value(value, None)) Ok(PipelineData::Value(value, None))
} }
fn expression_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &input)?;
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View File

@ -0,0 +1,74 @@
use nu_plugin::PluginCommand;
use nu_protocol::{record, Category, Example, ShellError, Signature, Span, Type, Value};
use crate::{
values::{CustomValueSupport, NuSchema},
PolarsPlugin,
};
/// Plugin command type behind `polars into-schema`.
pub struct ToSchema;
impl PluginCommand for ToSchema {
    type Plugin = PolarsPlugin;

    /// Command name as typed by the user.
    fn name(&self) -> &str {
        "polars into-schema"
    }

    /// One-line help text for the command.
    fn description(&self) -> &str {
        "Convert a value to a polars schema object"
    }

    /// Declares the command as taking any input and producing a custom `schema`
    /// value, filed under the custom `dataframe` category.
    fn signature(&self) -> Signature {
        let output = Type::Custom("schema".into());
        let category = Category::Custom("dataframe".into());

        Signature::build(self.name())
            .input_output_type(Type::Any, output)
            .category(category)
    }

    /// A single example: a record piped through `polars into-schema` and then
    /// `polars into-nu`, with the expected result being a record of type names.
    fn examples(&self) -> Vec<Example> {
        let span = Span::test_data();
        let expected = record! {
            "a" => Value::string("str", span),
            "b" => Value::string("u8", span),
        };

        vec![Example {
            description: "Convert a record into a schema and back to a nu object",
            example: r#"{a: str, b: u8} | polars into-schema | polars into-nu"#,
            result: Some(Value::record(expected, span)),
        }]
    }

    /// Delegates to the module-level `command` function and converts its
    /// `ShellError` into the `LabeledError` this trait method returns.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &nu_plugin::EngineInterface,
        call: &nu_plugin::EvaluatedCall,
        input: nu_protocol::PipelineData,
    ) -> Result<nu_protocol::PipelineData, nu_protocol::LabeledError> {
        command(plugin, engine, call, input).map_err(|err| err.into())
    }
}
/// Builds a `NuSchema` from the pipeline input and hands it back as pipeline data.
///
/// Any error from either step is propagated to the caller as a `ShellError`.
fn command(
    plugin: &PolarsPlugin,
    engine: &nu_plugin::EngineInterface,
    call: &nu_plugin::EvaluatedCall,
    input: nu_protocol::PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
    let schema = NuSchema::try_from_pipeline(plugin, input, call.head)?;
    schema.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Exercises `ToSchema` through the shared `test_polars_plugin_command` helper.
    #[test]
    fn test_into_schema() -> Result<(), ShellError> {
        let cmd = ToSchema;
        test_polars_plugin_command(&cmd)
    }
}