Introducing polars into-schema (#15534)

# Description
Introduces `polars into-schema` which allows converting Values such as
records to a schema. This implicitly happens when passing records
into commands like `polars into-df` today. This allows you to convert to
a schema object ahead of time and reuse the schema object. This can be
useful for guaranteeing your schema object is correct.

```nu
> ❯ : let schema = ({name: str, type: str} | polars into-schema)

> ❯ : ls | select name type | polars into-lazy -s $schema | polars schema
╭──────┬─────╮
│ name │ str │
│ type │ str │
╰──────┴─────╯
```

# User-Facing Changes
- Introduces `polars into-schema` allowing records to be converted to
schema objects.
This commit is contained in:
Jack Wright 2025-04-10 16:07:44 -07:00 committed by GitHub
parent 7b57f132bb
commit f8ed4b45fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 171 additions and 43 deletions

View File

@ -13,6 +13,7 @@ mod to_dtype;
mod to_lazy;
mod to_nu;
mod to_repr;
mod to_schema;
pub use self::open::OpenDataFrame;
use crate::PolarsPlugin;
@ -42,5 +43,6 @@ pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(ToLazyFrame),
Box::new(ToRepr),
Box::new(to_dtype::ToDataType),
Box::new(to_schema::ToSchema),
]
}

View File

@ -86,7 +86,7 @@ impl PluginCommand for OpenDataFrame {
)
.named(
"schema",
SyntaxShape::Record(vec![]),
SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s')
)
@ -103,7 +103,7 @@ impl PluginCommand for OpenDataFrame {
)
.named(
"hive-schema",
SyntaxShape::Record(vec![]),
SyntaxShape::Any,
r#"Hive schema in format [{name: str}]. Parquet and Arrow files"#,
None,
)

View File

@ -34,7 +34,7 @@ impl PluginCommand for ToDataFrame {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]."#,
Some('s'),
)
@ -203,7 +203,18 @@ impl PluginCommand for ToDataFrame {
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}
},
// Shows `-s` accepting a schema that was bound to a variable beforehand; the
// integer column `a` is expected to come back as strings because the schema
// declares it as `str`.
Example {
    description: "Use a predefined schema",
    example: r#"let schema = {a: str, b: str}; [[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s $schema"#,
    result: Some(
        NuDataFrame::try_from_series_vec(
            vec![
                Series::new("a".into(), ["1", "2"]),
                Series::new("b".into(), ["foo", "bar"]),
            ],
            Span::test_data(),
        )
        .expect("simple df for test should not fail")
        .into_value(Span::test_data()),
    ),
},
]
}

View File

@ -27,8 +27,8 @@ impl PluginCommand for ToDataType {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Convert a string to a specific datatype",
example: r#""i64" | polars into-dtype"#,
description: "Convert a string to a specific datatype and back to a nu object",
example: r#"'i64' | polars into-dtype | polars into-nu"#,
result: Some(Value::string("i64", Span::test_data())),
}]
}
@ -53,3 +53,16 @@ fn command(
NuDataType::try_from_pipeline(plugin, input, call.head)?
.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
    use super::ToDataType;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Hands `ToDataType` to the shared plugin test helper and propagates its result.
    /// NOTE(review): presumably the helper evaluates the command's examples — confirm in `crate::test`.
    #[test]
    fn test_into_dtype() -> Result<(), ShellError> {
        test_polars_plugin_command(&ToDataType)
    }
}

View File

@ -4,8 +4,11 @@ use crate::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
record, Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::NamedFrom;
use polars::series::Series;
#[derive(Clone)]
pub struct ToLazyFrame;
@ -25,7 +28,7 @@ impl PluginCommand for ToLazyFrame {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
SyntaxShape::Any,
r#"Polars Schema in format [{name: str}]."#,
Some('s'),
)
@ -34,16 +37,28 @@ impl PluginCommand for ToLazyFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a table and creates a lazyframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
},
Example {
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
result: Some(Value::test_record(record! {"b" => Value::test_string("str")})),
},
vec![
Example {
description: "Takes a table and creates a lazyframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
},
Example {
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
result: Some(Value::test_record(record! {"b" => Value::test_string("str")})),
},
// Shows `-s` accepting a schema that was bound to a variable beforehand; the
// integer column `a` is expected to come back as strings because the schema
// declares it as `str`.
Example {
    description: "Use a predefined schema",
    example: r#"let schema = {a: str, b: str}; [[a b]; [1 "foo"] [2 "bar"]] | polars into-lazy -s $schema"#,
    result: Some(
        NuDataFrame::try_from_series_vec(
            vec![
                Series::new("a".into(), ["1", "2"]),
                Series::new("b".into(), ["foo", "bar"]),
            ],
            Span::test_data(),
        )
        .expect("simple df for test should not fail")
        .into_value(Span::test_data()),
    ),
},
]
}

View File

@ -5,8 +5,7 @@ use nu_protocol::{
};
use crate::{
dataframe::values::NuExpression,
values::{CustomValueSupport, NuLazyFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
PolarsPlugin,
};
@ -39,6 +38,8 @@ impl PluginCommand for ToNu {
.input_output_types(vec![
(Type::Custom("expression".into()), Type::Any),
(Type::Custom("dataframe".into()), Type::table()),
(Type::Custom("datatype".into()), Type::Any),
(Type::Custom("schema".into()), Type::Any),
])
.category(Category::Custom("dataframe".into()))
}
@ -86,31 +87,54 @@ impl PluginCommand for ToNu {
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
dataframe_command(plugin, call, value)
} else {
expression_command(plugin, call, value)
}
.map_err(|e| e.into())
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn dataframe_command(
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: Value,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => dataframe_command(call, df),
PolarsPluginObject::NuLazyFrame(lazy) => dataframe_command(call, lazy.collect(call.head)?),
PolarsPluginObject::NuExpression(expr) => {
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuDataType(dt) => {
let value = dt.base_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuSchema(schema) => {
let value = schema.base_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
PolarsPluginType::NuDataType,
PolarsPluginType::NuSchema,
],
)),
}
}
fn dataframe_command(call: &EvaluatedCall, df: NuDataFrame) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.get_flag("rows")?;
let tail: bool = call.has_flag("tail")?;
let index: bool = call.has_flag("index")?;
let df = NuDataFrame::try_from_value_coerce(plugin, &input, call.head)?;
let values = if tail {
df.tail(rows, index, call.head)?
} else {
@ -127,17 +151,6 @@ fn dataframe_command(
Ok(PipelineData::Value(value, None))
}
fn expression_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &input)?;
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
#[cfg(test)]
mod test {
use super::*;

View File

@ -0,0 +1,74 @@
use nu_plugin::PluginCommand;
use nu_protocol::{record, Category, Example, ShellError, Signature, Span, Type, Value};
use crate::{
values::{CustomValueSupport, NuSchema},
PolarsPlugin,
};
/// Plugin command `polars into-schema`: converts a value (such as a record)
/// into a polars schema object.
///
/// Derives `Clone` to match the other command structs in this plugin
/// (e.g. `ToLazyFrame`).
#[derive(Clone)]
pub struct ToSchema;
impl PluginCommand for ToSchema {
    type Plugin = PolarsPlugin;

    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "polars into-schema"
    }

    /// One-line help text for the command.
    fn description(&self) -> &str {
        "Convert a value to a polars schema object"
    }

    /// Accepts any input and produces the custom `schema` type; the command is
    /// filed under the `dataframe` category.
    fn signature(&self) -> Signature {
        let schema_type = Type::Custom("schema".into());
        let category = Category::Custom("dataframe".into());

        Signature::build(self.name())
            .input_output_type(Type::Any, schema_type)
            .category(category)
    }

    /// A single round-trip example: record -> schema -> nu record.
    fn examples(&self) -> Vec<Example> {
        let span = Span::test_data();
        let expected = record! {
            "a" => Value::string("str", span),
            "b" => Value::string("u8", span),
        };

        vec![Example {
            description: "Convert a record into a schema and back to a nu object",
            example: r#"{a: str, b: u8} | polars into-schema | polars into-nu"#,
            result: Some(Value::record(expected, span)),
        }]
    }

    /// Delegates to the module-level `command` and converts its `ShellError`
    /// into the `LabeledError` the plugin interface expects.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &nu_plugin::EngineInterface,
        call: &nu_plugin::EvaluatedCall,
        input: nu_protocol::PipelineData,
    ) -> Result<nu_protocol::PipelineData, nu_protocol::LabeledError> {
        command(plugin, engine, call, input).map_err(Into::into)
    }
}
/// Builds a `NuSchema` from the pipeline input and returns it as pipeline data.
///
/// Any error from either step is propagated as a `ShellError`; `call.head` is
/// the span passed to both conversions.
fn command(
    plugin: &PolarsPlugin,
    engine: &nu_plugin::EngineInterface,
    call: &nu_plugin::EvaluatedCall,
    input: nu_protocol::PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
    let span = call.head;
    let schema = NuSchema::try_from_pipeline(plugin, input, span)?;
    schema.to_pipeline_data(plugin, engine, span)
}
#[cfg(test)]
mod test {
    use super::ToSchema;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Hands `ToSchema` to the shared plugin test helper and propagates its result.
    /// NOTE(review): presumably the helper evaluates the command's examples — confirm in `crate::test`.
    #[test]
    fn test_into_schema() -> Result<(), ShellError> {
        test_polars_plugin_command(&ToSchema)
    }
}