nu_plugin_polars: add polars into-repr to display dataframe in portable repr format (#14917)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR adds a new command, `polars into-repr`, that outputs a NuDataFrame
or NuLazyFrame in its repr format, which can then be ingested by another
polars instance (for example with `pl.from_repr` in Python, as shown below).
Serializing a dataframe in this format has several advantages: it can be
viewed as a table, it carries type information, and it can easily be copied
to the clipboard.

```nushell
# In Nushell
> [[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr

shape: (2, 2)
┌─────────────────────┬─────┐
│ a                   ┆ b   │
│ ---                 ┆ --- │
│ datetime[ns]        ┆ i64 │
╞═════════════════════╪═════╡
│ 2025-01-01 00:00:00 ┆ 2   │
│ 2025-01-02 00:00:00 ┆ 4   │
└─────────────────────┴─────┘
```

```python
# In python
>>> import polars as pl
>>> df = pl.from_repr("""
... shape: (2, 2)
... ┌─────────────────────┬─────┐
... │ a                   ┆ b   │
... │ ---                 ┆ --- │
... │ datetime[ns]        ┆ i64 │
... ╞═════════════════════╪═════╡
... │ 2025-01-01 00:00:00 ┆ 2   │
... │ 2025-01-02 00:00:00 ┆ 4   │
... └─────────────────────┴─────┘""")
shape: (2, 2)
┌─────────────────────┬─────┐
│ a                   ┆ b   │
│ ---                 ┆ --- │
│ datetime[ns]        ┆ i64 │
╞═════════════════════╪═════╡
│ 2025-01-01 00:00:00 ┆ 2   │
│ 2025-01-02 00:00:00 ┆ 4   │
└─────────────────────┴─────┘

>>> df.select(pl.col("a").dt.offset_by("12m"))
shape: (2, 1)
┌─────────────────────┐
│ a                   │
│ ---                 │
│ datetime[ns]        │
╞═════════════════════╡
│ 2025-01-01 00:12:00 │
│ 2025-01-02 00:12:00 │
└─────────────────────┘
```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
A new command `polars into-repr` is added. No other commands are
impacted by the changes in this PR.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
Examples were added in the command definition.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-01-27 07:02:18 -05:00 committed by GitHub
parent 7ea4895513
commit 0ad5f4389c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 125 additions and 0 deletions

View File

@ -10,6 +10,7 @@ mod summary;
mod to_df; mod to_df;
mod to_lazy; mod to_lazy;
mod to_nu; mod to_nu;
mod to_repr;
use crate::PolarsPlugin; use crate::PolarsPlugin;
use nu_plugin::PluginCommand; use nu_plugin::PluginCommand;
@ -22,6 +23,7 @@ pub use summary::Summary;
pub use to_df::ToDataFrame; pub use to_df::ToDataFrame;
pub use to_lazy::ToLazyFrame; pub use to_lazy::ToLazyFrame;
pub use to_nu::ToNu; pub use to_nu::ToNu;
pub use to_repr::ToRepr;
pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> { pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![ vec![
@ -37,5 +39,6 @@ pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(ToDataFrame), Box::new(ToDataFrame),
Box::new(save::SaveDF), Box::new(save::SaveDF),
Box::new(ToLazyFrame), Box::new(ToLazyFrame),
Box::new(ToRepr),
] ]
} }

View File

@ -0,0 +1,115 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use crate::{
values::{cant_convert_err, CustomValueSupport, NuLazyFrame, PolarsPluginType},
PolarsPlugin,
};
use crate::values::NuDataFrame;
#[derive(Clone)]
pub struct ToRepr;
impl PluginCommand for ToRepr {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-repr"
}
fn description(&self) -> &str {
"Display a dataframe in its repr format."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![(Type::Custom("dataframe".into()), Type::String)])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Shows dataframe in repr format",
example:
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-repr",
result: Some(Value::string(
r#"
shape: (2, 2)
a b
--- ---
datetime[ns] i64
2025-01-01 00:00:00 2
2025-01-02 00:00:00 4
"#
.trim(),
Span::test_data(),
)),
},
Example {
description: "Shows lazy dataframe in repr format",
example:
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr",
result: Some(Value::string(
r#"
shape: (2, 2)
a b
--- ---
datetime[ns] i64
2025-01-01 00:00:00 2
2025-01-02 00:00:00 4
"#
.trim(),
Span::test_data(),
)),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
dataframe_command(plugin, call, value)
} else {
Err(cant_convert_err(
&value,
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
))
}
.map_err(|e| e.into())
}
}
/// Converts `input` into a `NuDataFrame` and returns its `Display` rendering as
/// a string value spanned at `call.head`.
///
/// `run` passes both eager and lazy dataframe values here; turning either into a
/// `NuDataFrame` is left to `NuDataFrame::try_from_value_coerce`.
///
/// # Errors
///
/// Propagates the `ShellError` returned by `try_from_value_coerce`.
fn dataframe_command(
    plugin: &PolarsPlugin,
    call: &EvaluatedCall,
    input: Value,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_value_coerce(plugin, &input, call.head)?;
    // `to_string` goes through the `Display` impl, exactly as `format!("{}", ..)` did.
    let repr = Value::string(df.to_string(), call.head);
    Ok(PipelineData::Value(repr, None))
}
#[cfg(test)]
mod test {
    use super::{ShellError, ToRepr};
    use crate::test::test_polars_plugin_command;

    /// Hands `ToRepr` to the crate's plugin test harness.
    // NOTE(review): presumably the harness executes the command's declared
    // examples and compares them with their `result` values — confirm.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = ToRepr;
        test_polars_plugin_command(&command)
    }
}

View File

@ -13,6 +13,7 @@ use polars::prelude::{
}; };
use polars_plan::prelude::{lit, Expr, Null}; use polars_plan::prelude::{lit, Expr, Null};
use polars_utils::total_ord::{TotalEq, TotalHash}; use polars_utils::total_ord::{TotalEq, TotalHash};
use std::fmt;
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
collections::HashSet, collections::HashSet,
@ -118,6 +119,12 @@ impl From<DataFrame> for NuDataFrame {
} }
} }
/// Displays a `NuDataFrame` by writing out the `Display` rendering of its
/// wrapped `df` field.
impl fmt::Display for NuDataFrame {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let frame = &self.df;
        f.write_fmt(format_args!("{}", frame))
    }
}
impl NuDataFrame { impl NuDataFrame {
pub fn new(from_lazy: bool, df: DataFrame) -> Self { pub fn new(from_lazy: bool, df: DataFrame) -> Self {
let id = Uuid::new_v4(); let id = Uuid::new_v4();