From 0ad5f4389cce3df5dfe03dc25d27c46a16cf096d Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Mon, 27 Jan 2025 07:02:18 -0500 Subject: [PATCH] nu_plugin_polars: add `polars into-repr` to display dataframe in portable repr format (#14917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds a new command that outputs a NuDataFrame or NuLazyFrame in its repr format, which can then be ingested in another polars instance. Advantages of serializing a dataframe in this format are that it can be viewed as a table, carries type information, and can easily be copied to the clipboard. ```nushell # In Nushell > [[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr shape: (2, 2) ┌─────────────────────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ │ datetime[ns] ┆ i64 │ ╞═════════════════════╪═════╡ │ 2025-01-01 00:00:00 ┆ 2 │ │ 2025-01-02 00:00:00 ┆ 4 │ └─────────────────────┴─────┘ ``` ```python # In python >>> import polars as pl >>> df = pl.from_repr(""" ... shape: (2, 2) ... ┌─────────────────────┬─────┐ ... │ a ┆ b │ ... │ --- ┆ --- │ ... │ datetime[ns] ┆ i64 │ ... ╞═════════════════════╪═════╡ ... │ 2025-01-01 00:00:00 ┆ 2 │ ... │ 2025-01-02 00:00:00 ┆ 4 │ ... └─────────────────────┴─────┘""") shape: (2, 2) ┌─────────────────────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ │ datetime[ns] ┆ i64 │ ╞═════════════════════╪═════╡ │ 2025-01-01 00:00:00 ┆ 2 │ │ 2025-01-02 00:00:00 ┆ 4 │ └─────────────────────┴─────┘ >>> df.select(pl.col("a").dt.offset_by("12m")) shape: (2, 1) ┌─────────────────────┐ │ a │ │ --- │ │ datetime[ns] │ ╞═════════════════════╡ │ 2025-01-01 00:12:00 │ │ 2025-01-02 00:12:00 │ └─────────────────────┘ ``` # User-Facing Changes A new command `polars into-repr` is added. No other commands are impacted by the changes in this PR. # Tests + Formatting Examples were added in the command definition. # After Submitting --- .../src/dataframe/command/core/mod.rs | 3 + .../src/dataframe/command/core/to_repr.rs | 115 ++++++++++++++++++ .../src/dataframe/values/nu_dataframe/mod.rs | 7 ++ 3 files changed, 125 insertions(+) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/core/to_repr.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs index 1badf9d5f1..f71a3dfdbd 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs @@ -10,6 +10,7 @@ mod summary; mod to_df; mod to_lazy; mod to_nu; +mod to_repr; use crate::PolarsPlugin; use nu_plugin::PluginCommand; @@ -22,6 +23,7 @@ pub use summary::Summary; pub use to_df::ToDataFrame; pub use to_lazy::ToLazyFrame; pub use to_nu::ToNu; +pub use to_repr::ToRepr; pub(crate) fn core_commands() -> Vec>> { vec![ @@ -37,5 +39,6 @@ pub(crate) fn core_commands() -> Vec &str { + "polars into-repr" + } + + fn description(&self) -> &str { + "Display a dataframe in its repr format." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![(Type::Custom("dataframe".into()), Type::String)]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Shows dataframe in repr format", + example: + "[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-repr", + result: Some(Value::string( + r#" +shape: (2, 2) +┌─────────────────────┬─────┐ +│ a ┆ b │ +│ --- ┆ --- │ +│ datetime[ns] ┆ i64 │ +╞═════════════════════╪═════╡ +│ 2025-01-01 00:00:00 ┆ 2 │ +│ 2025-01-02 00:00:00 ┆ 4 │ +└─────────────────────┴─────┘"# + .trim(), + Span::test_data(), + )), + }, + Example { + description: "Shows lazy dataframe in repr format", + example: + "[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr", + result: Some(Value::string( + r#" +shape: (2, 2) +┌─────────────────────┬─────┐ +│ a ┆ b │ +│ --- ┆ --- │ +│ datetime[ns] ┆ i64 │ +╞═════════════════════╪═════╡ +│ 2025-01-01 00:00:00 ┆ 2 │ +│ 2025-01-02 00:00:00 ┆ 4 │ +└─────────────────────┴─────┘"# + .trim(), + Span::test_data(), + )), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head)?; + if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { + dataframe_command(plugin, call, value) + } else { + Err(cant_convert_err( + &value, + &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame], + )) + } + .map_err(|e| e.into()) + } +} + +fn dataframe_command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: Value, +) -> Result { + let df = NuDataFrame::try_from_value_coerce(plugin, &input, call.head)?; + let value = Value::string(format!("{}", df), call.head); + Ok(PipelineData::Value(value, None)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToRepr) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs index 1138896d74..bb273a0a8a 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs @@ -13,6 +13,7 @@ use polars::prelude::{ }; use polars_plan::prelude::{lit, Expr, Null}; use polars_utils::total_ord::{TotalEq, TotalHash}; +use std::fmt; use std::{ cmp::Ordering, collections::HashSet, @@ -118,6 +119,12 @@ impl From for NuDataFrame { } } +impl fmt::Display for NuDataFrame { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.df) + } +} + impl NuDataFrame { pub fn new(from_lazy: bool, df: DataFrame) -> Self { let id = Uuid::new_v4();