diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs index 4acf6778e14..1badf9d5f13 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/mod.rs @@ -2,6 +2,7 @@ mod cache; mod columns; mod fetch; mod open; +mod profile; mod save; mod schema; mod shape; @@ -28,6 +29,7 @@ pub(crate) fn core_commands() -> Vec &str { + "polars profile" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn description(&self) -> &str { + "Profile a lazy dataframe. This will run the query and return a record containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed. + +The units of the timings are microseconds." + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Profile a lazy dataframe", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | polars into-lazy + | polars group-by a + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ] + | polars profile +"#, + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head)?; + match PolarsPluginObject::try_from_value(plugin, &value)? 
{
            // An eager dataframe is turned into a lazy one via `lazy()` so both
            // accepted input kinds go through the same profiling path.
            PolarsPluginObject::NuDataFrame(df) => command_lazy(plugin, engine, call, df.lazy()),
            PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
            _ => Err(cant_convert_err(
                &value,
                &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
            )),
        }
        .map_err(LabeledError::from)
    }
}

/// Runs the lazy query through `profile()` and returns both of its results.
///
/// `profile()` on the underlying polars lazy frame yields a pair: the
/// materialized dataframe and a second dataframe holding the profiling data.
/// Each one is converted to a value with `cache_and_to_value`, and the two
/// values are returned as a record with the fields `dataframe` and
/// `profiling`.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` spanning `call.head` when profiling
/// fails, and propagates any error produced while converting either
/// dataframe to a value.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
    let (df, profiling_df) = lazy
        .to_polars()
        .profile()
        .map_err(|e| ShellError::GenericError {
            error: format!("Could not profile dataframe: {e}"),
            msg: "".into(),
            span: Some(call.head),
            help: None,
            inner: vec![],
        })?;

    let df = NuDataFrame::from(df).cache_and_to_value(plugin, engine, call.head)?;
    let profiling_df =
        NuDataFrame::from(profiling_df).cache_and_to_value(plugin, engine, call.head)?;

    // Both results go back together in a single record value.
    let result = Value::record(
        record!(
            "dataframe" => df,
            "profiling" => profiling_df,
        ),
        call.head,
    );

    Ok(PipelineData::Value(result, None))
}