mirror of
https://github.com/nushell/nushell.git
synced 2025-04-10 14:08:40 +02:00
Added command polars profile
for profiling lazy dataframes (#13904)
# Description

Introduces a new command `polars profile` for profiling lazy dataframes:

<img width="965" alt="Screenshot 2024-09-22 at 23 46 18" src="https://github.com/user-attachments/assets/11402dd3-8256-43df-a986-64241c15354f">

# User-Facing Changes

- Introduces new command `polars profile`
This commit is contained in:
parent
6f47990a63
commit
28a7461057
@ -2,6 +2,7 @@ mod cache;
|
|||||||
mod columns;
|
mod columns;
|
||||||
mod fetch;
|
mod fetch;
|
||||||
mod open;
|
mod open;
|
||||||
|
mod profile;
|
||||||
mod save;
|
mod save;
|
||||||
mod schema;
|
mod schema;
|
||||||
mod shape;
|
mod shape;
|
||||||
@ -28,6 +29,7 @@ pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
|
|||||||
Box::new(cache::LazyCache),
|
Box::new(cache::LazyCache),
|
||||||
Box::new(LazyFetch),
|
Box::new(LazyFetch),
|
||||||
Box::new(OpenDataFrame),
|
Box::new(OpenDataFrame),
|
||||||
|
Box::new(profile::ProfileDF),
|
||||||
Box::new(Summary),
|
Box::new(Summary),
|
||||||
Box::new(ShapeDF),
|
Box::new(ShapeDF),
|
||||||
Box::new(SchemaCmd),
|
Box::new(SchemaCmd),
|
||||||
|
105
crates/nu_plugin_polars/src/dataframe/command/core/profile.rs
Normal file
105
crates/nu_plugin_polars/src/dataframe/command/core/profile.rs
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||||
|
use nu_protocol::{
|
||||||
|
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Type, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
values::{
|
||||||
|
cant_convert_err, CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsPluginObject,
|
||||||
|
PolarsPluginType,
|
||||||
|
},
|
||||||
|
PolarsPlugin,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Plugin command type backing `polars profile`.
///
/// The type carries no state; all behavior lives in its `PluginCommand`
/// implementation.
pub struct ProfileDF;
|
||||||
|
|
||||||
|
impl PluginCommand for ProfileDF {
|
||||||
|
type Plugin = PolarsPlugin;
|
||||||
|
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"polars profile"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build(self.name())
|
||||||
|
.input_output_type(
|
||||||
|
Type::Custom("dataframe".into()),
|
||||||
|
Type::Custom("dataframe".into()),
|
||||||
|
)
|
||||||
|
.category(Category::Custom("dataframe".into()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
"Profile a lazy dataframe. This will run the query and return a record containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed.
|
||||||
|
|
||||||
|
The units of the timings are microseconds."
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "Profile a lazy dataframe",
|
||||||
|
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||||
|
| polars into-lazy
|
||||||
|
| polars group-by a
|
||||||
|
| polars agg [
|
||||||
|
(polars col b | polars min | polars as "b_min")
|
||||||
|
(polars col b | polars max | polars as "b_max")
|
||||||
|
(polars col b | polars sum | polars as "b_sum")
|
||||||
|
]
|
||||||
|
| polars profile
|
||||||
|
"#,
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(
|
||||||
|
&self,
|
||||||
|
plugin: &Self::Plugin,
|
||||||
|
engine: &EngineInterface,
|
||||||
|
call: &EvaluatedCall,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, LabeledError> {
|
||||||
|
let value = input.into_value(call.head)?;
|
||||||
|
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||||
|
PolarsPluginObject::NuDataFrame(df) => command_lazy(plugin, engine, call, df.lazy()),
|
||||||
|
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||||
|
_ => Err(cant_convert_err(
|
||||||
|
&value,
|
||||||
|
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
.map_err(LabeledError::from)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn command_lazy(
|
||||||
|
plugin: &PolarsPlugin,
|
||||||
|
engine: &EngineInterface,
|
||||||
|
call: &EvaluatedCall,
|
||||||
|
lazy: NuLazyFrame,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
let (df, profiling_df) = lazy
|
||||||
|
.to_polars()
|
||||||
|
.profile()
|
||||||
|
.map_err(|e| ShellError::GenericError {
|
||||||
|
error: format!("Could not profile dataframe: {e}"),
|
||||||
|
msg: "".into(),
|
||||||
|
span: Some(call.head),
|
||||||
|
help: None,
|
||||||
|
inner: vec![],
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let df = NuDataFrame::from(df).cache_and_to_value(plugin, engine, call.head)?;
|
||||||
|
let profiling_df =
|
||||||
|
NuDataFrame::from(profiling_df).cache_and_to_value(plugin, engine, call.head)?;
|
||||||
|
|
||||||
|
let result = Value::record(
|
||||||
|
record!(
|
||||||
|
"dataframe" => df,
|
||||||
|
"profiling" => profiling_df,
|
||||||
|
),
|
||||||
|
call.head,
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(PipelineData::Value(result, None))
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user