polars first and polars last will now handle lazy frames natively (#13555)

# Description
Prior this pull request `polars first` and `polars last` would collect a
lazy frame into an eager frame before performing operations. Now `polars
first` will to a `LazyFrame::limit` and `polars last` will perform a
`LazyFrame::tail`. This is really useful in working with very large
datasets.
This commit is contained in:
Jack Wright 2024-08-07 04:36:52 -07:00 committed by GitHub
parent ff09c7964e
commit ec3e0e593d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 20 deletions

View File

@ -1,5 +1,5 @@
use crate::{ use crate::{
values::{Column, CustomValueSupport, NuLazyFrame}, values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject},
PolarsPlugin, PolarsPlugin,
}; };
@ -98,10 +98,14 @@ impl PluginCommand for FirstDF {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?; let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { match PolarsPluginObject::try_from_value(plugin, &value)? {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; PolarsPluginObject::NuDataFrame(df) => {
command(plugin, engine, call, df).map_err(|e| e.into()) command_eager(plugin, engine, call, df).map_err(|e| e.into())
} else { }
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
}
_ => {
let expr = NuExpression::try_from_value(plugin, &value)?; let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().first().into(); let expr: NuExpression = expr.into_polars().first().into();
@ -109,9 +113,10 @@ impl PluginCommand for FirstDF {
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }
} }
}
} }
fn command( fn command_eager(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
@ -126,6 +131,19 @@ fn command(
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<u32> = call.opt(0)?;
let rows = rows.unwrap_or(1);
let res: NuLazyFrame = lazy.to_polars().limit(rows).into();
res.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View File

@ -1,5 +1,5 @@
use crate::{ use crate::{
values::{Column, CustomValueSupport, NuLazyFrame}, values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject},
PolarsPlugin, PolarsPlugin,
}; };
@ -73,10 +73,14 @@ impl PluginCommand for LastDF {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head)?; let value = input.into_value(call.head)?;
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { match PolarsPluginObject::try_from_value(plugin, &value)? {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; PolarsPluginObject::NuDataFrame(df) => {
command(plugin, engine, call, df).map_err(|e| e.into()) command_eager(plugin, engine, call, df).map_err(|e| e.into())
} else { }
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
}
_ => {
let expr = NuExpression::try_from_value(plugin, &value)?; let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().last().into(); let expr: NuExpression = expr.into_polars().last().into();
@ -84,9 +88,10 @@ impl PluginCommand for LastDF {
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }
} }
}
} }
fn command( fn command_eager(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
@ -100,6 +105,20 @@ fn command(
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<u32> = call.opt(0)?;
let rows = rows.unwrap_or(DEFAULT_ROWS as u32);
let res: NuLazyFrame = lazy.to_polars().tail(rows).into();
res.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::test::test_polars_plugin_command; use crate::test::test_polars_plugin_command;