Ensure that lazy frames converted via to-lazy are not converted back to eager frames later in the pipeline. (#12525)

# Description
@maxim-uvarov discovered the following error:
```
> [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a]
Error:   × Error using as series
   ╭─[entry #1:1:68]
 1 │ [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a]
   ·                                                                    ──────┬──────
   ·                                                                          ╰── dataframe has more than one column
   ╰────
 ```
 
During investigation, I discovered the root cause was that the lazy frame was incorrectly converted back to an eager dataframe. To keep this from happening, I explicitly mark the lazy frame as not having come from an eager frame. This prevents the conversion logic from attempting to convert it back later in the pipeline.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2024-04-15 16:29:42 -07:00 committed by GitHub
parent 078ba5aabe
commit 5f818eaefe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 36 additions and 7 deletions

View File

@ -52,10 +52,33 @@ impl PluginCommand for ToLazyFrame {
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
let mut lazy = NuLazyFrame::from_dataframe(df);
// We don't want this converted back to an eager dataframe at some point
lazy.from_eager = false;
Ok(PipelineData::Value(
lazy.cache(plugin, engine, call.head)?.into_value(call.head),
None,
))
}
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use nu_plugin_test_support::PluginTest;
    use nu_protocol::{ShellError, Span};

    use super::*;

    /// Regression test: a frame produced by `polars into-lazy` must report
    /// `from_eager == false`, even though its input was an eager table.
    #[test]
    fn test_to_lazy() -> Result<(), ShellError> {
        // The plugin is shared between the test harness and the value lookup below.
        let plugin = Arc::new(PolarsPlugin::new_test_mode());
        let mut harness = PluginTest::new("polars", Arc::clone(&plugin))?;

        // Run the pipeline and collapse its output into a single value.
        let value = harness
            .eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?
            .into_value(Span::test_data());

        // Resolve the value back into the lazy frame it refers to.
        let lazy = NuLazyFrame::try_from_value(&plugin, &value)?;
        assert!(!lazy.from_eager);
        Ok(())
    }
}

View File

@ -64,7 +64,7 @@ impl NuLazyFrame {
help: None,
inner: vec![],
})
.map(|df| NuDataFrame::new(!self.from_eager, df))
.map(|df| NuDataFrame::new(false, df))
}
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self

View File

@ -182,12 +182,18 @@ pub mod test {
use nu_plugin_test_support::PluginTest;
use nu_protocol::{ShellError, Span};
pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> {
let plugin = PolarsPlugin {
impl PolarsPlugin {
/// Creates a new polars plugin in test mode
pub fn new_test_mode() -> Self {
PolarsPlugin {
disable_cache_drop: true,
..PolarsPlugin::default()
};
}
}
}
pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> {
let plugin = PolarsPlugin::new_test_mode();
let examples = command.examples();
// we need to cache values in the examples