mirror of
https://github.com/nushell/nushell.git
synced 2025-06-03 16:45:41 +02:00
Allow polars first
to be used with polars group-by
(#15855)
# Description Provides functionality similar to https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.dataframe.group_by.GroupBy.first.html by allowing `polars first` to be used with a group-by ``` > ❯ : [[a b c d]; [1 0.5 true Apple] [2 0.5 true Orange] [2 4 true Apple] [3 10 false Apple] [4 13 false Banana] [5 14 true Banana]] | polars into-df -s {a: u8, b: f32, c: bool, d: str} | polars group-by d | polars first | polars collect ╭───┬────────┬───┬───────┬───────╮ │ # │ d │ a │ b │ c │ ├───┼────────┼───┼───────┼───────┤ │ 0 │ Apple │ 1 │ 0.50 │ true │ │ 1 │ Banana │ 4 │ 13.00 │ false │ │ 2 │ Orange │ 2 │ 0.50 │ true │ ╰───┴────────┴───┴───────┴───────╯ ``` Additionally, I am setting POLARS_ALLOW_EXTENSION to true to avoid panicking with operations using the dtype object. The conversion will fall back to object when the type cannot be determined, so this could be a common case. # User-Facing Changes - `polars first` can now be used with `polars group-by` --------- Co-authored-by: Jack Wright <jack.wright@nike.com>
This commit is contained in:
parent
d9ecb7da93
commit
8b9f02246f
@ -1,6 +1,6 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
PolarsPlugin,
|
PolarsPlugin,
|
||||||
values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject},
|
values::{Column, CustomValueSupport, NuLazyFrame, NuLazyGroupBy, PolarsPluginObject},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::values::{NuDataFrame, NuExpression};
|
use crate::values::{NuDataFrame, NuExpression};
|
||||||
@ -9,6 +9,7 @@ use nu_protocol::{
|
|||||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||||
Value,
|
Value,
|
||||||
};
|
};
|
||||||
|
use polars::df;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct FirstDF;
|
pub struct FirstDF;
|
||||||
@ -87,6 +88,24 @@ impl PluginCommand for FirstDF {
|
|||||||
example: "polars col a | polars first",
|
example: "polars col a | polars first",
|
||||||
result: None,
|
result: None,
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "Aggregate the first values in the group.",
|
||||||
|
example: "[[a b c d]; [1 0.5 true Apple] [2 0.5 true Orange] [2 4 true Apple] [3 10 false Apple] [4 13 false Banana] [5 14 true Banana]] | polars into-df -s {a: u8, b: f32, c: bool, d: str} | polars group-by d | polars first | polars sort-by [a] | polars collect",
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::new(
|
||||||
|
false,
|
||||||
|
df!(
|
||||||
|
"d" => &["Apple", "Orange", "Banana"],
|
||||||
|
"a" => &[1, 2, 4],
|
||||||
|
"b" => &[0.5, 0.5, 13.0],
|
||||||
|
"c" => &[true, true, false],
|
||||||
|
|
||||||
|
)
|
||||||
|
.expect("dataframe creation should succeed"),
|
||||||
|
)
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,6 +125,9 @@ impl PluginCommand for FirstDF {
|
|||||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||||
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
|
command_lazy(plugin, engine, call, lazy).map_err(|e| e.into())
|
||||||
}
|
}
|
||||||
|
PolarsPluginObject::NuLazyGroupBy(groupby) => {
|
||||||
|
command_groupby(plugin, engine, call, groupby).map_err(|e| e.into())
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
let expr = NuExpression::try_from_value(plugin, &value)?;
|
let expr = NuExpression::try_from_value(plugin, &value)?;
|
||||||
let expr: NuExpression = expr.into_polars().first().into();
|
let expr: NuExpression = expr.into_polars().first().into();
|
||||||
@ -146,6 +168,20 @@ fn command_lazy(
|
|||||||
res.to_pipeline_data(plugin, engine, call.head)
|
res.to_pipeline_data(plugin, engine, call.head)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs `polars first` against a lazy group-by value.
///
/// Reads the optional positional argument at index 0 as a row count
/// (defaulting to 1 when absent), calls `head` with that count on the
/// underlying polars group-by, wraps the result in a `NuLazyFrame`, and
/// hands it back as pipeline data.
///
/// NOTE(review): `head(Some(rows))` presumably keeps the first `rows` rows of
/// each group; confirm against the polars `LazyGroupBy::head` docs.
///
/// # Errors
///
/// Propagates the error from `call.opt(0)` when the positional argument
/// cannot be read as a `usize`, and any error from `to_pipeline_data`.
fn command_groupby(
|
||||||
|
plugin: &PolarsPlugin,
|
||||||
|
engine: &EngineInterface,
|
||||||
|
call: &EvaluatedCall,
|
||||||
|
groupby: NuLazyGroupBy,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
let rows: Option<usize> = call.opt(0)?;
|
||||||
|
// No explicit count supplied: fall back to a single row.
let rows = rows.unwrap_or(1);
|
||||||
|
let res = groupby.to_polars().head(Some(rows));
|
||||||
|
let res: NuLazyFrame = res.into();
|
||||||
|
|
||||||
|
res.to_pipeline_data(plugin, engine, call.head)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -153,6 +189,10 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_examples() -> Result<(), ShellError> {
|
fn test_examples() -> Result<(), ShellError> {
|
||||||
|
// // Extensions are required for the group-by functionality to work
|
||||||
|
// unsafe {
|
||||||
|
// std::env::set_var("POLARS_ALLOW_EXTENSION", "true");
|
||||||
|
// }
|
||||||
test_polars_plugin_command(&FirstDF)
|
test_polars_plugin_command(&FirstDF)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,13 @@ use nu_plugin_polars::PolarsPlugin;
|
|||||||
fn main() {
|
fn main() {
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
|
|
||||||
|
// Set config options via environment variable
|
||||||
|
unsafe {
|
||||||
|
// Extensions are required for certain things like aggregates with object dtypes to work
|
||||||
|
// correctly. It is disabled by default because of unsafe code.
|
||||||
|
// See https://docs.rs/polars/latest/polars/#user-guide for details
|
||||||
|
std::env::set_var("POLARS_ALLOW_EXTENSION", "true");
|
||||||
|
}
|
||||||
match PolarsPlugin::new() {
|
match PolarsPlugin::new() {
|
||||||
Ok(ref plugin) => serve_plugin(plugin, MsgPackSerializer {}),
|
Ok(ref plugin) => serve_plugin(plugin, MsgPackSerializer {}),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user