forked from extern/nushell
Dataframe commands (#3498)
* Sample command * Join command with checks * More dataframes commands * Groupby and aggregate commands * Missing feature dataframe flag * Renamed file
This commit is contained in:
@ -1,3 +1,12 @@
|
||||
pub mod nu_dataframe;
|
||||
pub mod nu_groupby;
|
||||
|
||||
pub use nu_dataframe::NuDataFrame;
|
||||
pub use nu_groupby::NuGroupBy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The polars-backed data kinds exposed by this module: either a dataframe
/// wrapper or a group-by wrapper.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub enum PolarsData {
|
||||
/// Holds a `NuDataFrame`.
EagerDataFrame(NuDataFrame),
|
||||
/// Holds a `NuGroupBy`.
GroupBy(NuGroupBy),
|
||||
}
|
||||
|
@ -59,8 +59,18 @@ impl Default for NuDataFrame {
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
fn new() -> Self {
|
||||
Self::default()
|
||||
pub fn new(df: polars::prelude::DataFrame) -> Self {
|
||||
NuDataFrame {
|
||||
dataframe: Some(df),
|
||||
name: String::from("dataframe"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_name(df: polars::prelude::DataFrame, name: String) -> Self {
|
||||
NuDataFrame {
|
||||
dataframe: Some(df),
|
||||
name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,7 +113,7 @@ impl<'de> Deserialize<'de> for NuDataFrame {
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_i32(NuDataFrame::new())
|
||||
deserializer.deserialize_i32(NuDataFrame::default())
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,22 +147,23 @@ impl NuDataFrame {
|
||||
// Print is made up of a head and, if the dataframe is too large, a tail
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
if let Some(df) = &self.dataframe {
|
||||
let size: usize = 5;
|
||||
let mut values = self.head(Some(size))?;
|
||||
let size: usize = 20;
|
||||
|
||||
if df.height() > size {
|
||||
let sample_size = size / 2;
|
||||
let mut values = self.head(Some(sample_size))?;
|
||||
add_separator(&mut values, df);
|
||||
|
||||
let remaining = df.height() - size;
|
||||
let tail_size = remaining.min(size);
|
||||
let remaining = df.height() - sample_size;
|
||||
let tail_size = remaining.min(sample_size);
|
||||
let mut tail_values = self.tail(Some(tail_size))?;
|
||||
|
||||
values.append(&mut tail_values);
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
Ok(values)
|
||||
} else {
|
||||
Ok(self.head(Some(size))?)
|
||||
}
|
||||
} else {
|
||||
unreachable!()
|
||||
unreachable!("No dataframe found in print command")
|
||||
}
|
||||
}
|
||||
|
||||
|
54
crates/nu-protocol/src/dataframe/nu_groupby.rs
Normal file
54
crates/nu-protocol/src/dataframe/nu_groupby.rs
Normal file
@ -0,0 +1,54 @@
|
||||
use nu_source::Tag;
|
||||
use polars::frame::groupby::{GroupBy, GroupTuples};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::NuDataFrame;
|
||||
use nu_errors::ShellError;
|
||||
|
||||
use crate::{TaggedDictBuilder, Value};
|
||||
|
||||
/// The pieces `to_groupby` uses to build a polars `GroupBy`: the source
/// dataframe, the grouping column names, and the group tuples.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub struct NuGroupBy {
|
||||
// Dataframe the groups refer to; its `name` is shown by `print`.
dataframe: NuDataFrame,
|
||||
// Column names passed to `select_series` in `to_groupby`.
by: Vec<String>,
|
||||
// Group tuples, cloned into every `GroupBy` built by `to_groupby`.
groups: GroupTuples,
|
||||
}
|
||||
|
||||
impl NuGroupBy {
|
||||
/// Builds a `NuGroupBy` from its three parts, stored as given.
pub fn new(dataframe: NuDataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
|
||||
NuGroupBy {
|
||||
dataframe,
|
||||
by,
|
||||
groups,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a polars `GroupBy` that borrows the stored dataframe.
///
/// # Errors
/// Returns a `ShellError` labeled "Error creating groupby" when selecting
/// the `by` columns from the dataframe fails.
///
/// # Panics
/// Panics (via `unreachable!`) if the wrapped `NuDataFrame` holds no
/// dataframe.
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
|
||||
let df = match &self.dataframe.dataframe {
|
||||
Some(df) => df,
|
||||
None => unreachable!("No dataframe in nu_dataframe"),
|
||||
};
|
||||
|
||||
// Resolve the stored column names into series of `df`.
let by = df.select_series(&self.by).map_err(|e| {
|
||||
ShellError::labeled_error("Error creating groupby", format!("{}", e), Tag::unknown())
|
||||
})?;
|
||||
|
||||
// The group tuples are cloned because `self` is only borrowed.
// NOTE(review): the trailing `None` is presumably the optional column
// selection for aggregation; confirm against the polars API.
Ok(GroupBy::new(df, by, self.groups.clone(), None))
|
||||
}
|
||||
|
||||
/// Describes the group-by as two rows, each with a "property" and a
/// "value" column: the dataframe name, then the grouping columns joined
/// with ", ". The visible code always returns `Ok`.
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
let mut values: Vec<Value> = Vec::new();
|
||||
|
||||
// Row 1: name of the wrapped dataframe.
let mut data = TaggedDictBuilder::new(Tag::unknown());
|
||||
data.insert_value("property", "dataframe");
|
||||
data.insert_value("value", self.dataframe.name.as_ref());
|
||||
values.push(data.into_value());
|
||||
|
||||
// Row 2: grouping column names.
let mut data = TaggedDictBuilder::new(Tag::unknown());
|
||||
data.insert_value("property", "group by");
|
||||
data.insert_value("value", self.by.join(", "));
|
||||
values.push(data.into_value());
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
}
|
@ -31,7 +31,7 @@ use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use crate::dataframe::NuDataFrame;
|
||||
use crate::dataframe::PolarsData;
|
||||
|
||||
/// The core structured values that flow through a pipeline
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
@ -54,7 +54,7 @@ pub enum UntaggedValue {
|
||||
/// Data option that holds the polars structs required to do data
|
||||
/// manipulation and operations using polars dataframes
|
||||
#[cfg(feature = "dataframe")]
|
||||
DataFrame(NuDataFrame),
|
||||
DataFrame(PolarsData),
|
||||
}
|
||||
|
||||
impl UntaggedValue {
|
||||
|
@ -364,8 +364,8 @@ macro_rules! from_native_to_primitive {
|
||||
($native_type:ty, $primitive_type:expr, $converter: expr) => {
|
||||
// e.g. from u32 -> Primitive
|
||||
impl From<$native_type> for Primitive {
|
||||
fn from(int: $native_type) -> Primitive {
|
||||
if let Some(i) = $converter(int) {
|
||||
fn from(value: $native_type) -> Primitive {
|
||||
if let Some(i) = $converter(value) {
|
||||
$primitive_type(i)
|
||||
} else {
|
||||
unreachable!("Internal error: protocol did not use compatible decimal")
|
||||
|
Reference in New Issue
Block a user