Dataframe commands (#3498)

* Sample command

* Join command with checks

* More dataframes commands

* Groupby and aggregate commands

* Missing feature dataframe flag

* Renamed file
This commit is contained in:
Fernando Herrera
2021-05-27 06:09:48 +01:00
committed by GitHub
parent d8c4b9c4fb
commit 3a5ee1aed0
26 changed files with 1221 additions and 290 deletions

View File

@ -1,3 +1,12 @@
pub mod nu_dataframe;
pub mod nu_groupby;
pub use nu_dataframe::NuDataFrame;
pub use nu_groupby::NuGroupBy;
use serde::{Deserialize, Serialize};
/// Polars-backed data that can be carried inside a value.
///
/// Each variant wraps one of the dataframe helper types declared in this
/// module's submodules.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum PolarsData {
    /// A dataframe wrapped in a [`NuDataFrame`].
    EagerDataFrame(NuDataFrame),
    /// A group-by description wrapped in a [`NuGroupBy`].
    GroupBy(NuGroupBy),
}

View File

@ -59,8 +59,18 @@ impl Default for NuDataFrame {
}
impl NuDataFrame {
fn new() -> Self {
Self::default()
pub fn new(df: polars::prelude::DataFrame) -> Self {
NuDataFrame {
dataframe: Some(df),
name: String::from("dataframe"),
}
}
pub fn new_with_name(df: polars::prelude::DataFrame, name: String) -> Self {
NuDataFrame {
dataframe: Some(df),
name,
}
}
}
@ -103,7 +113,7 @@ impl<'de> Deserialize<'de> for NuDataFrame {
where
D: Deserializer<'de>,
{
deserializer.deserialize_i32(NuDataFrame::new())
deserializer.deserialize_i32(NuDataFrame::default())
}
}
@ -137,22 +147,23 @@ impl NuDataFrame {
// Print is made of a head and, if the dataframe is too large, a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let size: usize = 5;
let mut values = self.head(Some(size))?;
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
add_separator(&mut values, df);
let remaining = df.height() - size;
let tail_size = remaining.min(size);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
}
Ok(values)
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
} else {
unreachable!()
unreachable!("No dataframe found in print command")
}
}

View File

@ -0,0 +1,54 @@
use nu_source::Tag;
use polars::frame::groupby::{GroupBy, GroupTuples};
use serde::{Deserialize, Serialize};
use super::NuDataFrame;
use nu_errors::ShellError;
use crate::{TaggedDictBuilder, Value};
/// Stored description of a group-by operation over a [`NuDataFrame`].
///
/// Holds the pieces that `to_groupby` passes to polars' `GroupBy::new` to
/// rebuild the group-by object on demand.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub struct NuGroupBy {
    /// Dataframe the groups refer to.
    dataframe: NuDataFrame,
    /// Names of the columns used as grouping keys.
    by: Vec<String>,
    /// Group tuples handed back to polars when the group-by is rebuilt.
    groups: GroupTuples,
}
impl NuGroupBy {
pub fn new(dataframe: NuDataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
NuGroupBy {
dataframe,
by,
groups,
}
}
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
let df = match &self.dataframe.dataframe {
Some(df) => df,
None => unreachable!("No dataframe in nu_dataframe"),
};
let by = df.select_series(&self.by).map_err(|e| {
ShellError::labeled_error("Error creating groupby", format!("{}", e), Tag::unknown())
})?;
Ok(GroupBy::new(df, by, self.groups.clone(), None))
}
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let mut values: Vec<Value> = Vec::new();
let mut data = TaggedDictBuilder::new(Tag::unknown());
data.insert_value("property", "dataframe");
data.insert_value("value", self.dataframe.name.as_ref());
values.push(data.into_value());
let mut data = TaggedDictBuilder::new(Tag::unknown());
data.insert_value("property", "group by");
data.insert_value("value", self.by.join(", "));
values.push(data.into_value());
Ok(values)
}
}

View File

@ -31,7 +31,7 @@ use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(feature = "dataframe")]
use crate::dataframe::NuDataFrame;
use crate::dataframe::PolarsData;
/// The core structured values that flow through a pipeline
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
@ -54,7 +54,7 @@ pub enum UntaggedValue {
/// Data option that holds the polars structs required to do data
/// manipulation and operations using polars dataframes
#[cfg(feature = "dataframe")]
DataFrame(NuDataFrame),
DataFrame(PolarsData),
}
impl UntaggedValue {

View File

@ -364,8 +364,8 @@ macro_rules! from_native_to_primitive {
($native_type:ty, $primitive_type:expr, $converter: expr) => {
// e.g. from u32 -> Primitive
impl From<$native_type> for Primitive {
fn from(int: $native_type) -> Primitive {
if let Some(i) = $converter(int) {
fn from(value: $native_type) -> Primitive {
if let Some(i) = $converter(value) {
$primitive_type(i)
} else {
unreachable!("Internal error: protocol did not use compatible decimal")