All is a DataFrame (#3812)

* nuframe in its own type in UntaggedValue
* Removed eager dataframe from enum
* Dataframe created from list of values
* Corrected order in dataframe columns
* Returned tag from stream collection
* Removed series from dataframe commands
* Arithmetic operators
* forced push
* forced push
* Replace all command
* String commands
* appending operations with dfs
* Testing suite for dataframes
* Unit test for dataframe commands
* improved equality for dataframes
2025-08-09 07:05:47 +02:00 · 2021-07-25 11:01:54 +01:00
parent 9120a64cfb
commit f1ee9113ac
83 changed files with 3293 additions and 1422 deletions
--- a/crates/nu-protocol/src/dataframe/mod.rs
+++ b/crates/nu-protocol/src/dataframe/mod.rs
@ -1,15 +1,11 @@
 pub mod nu_dataframe;
 pub mod nu_groupby;
-pub mod nu_series;

-pub use nu_dataframe::NuDataFrame;
+pub use nu_dataframe::{Column, NuDataFrame};
 pub use nu_groupby::NuGroupBy;
-pub use nu_series::NuSeries;
 use serde::{Deserialize, Serialize};

 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
-pub enum PolarsData {
-    EagerDataFrame(NuDataFrame),
+pub enum FrameStruct {
    GroupBy(NuGroupBy),
-    Series(NuSeries),
 }
--- a/crates/nu-protocol/src/dataframe/nu_dataframe.rs
+++ b/crates/nu-protocol/src/dataframe/nu_dataframe.rs
@ -1,54 +1,147 @@
+use indexmap::{map::Entry, IndexMap};
+use std::cmp::Ordering;
 use std::hash::{Hash, Hasher};
-use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
+use std::ops::{Deref, DerefMut};

 use bigdecimal::FromPrimitive;
 use chrono::{DateTime, FixedOffset, NaiveDateTime};
 use nu_errors::ShellError;
 use nu_source::{Span, Tag};
 use num_bigint::BigInt;
-use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
+use polars::prelude::{AnyValue, DataFrame, DataType, NamedFrom, Series, TimeUnit};
 use serde::{Deserialize, Serialize};

 use crate::{Dictionary, Primitive, UntaggedValue, Value};

-use super::PolarsData;
-
 const SECS_PER_DAY: i64 = 86_400;

 #[derive(Debug)]
-enum InputValue {
-    Integer,
-    Decimal,
-    String,
+pub struct Column {
+    name: String,
+    values: Vec<Value>,
+}
+
+impl Column {
+    pub fn new(name: String, values: Vec<Value>) -> Self {
+        Self { name, values }
+    }
+
+    pub fn new_empty(name: String) -> Self {
+        Self {
+            name,
+            values: Vec::new(),
+        }
+    }
+
+    pub fn push(&mut self, value: Value) {
+        self.values.push(value)
+    }
 }

 #[derive(Debug)]
-struct ColumnValues {
-    pub value_type: InputValue,
-    pub values: Vec<Value>,
+enum InputType {
+    Integer,
+    Decimal,
+    String,
+    Boolean,
 }

-impl Default for ColumnValues {
-    fn default() -> Self {
+#[derive(Debug)]
+struct TypedColumn {
+    pub column: Column,
+    pub column_type: Option<InputType>,
+}
+
+impl TypedColumn {
+    fn new_empty(name: String) -> Self {
        Self {
-            value_type: InputValue::Integer,
-            values: Vec::new(),
+            column: Column::new_empty(name),
+            column_type: None,
        }
    }
 }

-type ColumnMap = HashMap<String, ColumnValues>;
+impl Deref for TypedColumn {
+    type Target = Column;
+
+    fn deref(&self) -> &Self::Target {
+        &self.column
+    }
+}
+
+impl DerefMut for TypedColumn {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.column
+    }
+}
+
+type ColumnMap = IndexMap<String, TypedColumn>;

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct NuDataFrame {
    dataframe: DataFrame,
 }

-// TODO. Better definition of equality and comparison for a dataframe.
-// Probably it make sense to have a name field and use it for comparisons
+// Dataframes are considered equal if they have the same shape, column names
+// and values. An empty dataframe (no columns) never compares equal.
 impl PartialEq for NuDataFrame {
-    fn eq(&self, _: &Self) -> bool {
-        false
+    fn eq(&self, other: &Self) -> bool {
+        if self.as_ref().width() == 0 {
+            // checking for empty dataframe
+            return false;
+        }
+
+        if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
+            // checking both dataframes share the same names
+            return false;
+        }
+
+        if self.as_ref().height() != other.as_ref().height() {
+            // checking both dataframes have the same row size
+            return false;
+        }
+
+        // sorting dataframe by the first column
+        let column_names = self.as_ref().get_column_names();
+        let first_col = column_names
+            .get(0)
+            .expect("already checked that dataframe is different than 0");
+
+        // if unable to sort, then unable to compare
+        let lhs = match self.as_ref().sort(*first_col, false) {
+            Ok(df) => df,
+            Err(_) => return false,
+        };
+
+        let rhs = match other.as_ref().sort(*first_col, false) {
+            Ok(df) => df,
+            Err(_) => return false,
+        };
+
+        for name in self.as_ref().get_column_names() {
+            let self_series = lhs.column(name).expect("name from dataframe names");
+
+            let other_series = rhs
+                .column(name)
+                .expect("already checked that name in other");
+
+            let self_series = match self_series.dtype() {
+                // Casting needed to compare other numeric types with nushell numeric type.
+                // In nushell we only have i64 integer numeric types and any array created
+                // with nushell untagged primitives will be of type i64
+                DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
+                    Ok(series) => series,
+                    Err(_) => return false,
+                },
+                _ => self_series.clone(),
+            };
+
+            if !self_series.series_equal(&other_series) {
+                return false;
+            }
+        }
+
+        true
    }
 }

@ -87,14 +180,14 @@ impl NuDataFrame {
        NuDataFrame { dataframe }
    }

-    pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
+    pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<(Self, Tag), ShellError>
    where
        T: Iterator<Item = Value>,
    {
        input
            .next()
            .and_then(|value| match value.value {
-                UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
+                UntaggedValue::DataFrame(df) => Some((df, value.tag)),
                _ => None,
            })
            .ok_or_else(|| {
@ -113,41 +206,127 @@ impl NuDataFrame {
        // Dictionary to store the columnar data extracted from
        // the input. During the iteration we check if the values
        // have different type
-        let mut column_values: ColumnMap = HashMap::new();
+        let mut column_values: ColumnMap = IndexMap::new();

        for value in iter {
            match value.value {
                UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
                UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
+                UntaggedValue::Primitive(Primitive::Int(_))
+                | UntaggedValue::Primitive(Primitive::Decimal(_))
+                | UntaggedValue::Primitive(Primitive::String(_))
+                | UntaggedValue::Primitive(Primitive::Boolean(_)) => {
+                    let key = format!("{}", 0);
+                    insert_value(value, key, &mut column_values)?
+                }
                _ => {
                    return Err(ShellError::labeled_error_with_secondary(
                        "Format not supported",
                        "Value not supported for conversion",
                        &value.tag,
-                        "Perhaps you want to use a List of Tables or a Dictionary",
+                        "Perhaps you want to use a List, a List of Tables or a Dictionary",
                        &value.tag,
                    ));
                }
            }
        }

-        from_parsed_columns(column_values, tag)
+        from_parsed_columns(column_values, &tag.span)
+    }
+
+    pub fn try_from_series(columns: Vec<Series>, span: &Span) -> Result<Self, ShellError> {
+        let dataframe = DataFrame::new(columns).map_err(|e| {
+            ShellError::labeled_error(
+                "DataFrame Creation",
+                format!("Unable to create DataFrame: {}", e),
+                span,
+            )
+        })?;
+
+        Ok(Self { dataframe })
+    }
+
+    pub fn try_from_columns(columns: Vec<Column>, span: &Span) -> Result<Self, ShellError> {
+        let mut column_values: ColumnMap = IndexMap::new();
+
+        for column in columns {
+            for value in column.values {
+                insert_value(value, column.name.clone(), &mut column_values)?;
+            }
+        }
+
+        from_parsed_columns(column_values, span)
    }

    pub fn into_value(self, tag: Tag) -> Value {
        Value {
-            value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
+            value: Self::into_untagged(self),
            tag,
        }
    }

+    pub fn into_untagged(self) -> UntaggedValue {
+        UntaggedValue::DataFrame(self)
+    }
+
    pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
        Value {
-            value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
+            value: Self::dataframe_to_untagged(df),
            tag,
        }
    }

+    pub fn dataframe_to_untagged(df: DataFrame) -> UntaggedValue {
+        UntaggedValue::DataFrame(Self::new(df))
+    }
+
+    pub fn series_to_untagged(series: Series, span: &Span) -> UntaggedValue {
+        match DataFrame::new(vec![series]) {
+            Ok(dataframe) => UntaggedValue::DataFrame(Self { dataframe }),
+            Err(e) => UntaggedValue::Error(ShellError::labeled_error(
+                "DataFrame Creation",
+                format!("Unable to create DataFrame: {}", e),
+                span,
+            )),
+        }
+    }
+
+    pub fn column(&self, column: &str, tag: &Tag) -> Result<Self, ShellError> {
+        let s = self.as_ref().column(column).map_err(|e| {
+            ShellError::labeled_error("Column not found", format!("{}", e), tag.span)
+        })?;
+
+        let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
+            ShellError::labeled_error("DataFrame error", format!("{}", e), tag.span)
+        })?;
+
+        Ok(Self { dataframe })
+    }
+
+    pub fn is_series(&self) -> bool {
+        self.as_ref().width() == 1
+    }
+
+    pub fn as_series(&self, span: &Span) -> Result<Series, ShellError> {
+        if !self.is_series() {
+            return Err(ShellError::labeled_error_with_secondary(
+                "Not a Series",
+                "DataFrame cannot be used as Series",
+                span,
+                "Note that a Series is a DataFrame with one column",
+                span,
+            ));
+        }
+
+        let series = self
+            .as_ref()
+            .get_columns()
+            .get(0)
+            .expect("We have already checked that the width is 1");
+
+        Ok(series.clone())
+    }
+
    // Print is made out a head and if the dataframe is too large, then a tail
    pub fn print(&self) -> Result<Vec<Value>, ShellError> {
        let df = &self.as_ref();
@ -188,24 +367,17 @@ impl NuDataFrame {

    pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
        let df = self.as_ref();
-        let column_names = df.get_column_names();
+        let upper_row = to_row.min(df.height());

        let mut values: Vec<Value> = Vec::new();
-
-        let upper_row = to_row.min(df.height());
        for i in from_row..upper_row {
-            let row = df.get_row(i);
            let mut dictionary_row = Dictionary::default();
-
-            for (val, name) in row.0.iter().zip(column_names.iter()) {
-                let untagged_val = anyvalue_to_untagged(val)?;
-
+            for col in df.get_columns() {
                let dict_val = Value {
-                    value: untagged_val,
+                    value: anyvalue_to_untagged(&col.get(i))?,
                    tag: Tag::unknown(),
                };
-
-                dictionary_row.insert(name.to_string(), dict_val);
+                dictionary_row.insert(col.name().into(), dict_val);
            }

            let value = Value {
@ -213,7 +385,7 @@ impl NuDataFrame {
                tag: Tag::unknown(),
            };

-            values.push(value);
+            values.push(value)
        }

        Ok(values)
@ -336,8 +508,8 @@ fn insert_value(
    key: String,
    column_values: &mut ColumnMap,
 ) -> Result<(), ShellError> {
-    let col_val = match column_values.entry(key) {
-        Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
+    let col_val = match column_values.entry(key.clone()) {
+        Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
        Entry::Occupied(entry) => entry.into_mut(),
    };

@ -346,13 +518,16 @@ fn insert_value(
    if col_val.values.is_empty() {
        match &value.value {
            UntaggedValue::Primitive(Primitive::Int(_)) => {
-                col_val.value_type = InputValue::Integer;
+                col_val.column_type = Some(InputType::Integer);
            }
            UntaggedValue::Primitive(Primitive::Decimal(_)) => {
-                col_val.value_type = InputValue::Decimal;
+                col_val.column_type = Some(InputType::Decimal);
            }
            UntaggedValue::Primitive(Primitive::String(_)) => {
-                col_val.value_type = InputValue::String;
+                col_val.column_type = Some(InputType::String);
+            }
+            UntaggedValue::Primitive(Primitive::Boolean(_)) => {
+                col_val.column_type = Some(InputType::Boolean);
            }
            _ => {
                return Err(ShellError::labeled_error(
@ -378,6 +553,10 @@ fn insert_value(
            | (
                UntaggedValue::Primitive(Primitive::String(_)),
                UntaggedValue::Primitive(Primitive::String(_)),
+            )
+            | (
+                UntaggedValue::Primitive(Primitive::Boolean(_)),
+                UntaggedValue::Primitive(Primitive::Boolean(_)),
            ) => col_val.values.push(value),
            _ => {
                return Err(ShellError::labeled_error_with_secondary(
@ -397,27 +576,35 @@ fn insert_value(
 // The ColumnMap has the parsed data from the StreamInput
 // This data can be used to create a Series object that can initialize
 // the dataframe based on the type of data that is found
-fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
+fn from_parsed_columns(column_values: ColumnMap, span: &Span) -> Result<NuDataFrame, ShellError> {
    let mut df_series: Vec<Series> = Vec::new();
    for (name, column) in column_values {
-        match column.value_type {
-            InputValue::Decimal => {
-                let series_values: Result<Vec<_>, _> =
-                    column.values.iter().map(|v| v.as_f64()).collect();
-                let series = Series::new(&name, series_values?);
-                df_series.push(series)
-            }
-            InputValue::Integer => {
-                let series_values: Result<Vec<_>, _> =
-                    column.values.iter().map(|v| v.as_i64()).collect();
-                let series = Series::new(&name, series_values?);
-                df_series.push(series)
-            }
-            InputValue::String => {
-                let series_values: Result<Vec<_>, _> =
-                    column.values.iter().map(|v| v.as_string()).collect();
-                let series = Series::new(&name, series_values?);
-                df_series.push(series)
+        if let Some(column_type) = &column.column_type {
+            match column_type {
+                InputType::Decimal => {
+                    let series_values: Result<Vec<_>, _> =
+                        column.values.iter().map(|v| v.as_f64()).collect();
+                    let series = Series::new(&name, series_values?);
+                    df_series.push(series)
+                }
+                InputType::Integer => {
+                    let series_values: Result<Vec<_>, _> =
+                        column.values.iter().map(|v| v.as_i64()).collect();
+                    let series = Series::new(&name, series_values?);
+                    df_series.push(series)
+                }
+                InputType::String => {
+                    let series_values: Result<Vec<_>, _> =
+                        column.values.iter().map(|v| v.as_string()).collect();
+                    let series = Series::new(&name, series_values?);
+                    df_series.push(series)
+                }
+                InputType::Boolean => {
+                    let series_values: Result<Vec<_>, _> =
+                        column.values.iter().map(|v| v.as_bool()).collect();
+                    let series = Series::new(&name, series_values?);
+                    df_series.push(series)
+                }
            }
        }
    }
@ -430,7 +617,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
            return Err(ShellError::labeled_error(
                "Error while creating dataframe",
                format!("{}", e),
-                tag,
+                span,
            ))
        }
    }
--- a/crates/nu-protocol/src/dataframe/nu_groupby.rs
+++ b/crates/nu-protocol/src/dataframe/nu_groupby.rs
@ -2,7 +2,7 @@ use nu_source::{Span, Tag};
 use polars::frame::groupby::{GroupBy, GroupTuples};
 use serde::{Deserialize, Serialize};

-use super::{NuDataFrame, PolarsData};
+use super::{FrameStruct, NuDataFrame};
 use nu_errors::ShellError;

 use crate::{TaggedDictBuilder, UntaggedValue, Value};
@ -30,7 +30,7 @@ impl NuGroupBy {
        input
            .next()
            .and_then(|value| match value.value {
-                UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
+                UntaggedValue::FrameStruct(FrameStruct::GroupBy(group)) => Some(group),
                _ => None,
            })
            .ok_or_else(|| {
--- a/crates/nu-protocol/src/dataframe/nu_series.rs
+++ b/crates/nu-protocol/src/dataframe/nu_series.rs
@ -1,345 +0,0 @@
-use std::cmp::Ordering;
-use std::hash::{Hash, Hasher};
-use std::vec;
-
-use nu_errors::ShellError;
-use nu_source::{Span, Tag};
-use polars::prelude::{DataType, NamedFrom, Series};
-use serde::{Deserialize, Serialize};
-
-use crate::{Dictionary, Primitive, UntaggedValue, Value};
-
-use super::PolarsData;
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct NuSeries {
-    series: Series,
-    dtype: String,
-}
-
-// TODO. Better definition of equality and comparison for a dataframe.
-// Probably it make sense to have a name field and use it for comparisons
-impl PartialEq for NuSeries {
-    fn eq(&self, _: &Self) -> bool {
-        false
-    }
-}
-
-impl Eq for NuSeries {}
-
-impl PartialOrd for NuSeries {
-    fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
-        Some(Ordering::Equal)
-    }
-}
-
-impl Ord for NuSeries {
-    fn cmp(&self, _: &Self) -> Ordering {
-        Ordering::Equal
-    }
-}
-
-impl Hash for NuSeries {
-    fn hash<H: Hasher>(&self, _: &mut H) {}
-}
-
-impl NuSeries {
-    pub fn new(series: Series) -> Self {
-        let dtype = series.dtype().to_string();
-
-        NuSeries { series, dtype }
-    }
-
-    pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
-    where
-        T: Iterator<Item = Value>,
-    {
-        input
-            .next()
-            .and_then(|value| match value.value {
-                UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
-                _ => None,
-            })
-            .ok_or_else(|| {
-                ShellError::labeled_error(
-                    "No series in stream",
-                    "no series found in input stream",
-                    span,
-                )
-            })
-    }
-
-    pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
-    where
-        T: Iterator<Item = Value>,
-    {
-        let mut vec_values: Vec<Value> = Vec::new();
-
-        for value in iter {
-            match value.value {
-                UntaggedValue::Primitive(Primitive::Int(_))
-                | UntaggedValue::Primitive(Primitive::Decimal(_))
-                | UntaggedValue::Primitive(Primitive::String(_))
-                | UntaggedValue::Primitive(Primitive::Boolean(_)) => {
-                    insert_value(value, &mut vec_values)?
-                }
-                _ => {
-                    return Err(ShellError::labeled_error_with_secondary(
-                        "Format not supported",
-                        "Value not supported for conversion",
-                        &value.tag.span,
-                        "Perhaps you want to use a list of primitive values (int, decimal, string, or bool)",
-                        &value.tag.span,
-                    ));
-                }
-            }
-        }
-
-        from_parsed_vector(vec_values, name)
-    }
-
-    pub fn into_value(self, tag: Tag) -> Value {
-        Value {
-            value: UntaggedValue::DataFrame(PolarsData::Series(self)),
-            tag,
-        }
-    }
-
-    pub fn series_to_value(series: Series, tag: Tag) -> Value {
-        Value {
-            value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))),
-            tag,
-        }
-    }
-
-    pub fn series_to_untagged(series: Series) -> UntaggedValue {
-        UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
-    }
-
-    pub fn dtype(&self) -> &str {
-        &self.dtype
-    }
-
-    pub fn series(self) -> Series {
-        self.series
-    }
-}
-
-impl AsRef<Series> for NuSeries {
-    fn as_ref(&self) -> &Series {
-        &self.series
-    }
-}
-
-impl AsMut<Series> for NuSeries {
-    fn as_mut(&mut self) -> &mut Series {
-        &mut self.series
-    }
-}
-
-macro_rules! series_to_chunked {
-    ($converter: expr, $self: expr) => {{
-        let chunked_array = $converter.map_err(|e| {
-            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
-        })?;
-
-        let size = 20;
-
-        let (head_size, skip, tail_size) = if $self.as_ref().len() > size {
-            let remaining = $self.as_ref().len() - (size / 2);
-            let skip = $self.as_ref().len() - remaining;
-            (size / 2, skip, remaining.min(size / 2))
-        } else {
-            (size, 0, 0)
-        };
-
-        let head = chunked_array.into_iter().take(head_size).map(|value| {
-            let value = match value {
-                Some(v) => Value {
-                    value: UntaggedValue::Primitive(v.into()),
-                    tag: Tag::unknown(),
-                },
-                None => Value {
-                    value: UntaggedValue::Primitive(Primitive::Nothing),
-                    tag: Tag::unknown(),
-                },
-            };
-
-            let mut dictionary_row = Dictionary::default();
-            let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());
-            dictionary_row.insert(header, value);
-
-            Value {
-                value: UntaggedValue::Row(dictionary_row),
-                tag: Tag::unknown(),
-            }
-        });
-
-        let res = if $self.as_ref().len() < size {
-            head.collect::<Vec<Value>>()
-        } else {
-            let middle = std::iter::once({
-                let mut dictionary_row = Dictionary::default();
-
-                let value = Value {
-                    value: UntaggedValue::Primitive("...".into()),
-                    tag: Tag::unknown(),
-                };
-
-                let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());
-                dictionary_row.insert(header, value);
-
-                Value {
-                    value: UntaggedValue::Row(dictionary_row),
-                    tag: Tag::unknown(),
-                }
-            });
-
-            let tail =
-                chunked_array
-                    .into_iter()
-                    .skip(skip)
-                    .take(tail_size)
-                    .map(|value| match value {
-                        Some(v) => {
-                            let mut dictionary_row = Dictionary::default();
-
-                            let value = Value {
-                                value: UntaggedValue::Primitive(v.into()),
-                                tag: Tag::unknown(),
-                            };
-
-                            let header = format!("{} ({})", $self.as_ref().name(), $self.dtype());
-                            dictionary_row.insert(header, value);
-
-                            Value {
-                                value: UntaggedValue::Row(dictionary_row),
-                                tag: Tag::unknown(),
-                            }
-                        }
-                        None => Value {
-                            value: UntaggedValue::Primitive(Primitive::Nothing),
-                            tag: Tag::unknown(),
-                        },
-                    });
-
-            head.chain(middle).chain(tail).collect::<Vec<Value>>()
-        };
-
-        Ok(res)
-    }};
-}
-
-impl NuSeries {
-    pub fn print(&self) -> Result<Vec<Value>, ShellError> {
-        match self.as_ref().dtype() {
-            DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
-            DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
-            DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
-            DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
-            DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
-            DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
-            DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
-            DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
-            DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
-            DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
-            DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
-            DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
-            DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
-            DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
-            DataType::Null => Ok(vec![Value {
-                value: UntaggedValue::Primitive(Primitive::Nothing),
-                tag: Tag::unknown(),
-            }]),
-            //DataType::List(_) => None,
-            //DataType::Time64(TimeUnit) => None,
-            //DataType::Duration(TimeUnit) => None,
-            //    DataType::Categorical => None,
-            _ => unimplemented!(),
-        }
-    }
-}
-
-fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
-    // Checking that the type for the value is the same
-    // for the previous value in the column
-    if vec_values.is_empty() {
-        vec_values.push(value);
-        Ok(())
-    } else {
-        let prev_value = &vec_values[vec_values.len() - 1];
-
-        match (&prev_value.value, &value.value) {
-            (
-                UntaggedValue::Primitive(Primitive::Int(_)),
-                UntaggedValue::Primitive(Primitive::Int(_)),
-            )
-            | (
-                UntaggedValue::Primitive(Primitive::Decimal(_)),
-                UntaggedValue::Primitive(Primitive::Decimal(_)),
-            )
-            | (
-                UntaggedValue::Primitive(Primitive::String(_)),
-                UntaggedValue::Primitive(Primitive::String(_)),
-            )
-            | (
-                UntaggedValue::Primitive(Primitive::Boolean(_)),
-                UntaggedValue::Primitive(Primitive::Boolean(_)),
-            ) => {
-                vec_values.push(value);
-                Ok(())
-            }
-            _ => Err(ShellError::labeled_error_with_secondary(
-                "Different values in column",
-                "Value with different type",
-                &value.tag,
-                "Perhaps you want to change it to this value type",
-                &prev_value.tag,
-            )),
-        }
-    }
-}
-
-fn from_parsed_vector(
-    vec_values: Vec<Value>,
-    name: Option<String>,
-) -> Result<NuSeries, ShellError> {
-    let series = match &vec_values[0].value {
-        UntaggedValue::Primitive(Primitive::Int(_)) => {
-            let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
-            let series_name = match &name {
-                Some(n) => n.as_ref(),
-                None => "int",
-            };
-            Series::new(series_name, series_values?)
-        }
-        UntaggedValue::Primitive(Primitive::Decimal(_)) => {
-            let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
-            let series_name = match &name {
-                Some(n) => n.as_ref(),
-                None => "decimal",
-            };
-            Series::new(series_name, series_values?)
-        }
-        UntaggedValue::Primitive(Primitive::String(_)) => {
-            let series_values: Result<Vec<_>, _> =
-                vec_values.iter().map(|v| v.as_string()).collect();
-            let series_name = match &name {
-                Some(n) => n.as_ref(),
-                None => "string",
-            };
-            Series::new(series_name, series_values?)
-        }
-        UntaggedValue::Primitive(Primitive::Boolean(_)) => {
-            let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_bool()).collect();
-            let series_name = match &name {
-                Some(n) => n.as_ref(),
-                None => "string",
-            };
-            Series::new(series_name, series_values?)
-        }
-        _ => unreachable!("The untagged type is checked while creating vec_values"),
-    };
-
-    Ok(NuSeries::new(series))
-}
--- a/crates/nu-protocol/src/type_shape.rs
+++ b/crates/nu-protocol/src/type_shape.rs
@ -75,6 +75,10 @@ pub enum Type {
    /// Dataframe
    #[cfg(feature = "dataframe")]
    DataFrame,
+
+    /// Intermediate dataframe structure (e.g. a groupby)
+    #[cfg(feature = "dataframe")]
+    FrameStruct,
 }

 /// A shape representation of the type of a row
@ -192,6 +196,8 @@ impl Type {
            UntaggedValue::Block(_) => Type::Block,
            #[cfg(feature = "dataframe")]
            UntaggedValue::DataFrame(_) => Type::DataFrame,
+            #[cfg(feature = "dataframe")]
+            UntaggedValue::FrameStruct(_) => Type::DataFrame,
        }
    }
 }
@ -298,7 +304,7 @@ impl PrettyDebug for Type {
            }
            Type::Block => ty("block"),
            #[cfg(feature = "dataframe")]
-            Type::DataFrame => ty("data_type_formatter"),
+            Type::DataFrame | Type::FrameStruct => ty("data_type_formatter"),
        }
    }
 }
--- a/crates/nu-protocol/src/value.rs
+++ b/crates/nu-protocol/src/value.rs
@ -31,7 +31,7 @@ use std::path::PathBuf;
 use std::time::SystemTime;

 #[cfg(feature = "dataframe")]
-use crate::dataframe::PolarsData;
+use crate::dataframe::{FrameStruct, NuDataFrame};

 /// The core structured values that flow through a pipeline
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
@ -51,10 +51,15 @@ pub enum UntaggedValue {
    /// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
    Block(Box<hir::CapturedBlock>),

-    /// Data option that holds the polars structs required to to data
-    /// manipulation and operations using polars dataframes
+    /// Main nushell dataframe
    #[cfg(feature = "dataframe")]
-    DataFrame(PolarsData),
+    DataFrame(NuDataFrame),
+
+    /// Data option that holds intermediate struct required to do data
+    /// manipulation and operations for dataframes such as groupby, lazy frames
+    /// and lazy groupby
+    #[cfg(feature = "dataframe")]
+    FrameStruct(FrameStruct),
 }

 impl UntaggedValue {
@ -685,11 +690,9 @@ impl ShellTypeName for UntaggedValue {
            UntaggedValue::Error(_) => "error",
            UntaggedValue::Block(_) => "block",
            #[cfg(feature = "dataframe")]
-            UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
+            UntaggedValue::DataFrame(_) => "dataframe",
            #[cfg(feature = "dataframe")]
-            UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
-            #[cfg(feature = "dataframe")]
-            UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
+            UntaggedValue::FrameStruct(FrameStruct::GroupBy(_)) => "groupby",
        }
    }
 }
--- a/crates/nu-protocol/src/value/debug.rs
+++ b/crates/nu-protocol/src/value/debug.rs
@ -25,7 +25,9 @@ impl PrettyDebug for Value {
            UntaggedValue::Error(_) => DbgDocBldr::error("error"),
            UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
            #[cfg(feature = "dataframe")]
-            UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"),
+            UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
+                DbgDocBldr::opaque("dataframe")
+            }
        }
    }
 }