Complete Dataframe MVP (#3373)

* Dataframe MVP

* Removed test csv file

* Dataframe MVP

* Removed test csv file

* New revision polars

* New revision polars

* csv file reader

* argument parser for file reader

* Parser from Row primitive

* Column conversion

* Added as f32 and f64

* Parsing row to dataframe

* Removed repeated push to vector

* Accept table values to create dataframe

* Removed default serde

* Dataframe to rows to show data

* Save name of file with dataframe

* Usage example

* Upgrade polars version

* Clippy changes

* Added print function with head and tail

* Move dataframe struct to folder

* Lock file after running tests and merge

* Optional feature for dataframe

* Removed dataframe from plugins

* Update primitive.rs

Co-authored-by: JT <jonathandturner@users.noreply.github.com>
Author: Fernando Herrera
Date: 2021-05-12 02:01:31 +01:00
Committed by: GitHub
Parent: e73491441a
Commit: c80a9585b0
25 changed files with 1474 additions and 448 deletions

Cargo.lock (generated): 1180 lines changed

File diff suppressed because it is too large

View File

@ -158,6 +158,15 @@ zip-support = ["nu-cli/zip", "nu-command/zip"]
#This is disabled in extra for now
table-pager = ["nu-command/table-pager"]
#dataframe feature for nushell
dataframe = [
"nu-protocol/dataframe",
"nu-command/dataframe",
"nu-value-ext/dataframe",
"nu-data/dataframe"
]
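A note on the wiring above: the new top-level dataframe feature simply forwards to the matching feature in each sub-crate, so every crate keeps its polars dependency optional; building with cargo build --features dataframe should switch the whole chain on in one shot.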
[profile.release]
#strip = "symbols" #Couldn't get working +nightly
codegen-units = 1 #Reduce parallel codegen units

View File

@ -99,6 +99,8 @@ uuid_crate = { package = "uuid", version = "0.8.2", features = ["v4"], optional
which = { version = "4.1.0", optional = true }
zip = { version = "0.5.9", optional = true }
polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
[target.'cfg(unix)'.dependencies]
umask = "1.0.0"
users = "0.11.0"
@ -130,3 +132,4 @@ trash-support = ["trash"]
directories = ["directories-next"]
dirs = ["dirs-next"]
table-pager = ["minus", "crossterm"]
dataframe = ["nu-protocol/dataframe", "polars"]

View File

@ -26,6 +26,8 @@ pub(crate) mod compact;
pub(crate) mod config;
pub(crate) mod constants;
pub(crate) mod cp;
#[cfg(feature = "dataframe")]
pub(crate) mod dataframe;
pub(crate) mod date;
pub(crate) mod debug;
pub(crate) mod def;
@ -184,6 +186,8 @@ pub(crate) use clear::Clear;
pub(crate) mod touch;
pub(crate) use all::Command as All;
pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")]
pub(crate) use dataframe::Dataframe;
pub(crate) use enter::Enter;
pub(crate) use every::Every;
pub(crate) use exec::Exec;

View File

@ -89,7 +89,7 @@ fn all(args: CommandArgs) -> Result<OutputStream, ShellError> {
UntaggedValue::boolean(true).into_value(&tag),
));
// Variables in nu are immutable. Having the same variable across invocations
// of evaluate_baseline_expr does not mutate the variables and those each
// invocations are independent of each other!
scope.enter_scope();

View File

@ -228,6 +228,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
out!("{:?}", row);
}
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::Dataframe(df),
..
} => {
if let Some(table) = table {
// TODO. Configure the parameter rows from file. It can be
// adjusted to see a certain amount of values in the head
let command_args =
create_default_command_args(&context, df.print()?.into(), tag);
let result = table.run(command_args)?;
let _ = result.collect::<Vec<_>>();
}
}
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..

View File

@ -0,0 +1,139 @@
use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader};
pub struct Dataframe;
#[derive(Deserialize)]
pub struct OpenArgs {
file: Tagged<PathBuf>,
}
impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str {
"dataframe"
}
fn usage(&self) -> &str {
"Creates a dataframe from a csv file"
}
fn signature(&self) -> Signature {
Signature::build("dataframe").named(
"file",
SyntaxShape::FilePath,
"the file path to load values from",
Some('f'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
load_dataframe(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe -f test.csv",
result: None,
},
Example {
description: "Takes an input stream and converts it to a dataframe",
example: "echo [[a b];[1 2] [3 4]] | dataframe",
result: None,
},
]
}
}
// Creates a dataframe from either a file or a table.
// If both are supplied, the file takes priority over the stream input.
// The InputStream can carry a table or a dictionary (row) as input.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The file has priority over stream input
if let Some(NamedValue::Value(_, _)) = args
.call_info()
.args
.named
.as_ref()
.map(|named| named.named.get("file"))
.flatten()
{
return create_from_file(args);
}
create_from_input(args)
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone();
// Parsing the arguments that the function uses
let (OpenArgs { file }, _) = args.process()?;
// Needs more detail and arguments while loading the dataframe
// Options:
// - has header
// - infer schema
// - delimiter
// - csv or parquet <- extracted from extension
let csv_reader = match CsvReader::from_path(&file.item) {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Unable to parse file",
format!("{}", e),
&file.tag,
))
}
};
let df = match csv_reader.infer_schema(None).has_header(true).finish() {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Error while parsing dataframe",
format!("{}", e),
&file.tag,
))
}
};
let file_name = match file.item.into_os_string().into_string() {
Ok(name) => name,
Err(e) => {
return Err(ShellError::labeled_error(
"Error with file name",
format!("{:?}", e),
&file.tag,
))
}
};
let nu_dataframe = NuDataFrame {
dataframe: Some(df),
name: file_name,
};
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
Ok(init.to_output_stream())
}
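The TODO above lists reader options (header, schema inference, delimiter) that are not yet exposed. A rough sketch of how they could be threaded through the same polars builder, assuming CsvReader::with_delimiter is available in the pinned polars revision (the values here are illustrative, not part of this commit):
let df = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::labeled_error("Unable to parse file", format!("{}", e), &file.tag))?
.with_delimiter(b';') // assumed polars builder method
.infer_schema(Some(100))
.has_header(false)
.finish()
.map_err(|e| ShellError::labeled_error("Error while parsing dataframe", format!("{}", e), &file.tag))?;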
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
Ok(init.to_output_stream())
}

View File

@ -251,6 +251,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(Seq),
whole_stream_command(SeqDates),
whole_stream_command(TermSize),
#[cfg(feature = "dataframe")]
whole_stream_command(Dataframe),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -114,6 +114,8 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_json::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_json::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
b.iter()
.map(|x| {

View File

@ -75,6 +75,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -95,6 +95,8 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_yaml::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
b.iter()
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))

View File

@ -155,6 +155,14 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
item.0.tag.span,
))
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => {
return Err(ShellError::labeled_error(
"uniq -c cannot operate on dataframes.",
"source",
item.0.tag.span,
))
}
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
}
};

View File

@ -43,3 +43,4 @@ users = "0.11.0"
[features]
directories = ["directories-next"]
dirs = ["dirs-next"]
dataframe = ["nu-protocol/dataframe"]

View File

@ -43,6 +43,10 @@ pub enum InlineShape {
// TODO: Error type
Error,
// TODO: Dataframe type
#[cfg(feature = "dataframe")]
Dataframe,
// Stream markers (used as bookend markers rather than actual values)
BeginningOfStream,
EndOfStream,
@ -123,6 +127,8 @@ impl InlineShape {
UntaggedValue::Table(table) => InlineShape::from_table(table.iter()),
UntaggedValue::Error(_) => InlineShape::Error,
UntaggedValue::Block(_) => InlineShape::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => InlineShape::Dataframe,
}
}
@ -312,6 +318,8 @@ impl PrettyDebug for FormatInlineShape {
.group(),
InlineShape::Block => DbgDocBldr::opaque("block"),
InlineShape::Error => DbgDocBldr::error("error"),
#[cfg(feature = "dataframe")]
InlineShape::Dataframe => DbgDocBldr::error("dataframe_pretty_FormatInlineShape"),
InlineShape::BeginningOfStream => DbgDocBldr::blank(),
InlineShape::EndOfStream => DbgDocBldr::blank(),
}

View File

@ -118,6 +118,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -25,9 +25,14 @@ num-traits = "0.2.14"
serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.11.5"
polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
# implement conversions
serde_json = "1.0"
serde_yaml = "0.8.16"
toml = "0.5.8"
[features]
dataframe = ["polars"]
[build-dependencies]

View File

@ -0,0 +1,3 @@
pub mod nu_dataframe;
pub use nu_dataframe::NuDataFrame;

View File

@ -0,0 +1,432 @@
use std::hash::{Hash, Hasher};
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::Tag;
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use serde::de::{Deserialize, Deserializer, Visitor};
use serde::Serialize;
use std::fmt;
use crate::{Dictionary, Primitive, UntaggedValue, Value};
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
enum InputValue {
Integer,
Decimal,
String,
}
#[derive(Debug)]
struct ColumnValues {
pub value_type: InputValue,
pub values: Vec<Value>,
}
impl Default for ColumnValues {
fn default() -> Self {
Self {
value_type: InputValue::Integer,
values: Vec::new(),
}
}
}
type ColumnMap = HashMap<String, ColumnValues>;
// TODO. Using Option to help with deserialization. It will be better to find
// a way to use serde with dataframes
#[derive(Debug, Clone, Serialize)]
pub struct NuDataFrame {
#[serde(skip_serializing)]
pub dataframe: Option<DataFrame>,
pub name: String,
}
impl Default for NuDataFrame {
fn default() -> Self {
NuDataFrame {
dataframe: None,
name: String::from("From Stream"),
}
}
}
impl NuDataFrame {
fn new() -> Self {
Self::default()
}
}
// TODO. Better definition of equality and comparison for a dataframe.
// Probably it makes sense to have a name field and use it for comparisons
impl PartialEq for NuDataFrame {
fn eq(&self, _: &Self) -> bool {
false
}
}
impl Eq for NuDataFrame {}
impl PartialOrd for NuDataFrame {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
impl Ord for NuDataFrame {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
impl Hash for NuDataFrame {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl<'de> Visitor<'de> for NuDataFrame {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an integer between -2^31 and 2^31")
}
}
impl<'de> Deserialize<'de> for NuDataFrame {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_i32(NuDataFrame::new())
}
}
impl NuDataFrame {
pub fn try_from_iter<T>(iter: T, tag: &Tag) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check whether the values
// in each column share the same type
let mut column_values: ColumnMap = HashMap::new();
for value in iter {
match value.value {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
_ => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
&value.tag,
));
}
}
}
from_parsed_columns(column_values, tag)
}
// Print is made of a head and, if the dataframe is too large, a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let size: usize = 5;
let mut values = self.head(Some(size))?;
if df.height() > size {
add_separator(&mut values, df);
let remaining = df.height() - size;
let tail_size = remaining.min(size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
}
Ok(values)
} else {
unreachable!()
}
}
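For a sense of the sizing logic (illustrative numbers, not from the commit): with the hard-coded size of 5, a dataframe with 12 rows prints the first 5 rows from head, one "..." separator row, and then tail_size = min(12 - 5, 5) = 5 rows from the tail, while a dataframe with 4 rows skips the separator and tail entirely.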
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
} else {
unreachable!()
}
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let column_names = df.get_column_names();
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
let dict_val = Value {
value: untagged_val,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
}
let value = Value {
value: UntaggedValue::Row(dictionary_row),
tag: Tag::unknown(),
};
values.push(value);
}
Ok(values)
} else {
unreachable!()
}
}
}
// Adds a separator to the vector of values using the column names from the
// dataframe to create the Values Row
fn add_separator(values: &mut Vec<Value>, df: &DataFrame) {
let column_names = df.get_column_names();
let mut dictionary = Dictionary::default();
for name in column_names {
let indicator = Value {
value: UntaggedValue::Primitive(Primitive::String("...".to_string())),
tag: Tag::unknown(),
};
dictionary.insert(name.to_string(), indicator);
}
let extra_column = Value {
value: UntaggedValue::Row(dictionary),
tag: Tag::unknown(),
};
values.push(extra_column);
}
// Converts a polars AnyValue to an UntaggedValue
// This is used when printing values coming from polars dataframes
fn anyvalue_to_untagged(anyvalue: &AnyValue) -> Result<UntaggedValue, ShellError> {
Ok(match anyvalue {
AnyValue::Null => UntaggedValue::Primitive(Primitive::Nothing),
AnyValue::Utf8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Boolean(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Date32(a) => {
// elapsed time in day since 1970-01-01
let seconds = *a as i64 * SECS_PER_DAY;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Date64(a) => {
// elapsed time in milliseconds since 1970-01-01
let seconds = *a / 1000;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Time64(a, _) => UntaggedValue::Primitive((*a).into()),
AnyValue::Duration(a, unit) => {
let nanoseconds = match unit {
TimeUnit::Second => *a / 1_000_000_000,
TimeUnit::Millisecond => *a / 1_000_000,
TimeUnit::Microsecond => *a / 1_000,
TimeUnit::Nanosecond => *a,
};
if let Some(bigint) = BigInt::from_i64(nanoseconds) {
UntaggedValue::Primitive(Primitive::Duration(bigint))
} else {
unreachable!("Internal error: protocol did not use compatible decimal")
}
}
AnyValue::List(_) => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
Tag::unknown(),
));
}
})
}
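As a quick check of the Date32 arithmetic above (illustrative, not part of the commit): a Date32 value of 18628 days becomes 18628 * 86_400 = 1_609_459_200 seconds, which NaiveDateTime::from_timestamp turns into 2021-01-01T00:00:00 with a zero FixedOffset; Date64 values take the same path after the millisecond-to-second division.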
// Inserts the values found in an UntaggedValue::Row
// All the entries in the dictionary are checked to verify that
// the column values share the same type.
fn insert_row(column_values: &mut ColumnMap, dictionary: Dictionary) -> Result<(), ShellError> {
for (key, value) in dictionary.entries {
insert_value(value, key, column_values)?;
}
Ok(())
}
// Inserts the values found in an UntaggedValue::Table
// All the entries in the table are checked to verify that
// the column values share the same type.
// The column names are the enumerated indices of the values
fn insert_table(column_values: &mut ColumnMap, table: Vec<Value>) -> Result<(), ShellError> {
for (index, value) in table.into_iter().enumerate() {
let key = format!("{}", index);
insert_value(value, key, column_values)?;
}
Ok(())
}
fn insert_value(
value: Value,
key: String,
column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key) {
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
Entry::Occupied(entry) => entry.into_mut(),
};
// Checking that the type for the value is the same
// for the previous value in the column
if col_val.values.is_empty() {
match &value.value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
col_val.value_type = InputValue::Integer;
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
col_val.value_type = InputValue::Decimal;
}
UntaggedValue::Primitive(Primitive::String(_)) => {
col_val.value_type = InputValue::String;
}
_ => {
return Err(ShellError::labeled_error(
"Only primitive values accepted",
"Not a primitive value",
&value.tag,
));
}
}
col_val.values.push(value);
} else {
let prev_value = &col_val.values[col_val.values.len() - 1];
match (&prev_value.value, &value.value) {
(
UntaggedValue::Primitive(Primitive::Int(_)),
UntaggedValue::Primitive(Primitive::Int(_)),
)
| (
UntaggedValue::Primitive(Primitive::Decimal(_)),
UntaggedValue::Primitive(Primitive::Decimal(_)),
)
| (
UntaggedValue::Primitive(Primitive::String(_)),
UntaggedValue::Primitive(Primitive::String(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error(
"Different values in column",
"Value with different type",
&value.tag,
));
}
}
}
Ok(())
}
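In practice this means a column's type is fixed by the first value seen: a stream such as echo [[a];[1] [1.5]] | dataframe (example invocation is illustrative) would be rejected by the match above with "Different values in column", because the integer 1 pins the column to InputValue::Integer before the decimal arrives.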
// The ColumnMap has the parsed data from the StreamInput
// This data can be used to create a Series object that can initialize
// the dataframe based on the type of data that is found
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
match column.value_type {
InputValue::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f32()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
}
}
let df = DataFrame::new(df_series);
match df {
Ok(df) => Ok(NuDataFrame {
dataframe: Some(df),
name: "From stream".to_string(),
}),
Err(e) => {
return Err(ShellError::labeled_error(
"Error while creating dataframe",
format!("{}", e),
tag,
))
}
}
}
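Putting the stream path together, the example from the command signature, echo [[a b];[1 2] [3 4]] | dataframe, should reduce to roughly the following polars calls (a sketch under the code above, not code from the commit; note that the InputValue::Integer branch currently goes through Value::as_f32, so integer columns land in the frame as f32 series, and column order follows HashMap iteration order):
use polars::prelude::{DataFrame, NamedFrom, Series};
// column "a" collects 1 and 3, column "b" collects 2 and 4
let a = Series::new("a", vec![1.0f32, 3.0]);
let b = Series::new("b", vec![2.0f32, 4.0]);
let df = DataFrame::new(vec![a, b]).expect("columns have equal length");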

View File

@ -12,6 +12,9 @@ mod type_name;
mod type_shape;
pub mod value;
#[cfg(feature = "dataframe")]
pub mod dataframe;
pub use crate::call_info::{CallInfo, EvaluatedArgs};
pub use crate::config_path::ConfigPath;
pub use crate::maybe_owned::MaybeOwned;

View File

@ -69,6 +69,10 @@ pub enum Type {
BeginningOfStream,
/// End of stream marker (used as bookend markers rather than actual values)
EndOfStream,
/// Dataframe
#[cfg(feature = "dataframe")]
Dataframe,
}
/// A shape representation of the type of a row
@ -183,6 +187,8 @@ impl Type {
UntaggedValue::Table(table) => Type::from_table(table.iter()),
UntaggedValue::Error(_) => Type::Error,
UntaggedValue::Block(_) => Type::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => Type::Dataframe,
}
}
}
@ -287,6 +293,8 @@ impl PrettyDebug for Type {
})
}
Type::Block => ty("block"),
#[cfg(feature = "dataframe")]
Type::Dataframe => ty("dataframe_pretty_debug_for_Type"),
}
}
}

View File

@ -30,6 +30,9 @@ use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(feature = "dataframe")]
use crate::dataframe::NuDataFrame;
/// The core structured values that flow through a pipeline
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum UntaggedValue {
@ -47,6 +50,10 @@ pub enum UntaggedValue {
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
Block(Box<hir::CapturedBlock>),
/// NuDataframe
#[cfg(feature = "dataframe")]
Dataframe(NuDataFrame),
}
impl UntaggedValue {
@ -489,6 +496,22 @@ impl Value {
}
}
/// View the Value as signed 32-bit float, if possible
pub fn as_f32(&self) -> Result<f32, ShellError> {
match &self.value {
UntaggedValue::Primitive(primitive) => primitive.as_f32(self.tag.span),
_ => Err(ShellError::type_error("integer", self.spanned_type_name())),
}
}
/// View the Value as signed 64-bit float, if possible
pub fn as_f64(&self) -> Result<f64, ShellError> {
match &self.value {
UntaggedValue::Primitive(primitive) => primitive.as_f64(self.tag.span),
_ => Err(ShellError::type_error("integer", self.spanned_type_name())),
}
}
/// View the Value as boolean, if possible
pub fn as_bool(&self) -> Result<bool, ShellError> {
match &self.value {
@ -634,6 +657,8 @@ impl ShellTypeName for UntaggedValue {
UntaggedValue::Table(_) => "table",
UntaggedValue::Error(_) => "error",
UntaggedValue::Block(_) => "block",
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => "dataframe",
}
}
}

View File

@ -24,6 +24,8 @@ impl PrettyDebug for Value {
.nest(),
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_Value"),
}
}
}

View File

@ -247,6 +247,29 @@ impl Primitive {
}
}
pub fn as_f32(&self, span: Span) -> Result<f32, ShellError> {
match self {
Primitive::Int(int) => int.to_f32().ok_or_else(|| {
ShellError::range_error(
ExpectedRange::F32,
&format!("{}", int).spanned(span),
"converting an integer into a signed 32-bit float",
)
}),
Primitive::Decimal(decimal) => decimal.to_f32().ok_or_else(|| {
ShellError::range_error(
ExpectedRange::F32,
&format!("{}", decimal).spanned(span),
"converting a decimal into a signed 32-bit float",
)
}),
other => Err(ShellError::type_error(
"number",
other.type_name().spanned(span),
)),
}
}
// FIXME: This is a bad name, but no other way to differentiate with our own Duration.
pub fn into_chrono_duration(self, span: Span) -> Result<chrono::Duration, ShellError> {
match self {
@ -332,17 +355,35 @@ impl From<BigInt> for Primitive {
}
}
// Macro to define the From trait for native types to primitives
// The from trait requires a converter that will be applied to the
// native type.
macro_rules! from_native_to_primitive {
($native_type:ty, $primitive_type:expr, $converter: expr) => {
// e.g. from u32 -> Primitive
impl From<$native_type> for Primitive {
fn from(int: $native_type) -> Primitive {
if let Some(i) = $converter(int) {
$primitive_type(i)
} else {
unreachable!("Internal error: protocol did not use compatible decimal")
}
}
}
};
}
from_native_to_primitive!(i8, Primitive::Int, BigInt::from_i8);
from_native_to_primitive!(i16, Primitive::Int, BigInt::from_i16);
from_native_to_primitive!(i32, Primitive::Int, BigInt::from_i32);
from_native_to_primitive!(i64, Primitive::Int, BigInt::from_i64);
from_native_to_primitive!(u8, Primitive::Int, BigInt::from_u8);
from_native_to_primitive!(u16, Primitive::Int, BigInt::from_u16);
from_native_to_primitive!(u32, Primitive::Int, BigInt::from_u32);
from_native_to_primitive!(u64, Primitive::Int, BigInt::from_u64);
from_native_to_primitive!(f32, Primitive::Decimal, BigDecimal::from_f32);
from_native_to_primitive!(f64, Primitive::Decimal, BigDecimal::from_f64);
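For what the macro buys: each invocation above expands to a plain From impl, so call sites elsewhere can stay as simple conversions (a sketch, assuming the surrounding nu-protocol types):
let int_prim: Primitive = 42u32.into(); // goes through BigInt::from_u32
let dec_prim: Primitive = 2.5f64.into(); // goes through BigDecimal::from_f64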
impl From<chrono::Duration> for Primitive {
fn from(duration: chrono::Duration) -> Primitive {
// FIXME: This is a hack since chrono::Duration does not give access to its 'nanos' field.

View File

@ -17,3 +17,6 @@ nu-source = { path = "../nu-source", version = "0.31.0" }
indexmap = { version = "1.6.1", features = ["serde-1"] }
itertools = "0.10.0"
num-traits = "0.2.14"
[features]
dataframe = ["nu-protocol/dataframe"]

View File

@ -724,6 +724,10 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
UntaggedValue::Block(_) | UntaggedValue::Table(_) | UntaggedValue::Error(_) => {
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => {
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
}
}