Complete Dataframe MVP (#3373)

* Dataframe MVP

* Removed test csv file

* Dataframe MVP

* Removed test csv file

* New revision polars

* New revision polars

* csv file reader

* argument parser for file reader

* Parser from Row primitive

* Column conversion

* Added as f32 and f64

* Parsing row to dataframe

* Removed repeated push to vector

* Accept table values to create dataframe

* Removed default serde

* Dataframe to rows to show data

* Save name of file with dataframe

* Usage example

* Upgrade polars version

* Clippy changes

* Added print function with head and tail

* Move dataframe struct to folder

* Lock file after running tests and merge

* Optional feature for dataframe

* Removed dataframe from plugins

* Update primitive.rs

Co-authored-by: JT <jonathandturner@users.noreply.github.com>
Author: Fernando Herrera
Date: 2021-05-12 02:01:31 +01:00
Committed by: GitHub
Parent: e73491441a
Commit: c80a9585b0
25 changed files with 1474 additions and 448 deletions

Cargo.lock (generated): 1180 lines changed

File diff suppressed because it is too large

View File

@ -158,6 +158,15 @@ zip-support = ["nu-cli/zip", "nu-command/zip"]
#This is disabled in extra for now
table-pager = ["nu-command/table-pager"]
#dataframe feature for nushell
dataframe = [
"nu-protocol/dataframe",
"nu-command/dataframe",
"nu-value-ext/dataframe",
"nu-data/dataframe"
]
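A note on the wiring above: the new top-level dataframe feature simply forwards to the matching feature in each sub-crate, so every crate keeps its polars dependency optional; building with cargo build --features dataframe should switch the whole chain on in one shot.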
[profile.release]
#strip = "symbols" #Couldn't get working +nightly
codegen-units = 1 #Reduce parallel codegen units

View File

@ -99,6 +99,8 @@ uuid_crate = { package = "uuid", version = "0.8.2", features = ["v4"], optional
which = { version = "4.1.0", optional = true }
zip = { version = "0.5.9", optional = true }
polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
[target.'cfg(unix)'.dependencies]
umask = "1.0.0"
users = "0.11.0"
@ -130,3 +132,4 @@ trash-support = ["trash"]
directories = ["directories-next"]
dirs = ["dirs-next"]
table-pager = ["minus", "crossterm"]
dataframe = ["nu-protocol/dataframe", "polars"]

View File

@ -26,6 +26,8 @@ pub(crate) mod compact;
pub(crate) mod config;
pub(crate) mod constants;
pub(crate) mod cp;
#[cfg(feature = "dataframe")]
pub(crate) mod dataframe;
pub(crate) mod date;
pub(crate) mod debug;
pub(crate) mod def;
@ -184,6 +186,8 @@ pub(crate) use clear::Clear;
pub(crate) mod touch;
pub(crate) use all::Command as All;
pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")]
pub(crate) use dataframe::Dataframe;
pub(crate) use enter::Enter;
pub(crate) use every::Every;
pub(crate) use exec::Exec;

View File

@ -89,7 +89,7 @@ fn all(args: CommandArgs) -> Result<OutputStream, ShellError> {
UntaggedValue::boolean(true).into_value(&tag),
));
// Variables in nu are immutable. Having the same variable across invocations
// of evaluate_baseline_expr does not mutate the variables and those each
// invocations are independent of each other!
scope.enter_scope();

View File

@ -228,6 +228,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
out!("{:?}", row);
}
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::Dataframe(df),
..
} => {
if let Some(table) = table {
// TODO. Configure the parameter rows from file. It can be
// adjusted to see a certain amount of values in the head
let command_args =
create_default_command_args(&context, df.print()?.into(), tag);
let result = table.run(command_args)?;
let _ = result.collect::<Vec<_>>();
}
}
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..

View File

@ -0,0 +1,139 @@
use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader};
pub struct Dataframe;
#[derive(Deserialize)]
pub struct OpenArgs {
file: Tagged<PathBuf>,
}
impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str {
"dataframe"
}
fn usage(&self) -> &str {
"Creates a dataframe from a csv file"
}
fn signature(&self) -> Signature {
Signature::build("dataframe").named(
"file",
SyntaxShape::FilePath,
"the file path to load values from",
Some('f'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
load_dataframe(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe -f test.csv",
result: None,
},
Example {
description: "Takes an input stream and converts it to a dataframe",
example: "echo [[a b];[1 2] [3 4]] | dataframe",
result: None,
},
]
}
}
// Creates a dataframe from either a file or a table.
// If both are supplied, the file takes priority over the stream input.
// The InputStream can carry a table or a dictionary (row) as input.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The file has priority over stream input
if let Some(NamedValue::Value(_, _)) = args
.call_info()
.args
.named
.as_ref()
.map(|named| named.named.get("file"))
.flatten()
{
return create_from_file(args);
}
create_from_input(args)
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone();
// Parsing the arguments that the function uses
let (OpenArgs { file }, _) = args.process()?;
// Needs more detail and arguments while loading the dataframe
// Options:
// - has header
// - infer schema
// - delimiter
// - csv or parquet <- extracted from extension
let csv_reader = match CsvReader::from_path(&file.item) {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Unable to parse file",
format!("{}", e),
&file.tag,
))
}
};
let df = match csv_reader.infer_schema(None).has_header(true).finish() {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Error while parsing dataframe",
format!("{}", e),
&file.tag,
))
}
};
let file_name = match file.item.into_os_string().into_string() {
Ok(name) => name,
Err(e) => {
return Err(ShellError::labeled_error(
"Error with file name",
format!("{:?}", e),
&file.tag,
))
}
};
let nu_dataframe = NuDataFrame {
dataframe: Some(df),
name: file_name,
};
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
Ok(init.to_output_stream())
}
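The TODO above lists reader options (header, schema inference, delimiter) that are not yet exposed. A rough sketch of how they could be threaded through the same polars builder, assuming CsvReader::with_delimiter is available in the pinned polars revision (the values here are illustrative, not part of this commit):
let df = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::labeled_error("Unable to parse file", format!("{}", e), &file.tag))?
.with_delimiter(b';') // assumed polars builder method
.infer_schema(Some(100))
.has_header(false)
.finish()
.map_err(|e| ShellError::labeled_error("Error while parsing dataframe", format!("{}", e), &file.tag))?;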
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
Ok(init.to_output_stream())
}

View File

@ -251,6 +251,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(Seq),
whole_stream_command(SeqDates),
whole_stream_command(TermSize),
#[cfg(feature = "dataframe")]
whole_stream_command(Dataframe),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -114,6 +114,8 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_json::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_json::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
b.iter()
.map(|x| {

View File

@ -75,6 +75,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -95,6 +95,8 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
serde_yaml::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
b.iter()
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))

View File

@ -155,6 +155,14 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
item.0.tag.span,
))
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => {
return Err(ShellError::labeled_error(
"uniq -c cannot operate on dataframes.",
"source",
item.0.tag.span,
))
}
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
}
};

View File

@ -43,3 +43,4 @@ users = "0.11.0"
[features]
directories = ["directories-next"]
dirs = ["dirs-next"]
dataframe = ["nu-protocol/dataframe"]

View File

@ -43,6 +43,10 @@ pub enum InlineShape {
// TODO: Error type
Error,
// TODO: Dataframe type
#[cfg(feature = "dataframe")]
Dataframe,
// Stream markers (used as bookend markers rather than actual values)
BeginningOfStream,
EndOfStream,
@ -123,6 +127,8 @@ impl InlineShape {
UntaggedValue::Table(table) => InlineShape::from_table(table.iter()),
UntaggedValue::Error(_) => InlineShape::Error,
UntaggedValue::Block(_) => InlineShape::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => InlineShape::Dataframe,
}
}
@ -312,6 +318,8 @@ impl PrettyDebug for FormatInlineShape {
.group(),
InlineShape::Block => DbgDocBldr::opaque("block"),
InlineShape::Error => DbgDocBldr::error("error"),
#[cfg(feature = "dataframe")]
InlineShape::Dataframe => DbgDocBldr::error("dataframe_pretty_FormatInlineShape"),
InlineShape::BeginningOfStream => DbgDocBldr::blank(),
InlineShape::EndOfStream => DbgDocBldr::blank(),
}

View File

@ -118,6 +118,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -25,9 +25,14 @@ num-traits = "0.2.14"
serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.11.5"
polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
# implement conversions
serde_json = "1.0"
serde_yaml = "0.8.16"
toml = "0.5.8"
[features]
dataframe = ["polars"]
[build-dependencies]

View File

@ -0,0 +1,3 @@
pub mod nu_dataframe;
pub use nu_dataframe::NuDataFrame;

View File

@ -0,0 +1,432 @@
use std::hash::{Hash, Hasher};
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::Tag;
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use serde::de::{Deserialize, Deserializer, Visitor};
use serde::Serialize;
use std::fmt;
use crate::{Dictionary, Primitive, UntaggedValue, Value};
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
enum InputValue {
Integer,
Decimal,
String,
}
#[derive(Debug)]
struct ColumnValues {
pub value_type: InputValue,
pub values: Vec<Value>,
}
impl Default for ColumnValues {
fn default() -> Self {
Self {
value_type: InputValue::Integer,
values: Vec::new(),
}
}
}
type ColumnMap = HashMap<String, ColumnValues>;
// TODO. Using Option to help with deserialization. It will be better to find
// a way to use serde with dataframes
#[derive(Debug, Clone, Serialize)]
pub struct NuDataFrame {
#[serde(skip_serializing)]
pub dataframe: Option<DataFrame>,
pub name: String,
}
impl Default for NuDataFrame {
fn default() -> Self {
NuDataFrame {
dataframe: None,
name: String::from("From Stream"),
}
}
}
impl NuDataFrame {
fn new() -> Self {
Self::default()
}
}
// TODO. Better definition of equality and comparison for a dataframe.
// Probably it makes sense to have a name field and use it for comparisons
impl PartialEq for NuDataFrame {
fn eq(&self, _: &Self) -> bool {
false
}
}
impl Eq for NuDataFrame {}
impl PartialOrd for NuDataFrame {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
impl Ord for NuDataFrame {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
impl Hash for NuDataFrame {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl<'de> Visitor<'de> for NuDataFrame {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an integer between -2^31 and 2^31")
}
}
impl<'de> Deserialize<'de> for NuDataFrame {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_i32(NuDataFrame::new())
}
}
impl NuDataFrame {
pub fn try_from_iter<T>(iter: T, tag: &Tag) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check whether the values
// in each column share the same type
let mut column_values: ColumnMap = HashMap::new();
for value in iter {
match value.value {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
_ => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
&value.tag,
));
}
}
}
from_parsed_columns(column_values, tag)
}
// Print is made of a head and, if the dataframe is too large, a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let size: usize = 5;
let mut values = self.head(Some(size))?;
if df.height() > size {
add_separator(&mut values, df);
let remaining = df.height() - size;
let tail_size = remaining.min(size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
}
Ok(values)
} else {
unreachable!()
}
}
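For a sense of the sizing logic (illustrative numbers, not from the commit): with the hard-coded size of 5, a dataframe with 12 rows prints the first 5 rows from head, one "..." separator row, and then tail_size = min(12 - 5, 5) = 5 rows from the tail, while a dataframe with 4 rows skips the separator and tail entirely.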
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
} else {
unreachable!()
}
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let column_names = df.get_column_names();
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
let dict_val = Value {
value: untagged_val,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
}
let value = Value {
value: UntaggedValue::Row(dictionary_row),
tag: Tag::unknown(),
};
values.push(value);
}
Ok(values)
} else {
unreachable!()
}
}
}
// Adds a separator to the vector of values using the column names from the
// dataframe to create the Values Row
fn add_separator(values: &mut Vec<Value>, df: &DataFrame) {
let column_names = df.get_column_names();
let mut dictionary = Dictionary::default();
for name in column_names {
let indicator = Value {
value: UntaggedValue::Primitive(Primitive::String("...".to_string())),
tag: Tag::unknown(),
};
dictionary.insert(name.to_string(), indicator);
}
let extra_column = Value {
value: UntaggedValue::Row(dictionary),
tag: Tag::unknown(),
};
values.push(extra_column);
}
// Converts a polars AnyValue to an UntaggedValue
// This is used when printing values coming from polars dataframes
fn anyvalue_to_untagged(anyvalue: &AnyValue) -> Result<UntaggedValue, ShellError> {
Ok(match anyvalue {
AnyValue::Null => UntaggedValue::Primitive(Primitive::Nothing),
AnyValue::Utf8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Boolean(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Date32(a) => {
// elapsed time in day since 1970-01-01
let seconds = *a as i64 * SECS_PER_DAY;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Date64(a) => {
// elapsed time in milliseconds since 1970-01-01
let seconds = *a / 1000;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Time64(a, _) => UntaggedValue::Primitive((*a).into()),
AnyValue::Duration(a, unit) => {
let nanoseconds = match unit {
TimeUnit::Second => *a / 1_000_000_000,
TimeUnit::Millisecond => *a / 1_000_000,
TimeUnit::Microsecond => *a / 1_000,
TimeUnit::Nanosecond => *a,
};
if let Some(bigint) = BigInt::from_i64(nanoseconds) {
UntaggedValue::Primitive(Primitive::Duration(bigint))
} else {
unreachable!("Internal error: protocol did not use compatible decimal")
}
}
AnyValue::List(_) => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
Tag::unknown(),
));
}
})
}
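As a quick check of the Date32 arithmetic above (illustrative, not part of the commit): a Date32 value of 18628 days becomes 18628 * 86_400 = 1_609_459_200 seconds, which NaiveDateTime::from_timestamp turns into 2021-01-01T00:00:00 with a zero FixedOffset; Date64 values take the same path after the millisecond-to-second division.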
// Inserts the values found in an UntaggedValue::Row
// All the entries in the dictionary are checked to verify that
// the column values share the same type.
fn insert_row(column_values: &mut ColumnMap, dictionary: Dictionary) -> Result<(), ShellError> {
for (key, value) in dictionary.entries {
insert_value(value, key, column_values)?;
}
Ok(())
}
// Inserts the values found in an UntaggedValue::Table
// All the entries in the table are checked to verify that
// the column values share the same type.
// The column names are the enumerated indices of the values
fn insert_table(column_values: &mut ColumnMap, table: Vec<Value>) -> Result<(), ShellError> {
for (index, value) in table.into_iter().enumerate() {
let key = format!("{}", index);
insert_value(value, key, column_values)?;
}
Ok(())
}
fn insert_value(
value: Value,
key: String,
column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key) {
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
Entry::Occupied(entry) => entry.into_mut(),
};
// Checking that the type for the value is the same
// for the previous value in the column
if col_val.values.is_empty() {
match &value.value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
col_val.value_type = InputValue::Integer;
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
col_val.value_type = InputValue::Decimal;
}
UntaggedValue::Primitive(Primitive::String(_)) => {
col_val.value_type = InputValue::String;
}
_ => {
return Err(ShellError::labeled_error(
"Only primitive values accepted",
"Not a primitive value",
&value.tag,
));
}
}
col_val.values.push(value);
} else {
let prev_value = &col_val.values[col_val.values.len() - 1];
match (&prev_value.value, &value.value) {
(
UntaggedValue::Primitive(Primitive::Int(_)),
UntaggedValue::Primitive(Primitive::Int(_)),
)
| (
UntaggedValue::Primitive(Primitive::Decimal(_)),
UntaggedValue::Primitive(Primitive::Decimal(_)),
)
| (
UntaggedValue::Primitive(Primitive::String(_)),
UntaggedValue::Primitive(Primitive::String(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error(
"Different values in column",
"Value with different type",
&value.tag,
));
}
}
}
Ok(())
}
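In practice this means a column's type is fixed by the first value seen: a stream such as echo [[a];[1] [1.5]] | dataframe (example invocation is illustrative) would be rejected by the match above with "Different values in column", because the integer 1 pins the column to InputValue::Integer before the decimal arrives.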
// The ColumnMap has the parsed data from the StreamInput
// This data can be used to create a Series object that can initialize
// the dataframe based on the type of data that is found
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
match column.value_type {
InputValue::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f32()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
}
}
let df = DataFrame::new(df_series);
match df {
Ok(df) => Ok(NuDataFrame {
dataframe: Some(df),
name: "From stream".to_string(),
}),
Err(e) => {
return Err(ShellError::labeled_error(
"Error while creating dataframe",
format!("{}", e),
tag,
))
}
}
}
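Putting the stream path together, the example from the command signature, echo [[a b];[1 2] [3 4]] | dataframe, should reduce to roughly the following polars calls (a sketch under the code above, not code from the commit; note that the InputValue::Integer branch currently goes through Value::as_f32, so integer columns land in the frame as f32 series, and column order follows HashMap iteration order):
use polars::prelude::{DataFrame, NamedFrom, Series};
// column "a" collects 1 and 3, column "b" collects 2 and 4
let a = Series::new("a", vec![1.0f32, 3.0]);
let b = Series::new("b", vec![2.0f32, 4.0]);
let df = DataFrame::new(vec![a, b]).expect("columns have equal length");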

View File

@ -12,6 +12,9 @@ mod type_name;
mod type_shape;
pub mod value;
#[cfg(feature = "dataframe")]
pub mod dataframe;
pub use crate::call_info::{CallInfo, EvaluatedArgs};
pub use crate::config_path::ConfigPath;
pub use crate::maybe_owned::MaybeOwned;

View File

@ -69,6 +69,10 @@ pub enum Type {
BeginningOfStream,
/// End of stream marker (used as bookend markers rather than actual values)
EndOfStream,
/// Dataframe
#[cfg(feature = "dataframe")]
Dataframe,
}
/// A shape representation of the type of a row
@ -183,6 +187,8 @@ impl Type {
UntaggedValue::Table(table) => Type::from_table(table.iter()),
UntaggedValue::Error(_) => Type::Error,
UntaggedValue::Block(_) => Type::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => Type::Dataframe,
}
}
}
@ -287,6 +293,8 @@ impl PrettyDebug for Type {
})
}
Type::Block => ty("block"),
#[cfg(feature = "dataframe")]
Type::Dataframe => ty("dataframe_pretty_debug_for_Type"),
}
}
}

View File

@ -30,6 +30,9 @@ use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(feature = "dataframe")]
use crate::dataframe::NuDataFrame;
/// The core structured values that flow through a pipeline
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum UntaggedValue {
@ -47,6 +50,10 @@ pub enum UntaggedValue {
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
Block(Box<hir::CapturedBlock>),
/// NuDataframe
#[cfg(feature = "dataframe")]
Dataframe(NuDataFrame),
}
impl UntaggedValue {
@ -489,6 +496,22 @@ impl Value {
}
}
/// View the Value as signed 32-bit float, if possible
pub fn as_f32(&self) -> Result<f32, ShellError> {
match &self.value {
UntaggedValue::Primitive(primitive) => primitive.as_f32(self.tag.span),
_ => Err(ShellError::type_error("integer", self.spanned_type_name())),
}
}
/// View the Value as signed 64-bit float, if possible
pub fn as_f64(&self) -> Result<f64, ShellError> {
match &self.value {
UntaggedValue::Primitive(primitive) => primitive.as_f64(self.tag.span),
_ => Err(ShellError::type_error("integer", self.spanned_type_name())),
}
}
/// View the Value as boolean, if possible
pub fn as_bool(&self) -> Result<bool, ShellError> {
match &self.value {
@ -634,6 +657,8 @@ impl ShellTypeName for UntaggedValue {
UntaggedValue::Table(_) => "table",
UntaggedValue::Error(_) => "error",
UntaggedValue::Block(_) => "block",
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => "dataframe",
}
}
}

View File

@ -24,6 +24,8 @@ impl PrettyDebug for Value {
.nest(),
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_Value"),
}
}
}

View File

@ -247,6 +247,29 @@ impl Primitive {
}
}
pub fn as_f32(&self, span: Span) -> Result<f32, ShellError> {
match self {
Primitive::Int(int) => int.to_f32().ok_or_else(|| {
ShellError::range_error(
ExpectedRange::F32,
&format!("{}", int).spanned(span),
"converting an integer into a signed 32-bit float",
)
}),
Primitive::Decimal(decimal) => decimal.to_f32().ok_or_else(|| {
ShellError::range_error(
ExpectedRange::F32,
&format!("{}", decimal).spanned(span),
"converting a decimal into a signed 32-bit float",
)
}),
other => Err(ShellError::type_error(
"number",
other.type_name().spanned(span),
)),
}
}
// FIXME: This is a bad name, but no other way to differentiate with our own Duration.
pub fn into_chrono_duration(self, span: Span) -> Result<chrono::Duration, ShellError> {
match self {
@ -332,17 +355,35 @@ impl From<BigInt> for Primitive {
}
}
// Macro to define the From trait for native types to primitives
// The from trait requires a converter that will be applied to the
// native type.
macro_rules! from_native_to_primitive {
($native_type:ty, $primitive_type:expr, $converter: expr) => {
// e.g. from u32 -> Primitive
impl From<$native_type> for Primitive {
fn from(int: $native_type) -> Primitive {
if let Some(i) = $converter(int) {
$primitive_type(i)
} else {
unreachable!("Internal error: protocol did not use compatible decimal")
}
}
}
};
}
from_native_to_primitive!(i8, Primitive::Int, BigInt::from_i8);
from_native_to_primitive!(i16, Primitive::Int, BigInt::from_i16);
from_native_to_primitive!(i32, Primitive::Int, BigInt::from_i32);
from_native_to_primitive!(i64, Primitive::Int, BigInt::from_i64);
from_native_to_primitive!(u8, Primitive::Int, BigInt::from_u8);
from_native_to_primitive!(u16, Primitive::Int, BigInt::from_u16);
from_native_to_primitive!(u32, Primitive::Int, BigInt::from_u32);
from_native_to_primitive!(u64, Primitive::Int, BigInt::from_u64);
from_native_to_primitive!(f32, Primitive::Decimal, BigDecimal::from_f32);
from_native_to_primitive!(f64, Primitive::Decimal, BigDecimal::from_f64);
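For what the macro buys: each invocation above expands to a plain From impl, so call sites elsewhere can stay as simple conversions (a sketch, assuming the surrounding nu-protocol types):
let int_prim: Primitive = 42u32.into(); // goes through BigInt::from_u32
let dec_prim: Primitive = 2.5f64.into(); // goes through BigDecimal::from_f64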
impl From<chrono::Duration> for Primitive {
fn from(duration: chrono::Duration) -> Primitive {
// FIXME: This is a hack since chrono::Duration does not give access to its 'nanos' field.

View File

@ -17,3 +17,6 @@ nu-source = { path = "../nu-source", version = "0.31.0" }
indexmap = { version = "1.6.1", features = ["serde-1"] }
itertools = "0.10.0"
num-traits = "0.2.14"
[features]
dataframe = ["nu-protocol/dataframe"]

View File

@ -724,6 +724,10 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
UntaggedValue::Block(_) | UntaggedValue::Table(_) | UntaggedValue::Error(_) => {
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
#[cfg(feature = "dataframe")]
UntaggedValue::Dataframe(_) => {
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
}
}