Complete Dataframe MVP (#3373)

* Dataframe MVP

* Removed test csv file

* Dataframe MVP

* Removed test csv file

* New revision polars

* New revision polars

* csv file reader

* argument parser for file reader

* Parser from Row primitive

* Column conversion

* Added as f32 and f64

* Parsing row to dataframe

* Removed repeated push to vector

* Accept table values to create dataframe

* Removed default serde

* Dataframe to rows to show data

* Save name of file with dataframe

* Usage example

* Upgrade polars version

* Clippy changes

* Added print function with head and tail

* Move dataframe struct to folder

* Lock file after running tests and merge

* Optional feature for dataframe

* Removed dataframe from plugins

* Update primitive.rs

Co-authored-by: JT <jonathandturner@users.noreply.github.com>
Author: Fernando Herrera, 2021-05-12 02:01:31 +01:00 (committed by GitHub)
Parent: e73491441a
Commit: c80a9585b0
25 changed files with 1474 additions and 448 deletions

Cargo.lock (generated): 1180 changed lines; diff suppressed because it is too large.


@@ -158,6 +158,15 @@ zip-support = ["nu-cli/zip", "nu-command/zip"]
 #This is disabled in extra for now
 table-pager = ["nu-command/table-pager"]
+
+#dataframe feature for nushell
+dataframe = [
+    "nu-protocol/dataframe",
+    "nu-command/dataframe",
+    "nu-value-ext/dataframe",
+    "nu-data/dataframe"
+]
+
 [profile.release]
 #strip = "symbols" #Couldn't get working +nightly
 codegen-units = 1 #Reduce parallel codegen units


@@ -99,6 +99,8 @@ uuid_crate = { package = "uuid", version = "0.8.2", features = ["v4"], optional
 which = { version = "4.1.0", optional = true }
 zip = { version = "0.5.9", optional = true }
+
+polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
 [target.'cfg(unix)'.dependencies]
 umask = "1.0.0"
 users = "0.11.0"
@@ -130,3 +132,4 @@ trash-support = ["trash"]
 directories = ["directories-next"]
 dirs = ["dirs-next"]
 table-pager = ["minus", "crossterm"]
+dataframe = ["nu-protocol/dataframe", "polars"]


@@ -26,6 +26,8 @@ pub(crate) mod compact;
 pub(crate) mod config;
 pub(crate) mod constants;
 pub(crate) mod cp;
+#[cfg(feature = "dataframe")]
+pub(crate) mod dataframe;
 pub(crate) mod date;
 pub(crate) mod debug;
 pub(crate) mod def;
@@ -184,6 +186,8 @@ pub(crate) use clear::Clear;
 pub(crate) mod touch;
 pub(crate) use all::Command as All;
 pub(crate) use any::Command as Any;
+#[cfg(feature = "dataframe")]
+pub(crate) use dataframe::Dataframe;
 pub(crate) use enter::Enter;
 pub(crate) use every::Every;
 pub(crate) use exec::Exec;


@@ -89,7 +89,7 @@ fn all(args: CommandArgs) -> Result<OutputStream, ShellError> {
 UntaggedValue::boolean(true).into_value(&tag),
 ));
-// Variables in nu are immutable. Having the same variable accross invocations
+// Variables in nu are immutable. Having the same variable across invocations
 // of evaluate_baseline_expr does not mutate the variables and those each
 // invocations are independent of each other!
 scope.enter_scope();


@@ -228,6 +228,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
 out!("{:?}", row);
 }
 }
+#[cfg(feature = "dataframe")]
+Value {
+    value: UntaggedValue::Dataframe(df),
+    ..
+} => {
+    if let Some(table) = table {
+        // TODO. Configure the parameter rows from file. It can be
+        // adjusted to see a certain amount of values in the head
+        let command_args =
+            create_default_command_args(&context, df.print()?.into(), tag);
+        let result = table.run(command_args)?;
+        let _ = result.collect::<Vec<_>>();
+    }
+}
 Value {
 value: UntaggedValue::Primitive(Primitive::Nothing),
 ..


@@ -0,0 +1,139 @@
use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, hir::NamedValue, Signature, SyntaxShape, UntaggedValue};
use nu_source::Tagged;
use polars::prelude::{CsvReader, SerReader};
pub struct Dataframe;
#[derive(Deserialize)]
pub struct OpenArgs {
file: Tagged<PathBuf>,
}
impl WholeStreamCommand for Dataframe {
fn name(&self) -> &str {
"dataframe"
}
fn usage(&self) -> &str {
"Creates a dataframe from a csv file"
}
fn signature(&self) -> Signature {
Signature::build("dataframe").named(
"file",
SyntaxShape::FilePath,
"the file path to load values from",
Some('f'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
load_dataframe(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe -f test.csv",
result: None,
},
Example {
description: "Takes an input stream and converts it to a dataframe",
example: "echo [[a b];[1 2] [3 4]] | dataframe",
result: None,
},
]
}
}
// Creates a dataframe from either a file or a table.
// If both options are given, the file input takes priority over the stream.
// The InputStream can have a table and a dictionary as input variable.
fn load_dataframe(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The file has priority over stream input
if let Some(NamedValue::Value(_, _)) = args
.call_info()
.args
.named
.as_ref()
.map(|named| named.named.get("file"))
.flatten()
{
return create_from_file(args);
}
create_from_input(args)
}
fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
// Command Tag. This marks where the command is located and the name
// of the command used
let tag = args.call_info.name_tag.clone();
// Parsing the arguments that the function uses
let (OpenArgs { file }, _) = args.process()?;
// Needs more detail and arguments while loading the dataframe
// Options:
// - has header
// - infer schema
// - delimiter
// - csv or parquet <- extracted from extension
let csv_reader = match CsvReader::from_path(&file.item) {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Unable to parse file",
format!("{}", e),
&file.tag,
))
}
};
let df = match csv_reader.infer_schema(None).has_header(true).finish() {
Ok(csv_reader) => csv_reader,
Err(e) => {
return Err(ShellError::labeled_error(
"Error while parsing dataframe",
format!("{}", e),
&file.tag,
))
}
};
let file_name = match file.item.into_os_string().into_string() {
Ok(name) => name,
Err(e) => {
return Err(ShellError::labeled_error(
"Error with file name",
format!("{:?}", e),
&file.tag,
))
}
};
let nu_dataframe = NuDataFrame {
dataframe: Some(df),
name: file_name,
};
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
Ok(init.to_output_stream())
}
fn create_from_input(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
Ok(init.to_output_stream())
}
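
For illustration only (not part of this commit's diff), here is a rough sketch of the path create_from_input takes, assuming the nu-protocol types used in this commit (Value, UntaggedValue, Dictionary, Tag) and the usual UntaggedValue::int helper; the columns mirror the "echo [[a b];[1 2] [3 4]] | dataframe" example above.

use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Dictionary, UntaggedValue, Value};
use nu_source::Tag;

fn rows_to_dataframe() -> Result<NuDataFrame, ShellError> {
    let tag = Tag::unknown();
    // Build two rows with the same columns and consistent value types, as the
    // type checking inside NuDataFrame::try_from_iter requires.
    let rows: Vec<Value> = (1..=2)
        .map(|i| {
            let mut dict = Dictionary::default();
            dict.insert("a".to_string(), UntaggedValue::int(i).into_value(&tag));
            dict.insert("b".to_string(), UntaggedValue::int(i * 2).into_value(&tag));
            Value {
                value: UntaggedValue::Row(dict),
                tag: tag.clone(),
            }
        })
        .collect();

    // try_from_iter groups the values per column and builds the polars DataFrame.
    NuDataFrame::try_from_iter(rows.into_iter(), &tag)
}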


@@ -251,6 +251,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
 whole_stream_command(Seq),
 whole_stream_command(SeqDates),
 whole_stream_command(TermSize),
+#[cfg(feature = "dataframe")]
+whole_stream_command(Dataframe),
 ]);
 #[cfg(feature = "clipboard-cli")]


@@ -114,6 +114,8 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
 UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
 serde_json::Value::Null
 }
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => serde_json::Value::Null,
 UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
 b.iter()
 .map(|x| {


@@ -75,6 +75,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
 UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
 UntaggedValue::Error(e) => return Err(e.clone()),
 UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
 UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
 UntaggedValue::Primitive(Primitive::Binary(b)) => {
 toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())


@@ -95,6 +95,8 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
 UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => {
 serde_yaml::Value::Null
 }
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
 UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
 b.iter()
 .map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))


@@ -155,6 +155,14 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
 item.0.tag.span,
 ))
 }
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => {
+    return Err(ShellError::labeled_error(
+        "uniq -c cannot operate on dataframes.",
+        "source",
+        item.0.tag.span,
+    ))
+}
 UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
 }
 };


@@ -43,3 +43,4 @@ users = "0.11.0"
 [features]
 directories = ["directories-next"]
 dirs = ["dirs-next"]
+dataframe = ["nu-protocol/dataframe"]


@@ -43,6 +43,10 @@ pub enum InlineShape {
 // TODO: Error type
 Error,
+
+// TODO: Dataframe type
+#[cfg(feature = "dataframe")]
+Dataframe,
 // Stream markers (used as bookend markers rather than actual values)
 BeginningOfStream,
 EndOfStream,
@@ -123,6 +127,8 @@ impl InlineShape {
 UntaggedValue::Table(table) => InlineShape::from_table(table.iter()),
 UntaggedValue::Error(_) => InlineShape::Error,
 UntaggedValue::Block(_) => InlineShape::Block,
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => InlineShape::Dataframe,
 }
 }
@@ -312,6 +318,8 @@ impl PrettyDebug for FormatInlineShape {
 .group(),
 InlineShape::Block => DbgDocBldr::opaque("block"),
 InlineShape::Error => DbgDocBldr::error("error"),
+#[cfg(feature = "dataframe")]
+InlineShape::Dataframe => DbgDocBldr::error("dataframe_pretty_FormatInlineShape"),
 InlineShape::BeginningOfStream => DbgDocBldr::blank(),
 InlineShape::EndOfStream => DbgDocBldr::blank(),
 }


@@ -118,6 +118,8 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
 UntaggedValue::Table(l) => toml::Value::Array(collect_values(l)?),
 UntaggedValue::Error(e) => return Err(e.clone()),
 UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
 UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
 UntaggedValue::Primitive(Primitive::Binary(b)) => {
 toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())


@@ -25,9 +25,14 @@ num-traits = "0.2.14"
 serde = { version = "1.0", features = ["derive"] }
 serde_bytes = "0.11.5"
+
+polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
 # implement conversions
 serde_json = "1.0"
 serde_yaml = "0.8.16"
 toml = "0.5.8"
+
+[features]
+dataframe = ["polars"]
 [build-dependencies]


@@ -0,0 +1,3 @@
pub mod nu_dataframe;
pub use nu_dataframe::NuDataFrame;


@@ -0,0 +1,432 @@
use std::hash::{Hash, Hasher};
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::Tag;
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use serde::de::{Deserialize, Deserializer, Visitor};
use serde::Serialize;
use std::fmt;
use crate::{Dictionary, Primitive, UntaggedValue, Value};
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
enum InputValue {
Integer,
Decimal,
String,
}
#[derive(Debug)]
struct ColumnValues {
pub value_type: InputValue,
pub values: Vec<Value>,
}
impl Default for ColumnValues {
fn default() -> Self {
Self {
value_type: InputValue::Integer,
values: Vec::new(),
}
}
}
type ColumnMap = HashMap<String, ColumnValues>;
// TODO. Using Option to help with deserialization. It will be better to find
// a way to use serde with dataframes
#[derive(Debug, Clone, Serialize)]
pub struct NuDataFrame {
#[serde(skip_serializing)]
pub dataframe: Option<DataFrame>,
pub name: String,
}
impl Default for NuDataFrame {
fn default() -> Self {
NuDataFrame {
dataframe: None,
name: String::from("From Stream"),
}
}
}
impl NuDataFrame {
fn new() -> Self {
Self::default()
}
}
// TODO. Better definition of equality and comparison for a dataframe.
// It probably makes sense to have a name field and use it for comparisons
impl PartialEq for NuDataFrame {
fn eq(&self, _: &Self) -> bool {
false
}
}
impl Eq for NuDataFrame {}
impl PartialOrd for NuDataFrame {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
impl Ord for NuDataFrame {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
impl Hash for NuDataFrame {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl<'de> Visitor<'de> for NuDataFrame {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an integer between -2^31 and 2^31")
}
}
impl<'de> Deserialize<'de> for NuDataFrame {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_i32(NuDataFrame::new())
}
}
impl NuDataFrame {
pub fn try_from_iter<T>(iter: T, tag: &Tag) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check whether the values
// in a column have different types
let mut column_values: ColumnMap = HashMap::new();
for value in iter {
match value.value {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
_ => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
&value.tag,
));
}
}
}
from_parsed_columns(column_values, tag)
}
// The print output is made of a head and, if the dataframe is too large, a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let size: usize = 5;
let mut values = self.head(Some(size))?;
if df.height() > size {
add_separator(&mut values, df);
let remaining = df.height() - size;
let tail_size = remaining.min(size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
}
Ok(values)
} else {
unreachable!()
}
}
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
} else {
unreachable!()
}
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let column_names = df.get_column_names();
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
let dict_val = Value {
value: untagged_val,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
}
let value = Value {
value: UntaggedValue::Row(dictionary_row),
tag: Tag::unknown(),
};
values.push(value);
}
Ok(values)
} else {
unreachable!()
}
}
}
// Adds a separator to the vector of values using the column names from the
// dataframe to create the Values Row
fn add_separator(values: &mut Vec<Value>, df: &DataFrame) {
let column_names = df.get_column_names();
let mut dictionary = Dictionary::default();
for name in column_names {
let indicator = Value {
value: UntaggedValue::Primitive(Primitive::String("...".to_string())),
tag: Tag::unknown(),
};
dictionary.insert(name.to_string(), indicator);
}
let extra_column = Value {
value: UntaggedValue::Row(dictionary),
tag: Tag::unknown(),
};
values.push(extra_column);
}
// Converts a polars AnyValue to an UntaggedValue
// This is used when printing values coming from polars dataframes
fn anyvalue_to_untagged(anyvalue: &AnyValue) -> Result<UntaggedValue, ShellError> {
Ok(match anyvalue {
AnyValue::Null => UntaggedValue::Primitive(Primitive::Nothing),
AnyValue::Utf8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Boolean(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Float64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int8(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Int16(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt32(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::UInt64(a) => UntaggedValue::Primitive((*a).into()),
AnyValue::Date32(a) => {
// elapsed time in day since 1970-01-01
let seconds = *a as i64 * SECS_PER_DAY;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Date64(a) => {
// elapsed time in milliseconds since 1970-01-01
let seconds = *a / 1000;
let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0);
// Zero length offset
let offset = FixedOffset::east(0);
let datetime = DateTime::<FixedOffset>::from_utc(naive_datetime, offset);
UntaggedValue::Primitive(Primitive::Date(datetime))
}
AnyValue::Time64(a, _) => UntaggedValue::Primitive((*a).into()),
AnyValue::Duration(a, unit) => {
let nanoseconds = match unit {
TimeUnit::Second => *a / 1_000_000_000,
TimeUnit::Millisecond => *a / 1_000_000,
TimeUnit::Microsecond => *a / 1_000,
TimeUnit::Nanosecond => *a,
};
if let Some(bigint) = BigInt::from_i64(nanoseconds) {
UntaggedValue::Primitive(Primitive::Duration(bigint))
} else {
unreachable!("Internal error: protocol did not use compatible decimal")
}
}
AnyValue::List(_) => {
return Err(ShellError::labeled_error(
"Format not supported",
"Value not supported for conversion",
Tag::unknown(),
));
}
})
}
// Inserts the values found in an UntaggedValue::Row
// All the entries in the dictionary are checked to confirm that
// the column values share the same type.
fn insert_row(column_values: &mut ColumnMap, dictionary: Dictionary) -> Result<(), ShellError> {
for (key, value) in dictionary.entries {
insert_value(value, key, column_values)?;
}
Ok(())
}
// Inserts the values found in an UntaggedValue::Table
// All the entries in the table are checked to confirm that
// the column values share the same type.
// The column names are the enumerated indices of the values
fn insert_table(column_values: &mut ColumnMap, table: Vec<Value>) -> Result<(), ShellError> {
for (index, value) in table.into_iter().enumerate() {
let key = format!("{}", index);
insert_value(value, key, column_values)?;
}
Ok(())
}
fn insert_value(
value: Value,
key: String,
column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key) {
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
Entry::Occupied(entry) => entry.into_mut(),
};
// Checking that the type of the value is the same
// as that of the previous value in the column
if col_val.values.is_empty() {
match &value.value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
col_val.value_type = InputValue::Integer;
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
col_val.value_type = InputValue::Decimal;
}
UntaggedValue::Primitive(Primitive::String(_)) => {
col_val.value_type = InputValue::String;
}
_ => {
return Err(ShellError::labeled_error(
"Only primitive values accepted",
"Not a primitive value",
&value.tag,
));
}
}
col_val.values.push(value);
} else {
let prev_value = &col_val.values[col_val.values.len() - 1];
match (&prev_value.value, &value.value) {
(
UntaggedValue::Primitive(Primitive::Int(_)),
UntaggedValue::Primitive(Primitive::Int(_)),
)
| (
UntaggedValue::Primitive(Primitive::Decimal(_)),
UntaggedValue::Primitive(Primitive::Decimal(_)),
)
| (
UntaggedValue::Primitive(Primitive::String(_)),
UntaggedValue::Primitive(Primitive::String(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error(
"Different values in column",
"Value with different type",
&value.tag,
));
}
}
}
Ok(())
}
// The ColumnMap has the parsed data from the StreamInput
// This data can be used to create a Series object that can initialize
// the dataframe based on the type of data that is found
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
match column.value_type {
InputValue::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f32()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
}
}
let df = DataFrame::new(df_series);
match df {
Ok(df) => Ok(NuDataFrame {
dataframe: Some(df),
name: "From stream".to_string(),
}),
Err(e) => {
return Err(ShellError::labeled_error(
"Error while creating dataframe",
format!("{}", e),
tag,
))
}
}
}
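
For reference (an editor's sketch, not part of this commit), this is roughly what from_parsed_columns asks of polars 0.13 as pinned above: one Series per column, then DataFrame::new over the collected series. The column names and values here are made up for the example.

use polars::prelude::{DataFrame, NamedFrom, PolarsError, Series};

fn build_small_frame() -> Result<DataFrame, PolarsError> {
    // Integer columns are converted with Value::as_f32, decimal columns with
    // Value::as_f64 and string columns with Value::as_string before this point.
    let col_a = Series::new("a", vec![1.0f32, 3.0]);
    let col_b = Series::new("b", vec![2.0f64, 4.0]);
    let col_c = Series::new("c", vec!["x".to_string(), "y".to_string()]);
    DataFrame::new(vec![col_a, col_b, col_c])
}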


@@ -12,6 +12,9 @@ mod type_name;
 mod type_shape;
 pub mod value;
+
+#[cfg(feature = "dataframe")]
+pub mod dataframe;
 pub use crate::call_info::{CallInfo, EvaluatedArgs};
 pub use crate::config_path::ConfigPath;
 pub use crate::maybe_owned::MaybeOwned;


@@ -69,6 +69,10 @@ pub enum Type {
 BeginningOfStream,
 /// End of stream marker (used as bookend markers rather than actual values)
 EndOfStream,
+
+/// Dataframe
+#[cfg(feature = "dataframe")]
+Dataframe,
 }
 /// A shape representation of the type of a row
@@ -183,6 +187,8 @@ impl Type {
 UntaggedValue::Table(table) => Type::from_table(table.iter()),
 UntaggedValue::Error(_) => Type::Error,
 UntaggedValue::Block(_) => Type::Block,
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => Type::Dataframe,
 }
 }
 }
@@ -287,6 +293,8 @@ impl PrettyDebug for Type {
 })
 }
 Type::Block => ty("block"),
+#[cfg(feature = "dataframe")]
+Type::Dataframe => ty("dataframe_pretty_debug_for_Type"),
 }
 }
 }


@@ -30,6 +30,9 @@ use std::hash::{Hash, Hasher};
 use std::path::PathBuf;
 use std::time::SystemTime;
+
+#[cfg(feature = "dataframe")]
+use crate::dataframe::NuDataFrame;
 /// The core structured values that flow through a pipeline
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
 pub enum UntaggedValue {
@@ -47,6 +50,10 @@ pub enum UntaggedValue {
 /// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
 Block(Box<hir::CapturedBlock>),
+
+/// NuDataframe
+#[cfg(feature = "dataframe")]
+Dataframe(NuDataFrame),
 }
 impl UntaggedValue {
@@ -489,6 +496,22 @@ impl Value {
 }
 }
+
+/// View the Value as signed 32-bit float, if possible
+pub fn as_f32(&self) -> Result<f32, ShellError> {
+    match &self.value {
+        UntaggedValue::Primitive(primitive) => primitive.as_f32(self.tag.span),
+        _ => Err(ShellError::type_error("integer", self.spanned_type_name())),
+    }
+}
+
+/// View the Value as signed 64-bit float, if possible
+pub fn as_f64(&self) -> Result<f64, ShellError> {
+    match &self.value {
+        UntaggedValue::Primitive(primitive) => primitive.as_f64(self.tag.span),
+        _ => Err(ShellError::type_error("integer", self.spanned_type_name())),
+    }
+}
 /// View the Value as boolean, if possible
 pub fn as_bool(&self) -> Result<bool, ShellError> {
 match &self.value {
@@ -634,6 +657,8 @@ impl ShellTypeName for UntaggedValue {
 UntaggedValue::Table(_) => "table",
 UntaggedValue::Error(_) => "error",
 UntaggedValue::Block(_) => "block",
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => "dataframe",
 }
 }
 }
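
A small sketch (editor's illustration, not part of the diff) of how the new Value::as_f32 and Value::as_f64 accessors behave, assuming the usual UntaggedValue::int helper for building a test value:

use nu_errors::ShellError;
use nu_protocol::UntaggedValue;
use nu_source::Tag;

fn floats_from_int() -> Result<(f32, f64), ShellError> {
    let value = UntaggedValue::int(2).into_value(&Tag::unknown());
    // Both calls return a ShellError if the value is not a numeric primitive.
    Ok((value.as_f32()?, value.as_f64()?))
}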


@@ -24,6 +24,8 @@ impl PrettyDebug for Value {
 .nest(),
 UntaggedValue::Error(_) => DbgDocBldr::error("error"),
 UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_Value"),
 }
 }
 }


@@ -247,6 +247,29 @@ impl Primitive {
 }
 }
+
+pub fn as_f32(&self, span: Span) -> Result<f32, ShellError> {
+    match self {
+        Primitive::Int(int) => int.to_f32().ok_or_else(|| {
+            ShellError::range_error(
+                ExpectedRange::F32,
+                &format!("{}", int).spanned(span),
+                "converting an integer into a signed 32-bit float",
+            )
+        }),
+        Primitive::Decimal(decimal) => decimal.to_f32().ok_or_else(|| {
+            ShellError::range_error(
+                ExpectedRange::F32,
+                &format!("{}", decimal).spanned(span),
+                "converting a decimal into a signed 32-bit float",
+            )
+        }),
+        other => Err(ShellError::type_error(
+            "number",
+            other.type_name().spanned(span),
+        )),
+    }
+}
+
 // FIXME: This is a bad name, but no other way to differentiate with our own Duration.
 pub fn into_chrono_duration(self, span: Span) -> Result<chrono::Duration, ShellError> {
 match self {
@@ -332,16 +355,34 @@ impl From<BigInt> for Primitive {
 }
 }
-impl From<f64> for Primitive {
-    /// Helper to convert from 64-bit float to a Primitive value
-    fn from(float: f64) -> Primitive {
-        if let Some(f) = BigDecimal::from_f64(float) {
-            Primitive::Decimal(f)
-        } else {
-            unreachable!("Internal error: protocol did not use f64-compatible decimal")
-        }
-    }
-}
+// Macro to define the From trait for native types to primitives
+// The from trait requires a converter that will be applied to the
+// native type.
+macro_rules! from_native_to_primitive {
+    ($native_type:ty, $primitive_type:expr, $converter: expr) => {
+        // e.g. from u32 -> Primitive
+        impl From<$native_type> for Primitive {
+            fn from(int: $native_type) -> Primitive {
+                if let Some(i) = $converter(int) {
+                    $primitive_type(i)
+                } else {
+                    unreachable!("Internal error: protocol did not use compatible decimal")
+                }
+            }
+        }
+    };
+}
+
+from_native_to_primitive!(i8, Primitive::Int, BigInt::from_i8);
+from_native_to_primitive!(i16, Primitive::Int, BigInt::from_i16);
+from_native_to_primitive!(i32, Primitive::Int, BigInt::from_i32);
+from_native_to_primitive!(i64, Primitive::Int, BigInt::from_i64);
+from_native_to_primitive!(u8, Primitive::Int, BigInt::from_u8);
+from_native_to_primitive!(u16, Primitive::Int, BigInt::from_u16);
+from_native_to_primitive!(u32, Primitive::Int, BigInt::from_u32);
+from_native_to_primitive!(u64, Primitive::Int, BigInt::from_u64);
+from_native_to_primitive!(f32, Primitive::Decimal, BigDecimal::from_f32);
+from_native_to_primitive!(f64, Primitive::Decimal, BigDecimal::from_f64);
 impl From<chrono::Duration> for Primitive {
 fn from(duration: chrono::Duration) -> Primitive {
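
For context, a quick sketch (editor's illustration, not part of the diff) of the conversions the from_native_to_primitive! invocations above generate; nothing here goes beyond what the macro expands to.

use nu_protocol::{Primitive, UntaggedValue};

fn native_conversions() -> (UntaggedValue, UntaggedValue) {
    // u32 goes through BigInt::from_u32 into Primitive::Int,
    // f32 goes through BigDecimal::from_f32 into Primitive::Decimal.
    let int_prim: Primitive = 42u32.into();
    let dec_prim: Primitive = 2.5f32.into();
    (
        UntaggedValue::Primitive(int_prim),
        UntaggedValue::Primitive(dec_prim),
    )
}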


@@ -17,3 +17,6 @@ nu-source = { path = "../nu-source", version = "0.31.0" }
 indexmap = { version = "1.6.1", features = ["serde-1"] }
 itertools = "0.10.0"
 num-traits = "0.2.14"
+
+[features]
+dataframe = ["nu-protocol/dataframe"]


@@ -724,6 +724,10 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
 UntaggedValue::Block(_) | UntaggedValue::Table(_) | UntaggedValue::Error(_) => {
 MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
 }
+#[cfg(feature = "dataframe")]
+UntaggedValue::Dataframe(_) => {
+    MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
+}
 }
 }