All is a DataFrame (#3812)

* nuframe in its own type in UntaggedValue

* Removed eager dataframe from enum

* Dataframe created from list of values

* Corrected order in dataframe columns

* Returned tag from stream collection

* Removed series from dataframe commands

* Arithmetic operators

* forced push

* forced push

* Replace all command

* String commands

* appending operations with dfs

* Testing suite for dataframes

* Unit test for dataframe commands

* improved equality for dataframes
This commit is contained in:
Fernando Herrera
2021-07-25 11:01:54 +01:00
committed by GitHub
parent 9120a64cfb
commit f1ee9113ac
83 changed files with 3293 additions and 1422 deletions

View File

@ -1,15 +1,11 @@
pub mod nu_dataframe;
pub mod nu_groupby;
pub mod nu_series;
pub use nu_dataframe::NuDataFrame;
pub use nu_dataframe::{Column, NuDataFrame};
pub use nu_groupby::NuGroupBy;
pub use nu_series::NuSeries;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum PolarsData {
EagerDataFrame(NuDataFrame),
pub enum FrameStruct {
GroupBy(NuGroupBy),
Series(NuSeries),
}

View File

@ -1,54 +1,147 @@
use indexmap::{map::Entry, IndexMap};
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use std::ops::{Deref, DerefMut};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::{Span, Tag};
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use polars::prelude::{AnyValue, DataFrame, DataType, NamedFrom, Series, TimeUnit};
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
enum InputValue {
Integer,
Decimal,
String,
pub struct Column {
name: String,
values: Vec<Value>,
}
impl Column {
pub fn new(name: String, values: Vec<Value>) -> Self {
Self { name, values }
}
pub fn new_empty(name: String) -> Self {
Self {
name,
values: Vec::new(),
}
}
pub fn push(&mut self, value: Value) {
self.values.push(value)
}
}
#[derive(Debug)]
struct ColumnValues {
pub value_type: InputValue,
pub values: Vec<Value>,
enum InputType {
Integer,
Decimal,
String,
Boolean,
}
impl Default for ColumnValues {
fn default() -> Self {
#[derive(Debug)]
struct TypedColumn {
pub column: Column,
pub column_type: Option<InputType>,
}
impl TypedColumn {
fn new_empty(name: String) -> Self {
Self {
value_type: InputValue::Integer,
values: Vec::new(),
column: Column::new_empty(name),
column_type: None,
}
}
}
type ColumnMap = HashMap<String, ColumnValues>;
/// Gives read-only access to the wrapped `Column` through a `TypedColumn`,
/// so the column's fields and methods can be reached directly.
impl Deref for TypedColumn {
type Target = Column;
fn deref(&self) -> &Self::Target {
&self.column
}
}
/// Gives mutable access to the wrapped `Column` through a `TypedColumn`.
impl DerefMut for TypedColumn {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.column
}
}
type ColumnMap = IndexMap<String, TypedColumn>;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuDataFrame {
dataframe: DataFrame,
}
// TODO. Better definition of equality and comparison for a dataframe.
// Probably it make sense to have a name field and use it for comparisons
// Dataframes are considered equal if they have the same shape, column name
// and values
impl PartialEq for NuDataFrame {
fn eq(&self, _: &Self) -> bool {
false
fn eq(&self, other: &Self) -> bool {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return false;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return false;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return false;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return false,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return false,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
Ok(series) => series,
Err(_) => return false,
},
_ => self_series.clone(),
};
if !self_series.series_equal(&other_series) {
return false;
}
}
true
}
}
@ -87,14 +180,14 @@ impl NuDataFrame {
NuDataFrame { dataframe }
}
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<(Self, Tag), ShellError>
where
T: Iterator<Item = Value>,
{
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
UntaggedValue::DataFrame(df) => Some((df, value.tag)),
_ => None,
})
.ok_or_else(|| {
@ -113,41 +206,127 @@ impl NuDataFrame {
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = HashMap::new();
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value.value {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
UntaggedValue::Primitive(Primitive::Int(_))
| UntaggedValue::Primitive(Primitive::Decimal(_))
| UntaggedValue::Primitive(Primitive::String(_))
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
let key = format!("{}", 0);
insert_value(value, key, &mut column_values)?
}
_ => {
return Err(ShellError::labeled_error_with_secondary(
"Format not supported",
"Value not supported for conversion",
&value.tag,
"Perhaps you want to use a List of Tables or a Dictionary",
"Perhaps you want to use a List, a List of Tables or a Dictionary",
&value.tag,
));
}
}
}
from_parsed_columns(column_values, tag)
from_parsed_columns(column_values, &tag.span)
}
/// Builds a `NuDataFrame` out of already constructed polars series.
///
/// # Errors
/// Returns a `ShellError` labeled at `span` when polars refuses to build a
/// dataframe from `columns`.
pub fn try_from_series(columns: Vec<Series>, span: &Span) -> Result<Self, ShellError> {
    match DataFrame::new(columns) {
        Ok(dataframe) => Ok(Self { dataframe }),
        Err(e) => Err(ShellError::labeled_error(
            "DataFrame Creation",
            format!("Unable to create DataFrame: {}", e),
            span,
        )),
    }
}
pub fn try_from_columns(columns: Vec<Column>, span: &Span) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
for value in column.values {
insert_value(value, column.name.clone(), &mut column_values)?;
}
}
from_parsed_columns(column_values, span)
}
pub fn into_value(self, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
value: Self::into_untagged(self),
tag,
}
}
/// Consumes the dataframe and wraps it in `UntaggedValue::DataFrame`.
pub fn into_untagged(self) -> UntaggedValue {
UntaggedValue::DataFrame(self)
}
pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
value: Self::dataframe_to_untagged(df),
tag,
}
}
/// Wraps a polars `DataFrame` in a `NuDataFrame` and returns it as an
/// `UntaggedValue::DataFrame`.
pub fn dataframe_to_untagged(df: DataFrame) -> UntaggedValue {
UntaggedValue::DataFrame(Self::new(df))
}
pub fn series_to_untagged(series: Series, span: &Span) -> UntaggedValue {
match DataFrame::new(vec![series]) {
Ok(dataframe) => UntaggedValue::DataFrame(Self { dataframe }),
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"DataFrame Creation",
format!("Unable to create DataFrame: {}", e),
span,
)),
}
}
/// Extracts the column called `column` as a new one-column dataframe.
///
/// # Errors
/// Returns a `ShellError` labeled at `tag.span` when the column does not
/// exist or when the one-column dataframe cannot be created.
pub fn column(&self, column: &str, tag: &Tag) -> Result<Self, ShellError> {
    let selected = match self.as_ref().column(column) {
        Ok(series) => series.clone(),
        Err(e) => {
            return Err(ShellError::labeled_error(
                "Column not found",
                format!("{}", e),
                tag.span,
            ))
        }
    };

    match DataFrame::new(vec![selected]) {
        Ok(dataframe) => Ok(Self { dataframe }),
        Err(e) => Err(ShellError::labeled_error(
            "DataFrame error",
            format!("{}", e),
            tag.span,
        )),
    }
}
/// Returns `true` when the dataframe has exactly one column, which is what
/// this module treats as a series.
pub fn is_series(&self) -> bool {
self.as_ref().width() == 1
}
/// Returns a clone of the only column of a one-column dataframe.
///
/// # Errors
/// Returns a `ShellError` labeled at `span` when the dataframe does not
/// have exactly one column (see `is_series`).
pub fn as_series(&self, span: &Span) -> Result<Series, ShellError> {
    if self.is_series() {
        let only_column = self
            .as_ref()
            .get_columns()
            .get(0)
            .expect("We have already checked that the width is 1");

        Ok(only_column.clone())
    } else {
        Err(ShellError::labeled_error_with_secondary(
            "Not a Series",
            "DataFrame cannot be used as Series",
            span,
            "Note that a Series is a DataFrame with one column",
            span,
        ))
    }
}
// The printed output is made of a head and, if the dataframe is too large, a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.as_ref();
@ -188,24 +367,17 @@ impl NuDataFrame {
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = self.as_ref();
let column_names = df.get_column_names();
let upper_row = to_row.min(df.height());
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
for col in df.get_columns() {
let dict_val = Value {
value: untagged_val,
value: anyvalue_to_untagged(&col.get(i))?,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
dictionary_row.insert(col.name().into(), dict_val);
}
let value = Value {
@ -213,7 +385,7 @@ impl NuDataFrame {
tag: Tag::unknown(),
};
values.push(value);
values.push(value)
}
Ok(values)
@ -336,8 +508,8 @@ fn insert_value(
key: String,
column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key) {
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
let col_val = match column_values.entry(key.clone()) {
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
Entry::Occupied(entry) => entry.into_mut(),
};
@ -346,13 +518,16 @@ fn insert_value(
if col_val.values.is_empty() {
match &value.value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
col_val.value_type = InputValue::Integer;
col_val.column_type = Some(InputType::Integer);
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
col_val.value_type = InputValue::Decimal;
col_val.column_type = Some(InputType::Decimal);
}
UntaggedValue::Primitive(Primitive::String(_)) => {
col_val.value_type = InputValue::String;
col_val.column_type = Some(InputType::String);
}
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
col_val.column_type = Some(InputType::Boolean);
}
_ => {
return Err(ShellError::labeled_error(
@ -378,6 +553,10 @@ fn insert_value(
| (
UntaggedValue::Primitive(Primitive::String(_)),
UntaggedValue::Primitive(Primitive::String(_)),
)
| (
UntaggedValue::Primitive(Primitive::Boolean(_)),
UntaggedValue::Primitive(Primitive::Boolean(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error_with_secondary(
@ -397,27 +576,35 @@ fn insert_value(
// The ColumnMap has the parsed data from the StreamInput
// This data can be used to create a Series object that can initialize
// the dataframe based on the type of data that is found
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
fn from_parsed_columns(column_values: ColumnMap, span: &Span) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
match column.value_type {
InputValue::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
if let Some(column_type) = &column.column_type {
match column_type {
InputType::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Boolean => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_bool()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
}
}
}
@ -430,7 +617,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
return Err(ShellError::labeled_error(
"Error while creating dataframe",
format!("{}", e),
tag,
span,
))
}
}

View File

@ -2,7 +2,7 @@ use nu_source::{Span, Tag};
use polars::frame::groupby::{GroupBy, GroupTuples};
use serde::{Deserialize, Serialize};
use super::{NuDataFrame, PolarsData};
use super::{FrameStruct, NuDataFrame};
use nu_errors::ShellError;
use crate::{TaggedDictBuilder, UntaggedValue, Value};
@ -30,7 +30,7 @@ impl NuGroupBy {
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
UntaggedValue::FrameStruct(FrameStruct::GroupBy(group)) => Some(group),
_ => None,
})
.ok_or_else(|| {

View File

@ -1,345 +0,0 @@
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::vec;
use nu_errors::ShellError;
use nu_source::{Span, Tag};
use polars::prelude::{DataType, NamedFrom, Series};
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
/// Wrapper around a polars `Series` that also keeps the textual form of the
/// series data type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuSeries {
/// The wrapped polars series.
series: Series,
/// Data type of the series rendered as a string; `NuSeries::new` fills it
/// from `series.dtype().to_string()`.
dtype: String,
}
// TODO: Better definition of equality and comparison for a series.
// It probably makes sense to have a name field and use it for comparisons.
// NOTE(review): `eq` always returns `false`, so a series never compares equal
// to anything, not even to itself.
impl PartialEq for NuSeries {
fn eq(&self, _: &Self) -> bool {
false
}
}
// Marker impl only; it adds no behavior on top of the `PartialEq` impl.
impl Eq for NuSeries {}
// Placeholder ordering: any two series are reported as `Ordering::Equal`,
// regardless of their contents.
impl PartialOrd for NuSeries {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
// Placeholder total ordering: any two series are reported as
// `Ordering::Equal`, regardless of their contents.
impl Ord for NuSeries {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
// Feeds nothing into the hasher, so the contents of the series never
// influence a hash value.
impl Hash for NuSeries {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl NuSeries {
pub fn new(series: Series) -> Self {
let dtype = series.dtype().to_string();
NuSeries { series, dtype }
}
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
where
T: Iterator<Item = Value>,
{
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
_ => None,
})
.ok_or_else(|| {
ShellError::labeled_error(
"No series in stream",
"no series found in input stream",
span,
)
})
}
pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
let mut vec_values: Vec<Value> = Vec::new();
for value in iter {
match value.value {
UntaggedValue::Primitive(Primitive::Int(_))
| UntaggedValue::Primitive(Primitive::Decimal(_))
| UntaggedValue::Primitive(Primitive::String(_))
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
insert_value(value, &mut vec_values)?
}
_ => {
return Err(ShellError::labeled_error_with_secondary(
"Format not supported",
"Value not supported for conversion",
&value.tag.span,
"Perhaps you want to use a list of primitive values (int, decimal, string, or bool)",
&value.tag.span,
));
}
}
}
from_parsed_vector(vec_values, name)
}
pub fn into_value(self, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(self)),
tag,
}
}
pub fn series_to_value(series: Series, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))),
tag,
}
}
pub fn series_to_untagged(series: Series) -> UntaggedValue {
UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
}
pub fn dtype(&self) -> &str {
&self.dtype
}
pub fn series(self) -> Series {
self.series
}
}
/// Borrows the wrapped polars `Series`.
impl AsRef<Series> for NuSeries {
fn as_ref(&self) -> &Series {
&self.series
}
}
/// Mutably borrows the wrapped polars `Series`.
impl AsMut<Series> for NuSeries {
fn as_mut(&mut self) -> &mut Series {
&mut self.series
}
}
// Converts a series into the rows used to print it.
//
// `$converter` is the fallible conversion of the series into a typed chunked
// array (for example `self.as_ref().i64()`), and `$self` is the `NuSeries`
// being printed. The macro expands to a `Result<Vec<Value>, ShellError>` and
// uses `?`, so it must be expanded inside a function returning a compatible
// `Result`.
//
// Every produced row is a dictionary with a single entry whose key is
// "<series name> (<dtype>)". Up to 20 values are printed in full. Longer
// series are printed as the first 10 values, a "..." row and the last 10
// values. Null values are printed as `Primitive::Nothing`.
macro_rules! series_to_chunked {
    ($converter: expr, $self: expr) => {{
        let chunked_array = $converter.map_err(|e| {
            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
        })?;

        let size = 20;
        let len = $self.as_ref().len();

        // The header is the same for every row, so it is built only once.
        let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());

        // Wraps an untagged value in a single-entry row keyed by the header.
        let to_row = |value: UntaggedValue| {
            let mut dictionary_row = Dictionary::default();
            dictionary_row.insert(
                header.clone(),
                Value {
                    value,
                    tag: Tag::unknown(),
                },
            );

            Value {
                value: UntaggedValue::Row(dictionary_row),
                tag: Tag::unknown(),
            }
        };

        // Only series longer than `size` are split into head and tail.
        let (head_size, tail_size) = if len > size {
            (size / 2, size / 2)
        } else {
            (len, 0)
        };

        let head = chunked_array.into_iter().take(head_size).map(|value| {
            to_row(match value {
                Some(v) => UntaggedValue::Primitive(v.into()),
                None => UntaggedValue::Primitive(Primitive::Nothing),
            })
        });

        let res = if tail_size == 0 {
            head.collect::<Vec<Value>>()
        } else {
            let middle = std::iter::once(to_row(UntaggedValue::Primitive("...".into())));

            // Skipping `len - tail_size` elements leaves exactly the last
            // `tail_size` values of the series.
            let tail = chunked_array
                .into_iter()
                .skip(len - tail_size)
                .map(|value| {
                    to_row(match value {
                        Some(v) => UntaggedValue::Primitive(v.into()),
                        None => UntaggedValue::Primitive(Primitive::Nothing),
                    })
                });

            head.chain(middle).chain(tail).collect::<Vec<Value>>()
        };

        Ok(res)
    }};
}
impl NuSeries {
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
match self.as_ref().dtype() {
DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
DataType::Null => Ok(vec![Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
tag: Tag::unknown(),
}]),
//DataType::List(_) => None,
//DataType::Time64(TimeUnit) => None,
//DataType::Duration(TimeUnit) => None,
// DataType::Categorical => None,
_ => unimplemented!(),
}
}
}
/// Appends `value` to `vec_values` after checking that it has the same
/// primitive type (int, decimal, string or boolean) as the last stored value.
///
/// The first value is always accepted.
///
/// # Errors
/// Returns a `ShellError` pointing at both values when the new value and the
/// previously stored one are not the same supported primitive type.
fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
    if let Some(previous) = vec_values.last() {
        let same_kind = matches!(
            (&previous.value, &value.value),
            (
                UntaggedValue::Primitive(Primitive::Int(_)),
                UntaggedValue::Primitive(Primitive::Int(_)),
            ) | (
                UntaggedValue::Primitive(Primitive::Decimal(_)),
                UntaggedValue::Primitive(Primitive::Decimal(_)),
            ) | (
                UntaggedValue::Primitive(Primitive::String(_)),
                UntaggedValue::Primitive(Primitive::String(_)),
            ) | (
                UntaggedValue::Primitive(Primitive::Boolean(_)),
                UntaggedValue::Primitive(Primitive::Boolean(_)),
            )
        );

        if !same_kind {
            return Err(ShellError::labeled_error_with_secondary(
                "Different values in column",
                "Value with different type",
                &value.tag,
                "Perhaps you want to change it to this value type",
                &previous.tag,
            ));
        }
    }

    vec_values.push(value);
    Ok(())
}
fn from_parsed_vector(
vec_values: Vec<Value>,
name: Option<String>,
) -> Result<NuSeries, ShellError> {
let series = match &vec_values[0].value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "int",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "decimal",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::String(_)) => {
let series_values: Result<Vec<_>, _> =
vec_values.iter().map(|v| v.as_string()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "string",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_bool()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "string",
};
Series::new(series_name, series_values?)
}
_ => unreachable!("The untagged type is checked while creating vec_values"),
};
Ok(NuSeries::new(series))
}

View File

@ -75,6 +75,10 @@ pub enum Type {
/// Dataframe
#[cfg(feature = "dataframe")]
DataFrame,
/// Intermediate dataframe structure (e.g. a groupby)
#[cfg(feature = "dataframe")]
FrameStruct,
}
/// A shape representation of the type of a row
@ -192,6 +196,8 @@ impl Type {
UntaggedValue::Block(_) => Type::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => Type::DataFrame,
#[cfg(feature = "dataframe")]
UntaggedValue::FrameStruct(_) => Type::DataFrame,
}
}
}
@ -298,7 +304,7 @@ impl PrettyDebug for Type {
}
Type::Block => ty("block"),
#[cfg(feature = "dataframe")]
Type::DataFrame => ty("data_type_formatter"),
Type::DataFrame | Type::FrameStruct => ty("data_type_formatter"),
}
}
}

View File

@ -31,7 +31,7 @@ use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(feature = "dataframe")]
use crate::dataframe::PolarsData;
use crate::dataframe::{FrameStruct, NuDataFrame};
/// The core structured values that flow through a pipeline
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
@ -51,10 +51,15 @@ pub enum UntaggedValue {
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
Block(Box<hir::CapturedBlock>),
/// Data option that holds the polars structs required to to data
/// manipulation and operations using polars dataframes
/// Main nushell dataframe
#[cfg(feature = "dataframe")]
DataFrame(PolarsData),
DataFrame(NuDataFrame),
/// Data option that holds intermediate struct required to do data
/// manipulation and operations for dataframes such as groupby, lazy frames
/// and lazy groupby
#[cfg(feature = "dataframe")]
FrameStruct(FrameStruct),
}
impl UntaggedValue {
@ -685,11 +690,9 @@ impl ShellTypeName for UntaggedValue {
UntaggedValue::Error(_) => "error",
UntaggedValue::Block(_) => "block",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
UntaggedValue::DataFrame(_) => "dataframe",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
UntaggedValue::FrameStruct(FrameStruct::GroupBy(_)) => "groupby",
}
}
}

View File

@ -25,7 +25,9 @@ impl PrettyDebug for Value {
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"),
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
DbgDocBldr::opaque("dataframe")
}
}
}
}