forked from extern/nushell
Series Operation (#3563)
* Sample command * Join command with checks * More dataframes commands * Groupby and aggregate commands * Missing feature dataframe flag * Renamed file * New commands for dataframes * error parser and df reference * filter command for dataframes * removed name from nu_dataframe * commands to save to parquet and csv * polars new version * new dataframe commands * series type and print * Series basic arithmetics * Add new column to dataframe * Command names changed to nushell standard
This commit is contained in:
@ -1,12 +1,15 @@
|
||||
pub mod nu_dataframe;
|
||||
pub mod nu_groupby;
|
||||
pub mod nu_series;
|
||||
|
||||
pub use nu_dataframe::NuDataFrame;
|
||||
pub use nu_groupby::NuGroupBy;
|
||||
pub use nu_series::NuSeries;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The polars-backed objects that can be stored inside an
/// `UntaggedValue::DataFrame` value.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub enum PolarsData {
|
||||
/// A dataframe wrapped in [`NuDataFrame`].
EagerDataFrame(NuDataFrame),
|
||||
/// A group-by object wrapped in [`NuGroupBy`].
GroupBy(NuGroupBy),
|
||||
/// A single series wrapped in [`NuSeries`].
Series(NuSeries),
|
||||
}
|
||||
|
@ -4,16 +4,15 @@ use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
|
||||
use bigdecimal::FromPrimitive;
|
||||
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
||||
use nu_errors::ShellError;
|
||||
use nu_source::Tag;
|
||||
use nu_source::{Span, Tag};
|
||||
use num_bigint::BigInt;
|
||||
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
|
||||
use serde::de::{Deserialize, Deserializer, Visitor};
|
||||
use serde::Serialize;
|
||||
|
||||
use std::fmt;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Dictionary, Primitive, UntaggedValue, Value};
|
||||
|
||||
use super::PolarsData;
|
||||
|
||||
const SECS_PER_DAY: i64 = 86_400;
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -40,26 +39,9 @@ impl Default for ColumnValues {
|
||||
|
||||
type ColumnMap = HashMap<String, ColumnValues>;
|
||||
|
||||
// TODO. Using Option to help with deserialization. It will be better to find
|
||||
// a way to use serde with dataframes
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NuDataFrame {
|
||||
#[serde(skip_serializing)]
|
||||
pub dataframe: Option<DataFrame>,
|
||||
}
|
||||
|
||||
impl Default for NuDataFrame {
|
||||
fn default() -> Self {
|
||||
NuDataFrame { dataframe: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn new(df: polars::prelude::DataFrame) -> Self {
|
||||
NuDataFrame {
|
||||
dataframe: Some(df),
|
||||
}
|
||||
}
|
||||
dataframe: DataFrame,
|
||||
}
|
||||
|
||||
// TODO. Better definition of equality and comparison for a dataframe.
|
||||
@ -88,30 +70,46 @@ impl Hash for NuDataFrame {
|
||||
fn hash<H: Hasher>(&self, _: &mut H) {}
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for NuDataFrame {
|
||||
type Value = Self;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("an integer between -2^31 and 2^31")
|
||||
impl AsRef<DataFrame> for NuDataFrame {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
&self.dataframe
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for NuDataFrame {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_i32(NuDataFrame::default())
|
||||
impl AsMut<DataFrame> for NuDataFrame {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
|
||||
&mut self.dataframe
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn new(dataframe: polars::prelude::DataFrame) -> Self {
|
||||
NuDataFrame { dataframe }
|
||||
}
|
||||
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
span,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T, tag: &Tag) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
// Dictionary to store the columnar data extracted from
|
||||
// the input. During the iteration we will sort if the values
|
||||
// the input. During the iteration we check if the values
|
||||
// have different type
|
||||
let mut column_values: ColumnMap = HashMap::new();
|
||||
|
||||
@ -120,10 +118,12 @@ impl NuDataFrame {
|
||||
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
|
||||
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error(
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Format not supported",
|
||||
"Value not supported for conversion",
|
||||
&value.tag,
|
||||
"Perhaps you want to use a List of Tables or a Dictionary",
|
||||
&value.tag,
|
||||
));
|
||||
}
|
||||
}
|
||||
@ -132,26 +132,37 @@ impl NuDataFrame {
|
||||
from_parsed_columns(column_values, tag)
|
||||
}
|
||||
|
||||
pub fn to_value(self, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
// The printed output is made of a head and, if the dataframe is too large, a tail
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
if let Some(df) = &self.dataframe {
|
||||
let size: usize = 20;
|
||||
let df = &self.as_ref();
|
||||
let size: usize = 20;
|
||||
|
||||
if df.height() > size {
|
||||
let sample_size = size / 2;
|
||||
let mut values = self.head(Some(sample_size))?;
|
||||
add_separator(&mut values, df);
|
||||
let remaining = df.height() - sample_size;
|
||||
let tail_size = remaining.min(sample_size);
|
||||
let mut tail_values = self.tail(Some(tail_size))?;
|
||||
values.append(&mut tail_values);
|
||||
if df.height() > size {
|
||||
let sample_size = size / 2;
|
||||
let mut values = self.head(Some(sample_size))?;
|
||||
add_separator(&mut values, df);
|
||||
let remaining = df.height() - sample_size;
|
||||
let tail_size = remaining.min(sample_size);
|
||||
let mut tail_values = self.tail(Some(tail_size))?;
|
||||
values.append(&mut tail_values);
|
||||
|
||||
Ok(values)
|
||||
} else {
|
||||
Ok(self.head(Some(size))?)
|
||||
}
|
||||
Ok(values)
|
||||
} else {
|
||||
unreachable!("No dataframe found in print command")
|
||||
Ok(self.head(Some(size))?)
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,71 +174,47 @@ impl NuDataFrame {
|
||||
}
|
||||
|
||||
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
|
||||
if let Some(df) = &self.dataframe {
|
||||
let to_row = df.height();
|
||||
let size = rows.unwrap_or(5);
|
||||
let from_row = to_row.saturating_sub(size);
|
||||
let df = &self.as_ref();
|
||||
let to_row = df.height();
|
||||
let size = rows.unwrap_or(5);
|
||||
let from_row = to_row.saturating_sub(size);
|
||||
|
||||
let values = self.to_rows(from_row, to_row)?;
|
||||
let values = self.to_rows(from_row, to_row)?;
|
||||
|
||||
Ok(values)
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
|
||||
if let Some(df) = &self.dataframe {
|
||||
let column_names = df.get_column_names();
|
||||
let df = &self.as_ref();
|
||||
let column_names = df.get_column_names();
|
||||
|
||||
let mut values: Vec<Value> = Vec::new();
|
||||
let mut values: Vec<Value> = Vec::new();
|
||||
|
||||
let upper_row = to_row.min(df.height());
|
||||
for i in from_row..upper_row {
|
||||
let row = df.get_row(i);
|
||||
let mut dictionary_row = Dictionary::default();
|
||||
let upper_row = to_row.min(df.height());
|
||||
for i in from_row..upper_row {
|
||||
let row = df.get_row(i);
|
||||
let mut dictionary_row = Dictionary::default();
|
||||
|
||||
for (val, name) in row.0.iter().zip(column_names.iter()) {
|
||||
let untagged_val = anyvalue_to_untagged(val)?;
|
||||
for (val, name) in row.0.iter().zip(column_names.iter()) {
|
||||
let untagged_val = anyvalue_to_untagged(val)?;
|
||||
|
||||
let dict_val = Value {
|
||||
value: untagged_val,
|
||||
tag: Tag::unknown(),
|
||||
};
|
||||
|
||||
dictionary_row.insert(name.to_string(), dict_val);
|
||||
}
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::Row(dictionary_row),
|
||||
let dict_val = Value {
|
||||
value: untagged_val,
|
||||
tag: Tag::unknown(),
|
||||
};
|
||||
|
||||
values.push(value);
|
||||
dictionary_row.insert(name.to_string(), dict_val);
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
let value = Value {
|
||||
value: UntaggedValue::Row(dictionary_row),
|
||||
tag: Tag::unknown(),
|
||||
};
|
||||
|
||||
impl AsRef<polars::prelude::DataFrame> for NuDataFrame {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
match &self.dataframe {
|
||||
Some(df) => df,
|
||||
None => unreachable!("Accessing ref to dataframe from nu_dataframe"),
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<polars::prelude::DataFrame> for NuDataFrame {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
|
||||
match &mut self.dataframe {
|
||||
Some(df) => df,
|
||||
None => unreachable!("Accessing mut ref to dataframe from nu_dataframe"),
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,10 +378,12 @@ fn insert_value(
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
) => col_val.values.push(value),
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error(
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Different values in column",
|
||||
"Value with different type",
|
||||
&value.tag,
|
||||
"Perhaps you want to change it to this value type",
|
||||
&prev_value.tag,
|
||||
));
|
||||
}
|
||||
}
|
||||
@ -418,7 +407,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
|
||||
}
|
||||
InputValue::Integer => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_f32()).collect();
|
||||
column.values.iter().map(|v| v.as_i64()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
@ -434,9 +423,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
|
||||
let df = DataFrame::new(df_series);
|
||||
|
||||
match df {
|
||||
Ok(df) => Ok(NuDataFrame {
|
||||
dataframe: Some(df),
|
||||
}),
|
||||
Ok(df) => Ok(NuDataFrame::new(df)),
|
||||
Err(e) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Error while creating dataframe",
|
||||
|
@ -1,11 +1,11 @@
|
||||
use nu_source::Tag;
|
||||
use nu_source::{Span, Tag};
|
||||
use polars::frame::groupby::{GroupBy, GroupTuples};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::NuDataFrame;
|
||||
use super::{NuDataFrame, PolarsData};
|
||||
use nu_errors::ShellError;
|
||||
|
||||
use crate::{TaggedDictBuilder, Value};
|
||||
use crate::{TaggedDictBuilder, UntaggedValue, Value};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub struct NuGroupBy {
|
||||
@ -23,11 +23,25 @@ impl NuGroupBy {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuGroupBy, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"No groupby object in stream",
|
||||
"no groupby object found in input stream",
|
||||
span,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
|
||||
let df = match &self.dataframe.dataframe {
|
||||
Some(df) => df,
|
||||
None => unreachable!("No dataframe in nu_dataframe"),
|
||||
};
|
||||
let df = self.dataframe.as_ref();
|
||||
|
||||
let by = df.select_series(&self.by).map_err(|e| {
|
||||
ShellError::labeled_error("Error creating groupby", format!("{}", e), Tag::unknown())
|
||||
@ -50,9 +64,6 @@ impl NuGroupBy {
|
||||
|
||||
impl AsRef<polars::prelude::DataFrame> for NuGroupBy {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
match &self.dataframe.dataframe {
|
||||
Some(df) => df,
|
||||
None => unreachable!("Accessing reference to dataframe from nu_groupby"),
|
||||
}
|
||||
self.dataframe.as_ref()
|
||||
}
|
||||
}
|
||||
|
330
crates/nu-protocol/src/dataframe/nu_series.rs
Normal file
330
crates/nu-protocol/src/dataframe/nu_series.rs
Normal file
@ -0,0 +1,330 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::vec;
|
||||
|
||||
use nu_errors::ShellError;
|
||||
use nu_source::{Span, Tag};
|
||||
use polars::prelude::{DataType, NamedFrom, Series};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Dictionary, Primitive, UntaggedValue, Value};
|
||||
|
||||
use super::PolarsData;
|
||||
|
||||
/// Wrapper around a polars `Series`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NuSeries {
|
||||
// The wrapped polars series.
series: Series,
|
||||
// String form of the series data type; `NuSeries::new` fills it from
// `series.dtype().to_string()`.
dtype: String,
|
||||
}
|
||||
|
||||
// TODO. Better definition of equality and comparison for a series.
|
||||
// It probably makes sense to have a name field and use it for comparisons
|
||||
impl PartialEq for NuSeries {
|
||||
fn eq(&self, _: &Self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl with no methods; it builds on the `PartialEq` impl for `NuSeries`.
impl Eq for NuSeries {}
|
||||
|
||||
impl PartialOrd for NuSeries {
|
||||
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for NuSeries {
|
||||
fn cmp(&self, _: &Self) -> Ordering {
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for NuSeries {
|
||||
fn hash<H: Hasher>(&self, _: &mut H) {}
|
||||
}
|
||||
|
||||
impl NuSeries {
|
||||
pub fn new(series: Series) -> Self {
|
||||
let dtype = series.dtype().to_string();
|
||||
|
||||
NuSeries { series, dtype }
|
||||
}
|
||||
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"No series in stream",
|
||||
"no series found in input stream",
|
||||
span,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
let mut vec_values: Vec<Value> = Vec::new();
|
||||
|
||||
for value in iter {
|
||||
match value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(_))
|
||||
| UntaggedValue::Primitive(Primitive::Decimal(_))
|
||||
| UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||
insert_value(value, &mut vec_values)?
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Format not supported",
|
||||
"Value not supported for conversion",
|
||||
&value.tag.span,
|
||||
"Perhaps you want to use a list of primitive values (int, decimal, string)",
|
||||
&value.tag.span,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
from_parsed_vector(vec_values, name)
|
||||
}
|
||||
|
||||
pub fn to_value(self, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(self)),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_value(series: Series, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_untagged(series: Series) -> UntaggedValue {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
|
||||
}
|
||||
|
||||
pub fn dtype(&self) -> &str {
|
||||
&self.dtype
|
||||
}
|
||||
|
||||
pub fn series(self) -> Series {
|
||||
self.series
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Series> for NuSeries {
|
||||
fn as_ref(&self) -> &Series {
|
||||
&self.series
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<Series> for NuSeries {
|
||||
fn as_mut(&mut self) -> &mut Series {
|
||||
&mut self.series
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a series into the rows shown when it is printed.
//
// `$converter` is the fallible downcast of the series to a typed chunked
// array (e.g. `self.as_ref().i64()`); `$self` is the `NuSeries` being printed.
// The expansion uses `?`, so it must be used inside a function returning
// `Result<_, ShellError>`; it evaluates to `Ok(Vec<Value>)`.
//
// Up to `size` (20) elements are shown as-is. Longer series are shown as the
// first `size / 2` elements, a "..." separator row and the last `size / 2`
// elements.
//
// NOTE(review): non-null elements become single-column rows keyed by
// "<name> (<dtype>)", while null elements are emitted as a bare `Nothing`
// value without a row — kept as in the original.
macro_rules! series_to_chunked {
    ($converter: expr, $self: expr) => {{
        let chunked_array = $converter.map_err(|e| {
            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
        })?;

        let size = 20;
        let half = size / 2;
        let len = $self.as_ref().len();

        // Column header shared by every row; computed once from the live series.
        let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());

        // When truncating, the tail must start `half` elements before the end
        // so that it really contains the last elements of the series.
        let (head_size, skip, tail_size) = if len > size {
            (half, len - half, half)
        } else {
            (size, 0, 0)
        };

        let head = chunked_array
            .into_iter()
            .take(head_size)
            .map(|value| match value {
                Some(v) => {
                    let mut dictionary_row = Dictionary::default();

                    let value = Value {
                        value: UntaggedValue::Primitive(v.into()),
                        tag: Tag::unknown(),
                    };

                    dictionary_row.insert(header.clone(), value);

                    Value {
                        value: UntaggedValue::Row(dictionary_row),
                        tag: Tag::unknown(),
                    }
                }
                None => Value {
                    value: UntaggedValue::Primitive(Primitive::Nothing),
                    tag: Tag::unknown(),
                },
            });

        // `<=` so that a series of exactly `size` elements is printed whole,
        // without a trailing separator row.
        let res = if len <= size {
            head.collect::<Vec<Value>>()
        } else {
            let middle = std::iter::once({
                let mut dictionary_row = Dictionary::default();

                let value = Value {
                    value: UntaggedValue::Primitive("...".into()),
                    tag: Tag::unknown(),
                };

                dictionary_row.insert(header.clone(), value);

                Value {
                    value: UntaggedValue::Row(dictionary_row),
                    tag: Tag::unknown(),
                }
            });

            let tail = chunked_array
                .into_iter()
                .skip(skip)
                .take(tail_size)
                .map(|value| match value {
                    Some(v) => {
                        let mut dictionary_row = Dictionary::default();

                        let value = Value {
                            value: UntaggedValue::Primitive(v.into()),
                            tag: Tag::unknown(),
                        };

                        dictionary_row.insert(header.clone(), value);

                        Value {
                            value: UntaggedValue::Row(dictionary_row),
                            tag: Tag::unknown(),
                        }
                    }
                    None => Value {
                        value: UntaggedValue::Primitive(Primitive::Nothing),
                        tag: Tag::unknown(),
                    },
                });

            head.chain(middle).chain(tail).collect::<Vec<Value>>()
        };

        Ok(res)
    }};
}
|
||||
|
||||
impl NuSeries {
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
match self.as_ref().dtype() {
|
||||
DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
|
||||
DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
|
||||
DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
|
||||
DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
|
||||
DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
|
||||
DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
|
||||
DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
|
||||
DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
|
||||
DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
|
||||
DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
|
||||
DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
|
||||
DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
|
||||
DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
|
||||
DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
|
||||
DataType::Null => Ok(vec![Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
tag: Tag::unknown(),
|
||||
}]),
|
||||
//DataType::List(_) => None,
|
||||
//DataType::Time64(TimeUnit) => None,
|
||||
//DataType::Duration(TimeUnit) => None,
|
||||
// DataType::Categorical => None,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
|
||||
// Checking that the type for the value is the same
|
||||
// for the previous value in the column
|
||||
if vec_values.is_empty() {
|
||||
Ok(vec_values.push(value))
|
||||
} else {
|
||||
let prev_value = &vec_values[vec_values.len() - 1];
|
||||
|
||||
match (&prev_value.value, &value.value) {
|
||||
(
|
||||
UntaggedValue::Primitive(Primitive::Int(_)),
|
||||
UntaggedValue::Primitive(Primitive::Int(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
) => Ok(vec_values.push(value)),
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Different values in column",
|
||||
"Value with different type",
|
||||
&value.tag,
|
||||
"Perhaps you want to change it to this value type",
|
||||
&prev_value.tag,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_parsed_vector(
|
||||
vec_values: Vec<Value>,
|
||||
name: Option<String>,
|
||||
) -> Result<NuSeries, ShellError> {
|
||||
let series = match &vec_values[0].value {
|
||||
UntaggedValue::Primitive(Primitive::Int(_)) => {
|
||||
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "int",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
|
||||
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "decimal",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
vec_values.iter().map(|v| v.as_string()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "string",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
_ => unreachable!("The untagged type is checked while creating vec_values"),
|
||||
};
|
||||
|
||||
Ok(NuSeries::new(series))
|
||||
}
|
@ -672,7 +672,11 @@ impl ShellTypeName for UntaggedValue {
|
||||
UntaggedValue::Error(_) => "error",
|
||||
UntaggedValue::Block(_) => "block",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => "dataframe",
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user