From fe92051bb377b17f3e9228d7be7755baffbfe55b Mon Sep 17 00:00:00 2001
From: Jack Wright <56345+ayax79@users.noreply.github.com>
Date: Thu, 9 Nov 2023 17:00:59 -0800
Subject: [PATCH] Adding support for Polars structs (#10943)
Provides support for reading Polars structs. This allows opening of
supported files (jsonl, parquet, etc) that contain rows with structured
data.
The following attached json lines
file([receipts.jsonl.gz](https://github.com/nushell/nushell/files/13311476/receipts.jsonl.gz))
contains a customer column with structured data. This json lines file
can now be loaded via `dfr open` and will render as follows:
This also addresses some cleanup of date handling and utilizing
timezones where provided.
This pull request only addresses reading data from polars structs. I
will address converting nushell data to polars structs in a future
request as this change is large enough as it is.
---------
Co-authored-by: Jack Wright
---
Cargo.lock | 1 +
crates/nu-cmd-dataframe/Cargo.toml | 1 +
.../values/nu_dataframe/conversion.rs | 808 +++++++++++++++---
3 files changed, 692 insertions(+), 118 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 0f062d80f5..bf8e7f1cde 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2764,6 +2764,7 @@ name = "nu-cmd-dataframe"
version = "0.86.1"
dependencies = [
"chrono",
+ "chrono-tz",
"fancy-regex",
"indexmap 2.1.0",
"nu-cmd-lang",
diff --git a/crates/nu-cmd-dataframe/Cargo.toml b/crates/nu-cmd-dataframe/Cargo.toml
index d5e7be36fa..7388387adf 100644
--- a/crates/nu-cmd-dataframe/Cargo.toml
+++ b/crates/nu-cmd-dataframe/Cargo.toml
@@ -19,6 +19,7 @@ nu-protocol = { path = "../nu-protocol", version = "0.86.1" }
# Potential dependencies for extras
chrono = { version = "0.4", features = ["std", "unstable-locales"], default-features = false }
+chrono-tz = "0.8"
fancy-regex = "0.11"
indexmap = { version = "2.1" }
num = { version = "0.4", optional = true }
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
index 028d73a146..4d4516bb77 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
@@ -1,23 +1,31 @@
-use super::{DataFrameValue, NuDataFrame};
+use std::ops::{Deref, DerefMut};
-use chrono::{DateTime, FixedOffset, NaiveDateTime};
+use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
+use chrono_tz::Tz;
use indexmap::map::{Entry, IndexMap};
-use nu_protocol::{Record, ShellError, Span, Value};
use polars::chunked_array::builder::AnonymousOwnedListBuilder;
use polars::chunked_array::object::builder::ObjectChunkedBuilder;
use polars::chunked_array::ChunkedArray;
+use polars::datatypes::AnyValue;
+use polars::export::arrow::array::{
+ Array, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
+ UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+};
use polars::export::arrow::Either;
use polars::prelude::{
- DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries,
- ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder, ListType,
- ListUtf8ChunkedBuilder, NamedFrom, NewChunkedArray, ObjectType, Series, TemporalMethods,
- TimeUnit,
+ ArrayRef, DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries,
+ LargeBinaryArray, LargeListArray, LargeStringArray, ListBooleanChunkedBuilder,
+ ListBuilderTrait, ListPrimitiveChunkedBuilder, ListType, ListUtf8ChunkedBuilder, NamedFrom,
+ NewChunkedArray, ObjectType, Series, StructArray, TemporalMethods, TimeUnit,
};
-use std::ops::{Deref, DerefMut};
-const SECS_PER_DAY: i64 = 86_400;
+use nu_protocol::{Record, ShellError, Span, Value};
-// The values capacity is for the size of an internal vec.
+use super::{DataFrameValue, NuDataFrame};
+
+const NANOS_PER_DAY: i64 = 86_400_000_000_000;
+
+// The values capacity is for the size of an vec.
// Since this is impossible to determine without traversing every value
// I just picked one. Since this is for converting back and forth
// between nushell tables the values shouldn't be too extremely large for
@@ -199,7 +207,7 @@ fn value_to_input_type(value: &Value) -> InputType {
Value::Filesize { .. } => InputType::Filesize,
Value::List { vals, .. } => {
// We need to determined the type inside of the list.
- // Since Value::List does not have any kind of internal
+ // Since Value::List does not have any kind of
// type information, we need to look inside the list.
// This will cause errors if lists have inconsistent types.
// Basically, if a list column needs to be converted to dataframe,
@@ -805,28 +813,21 @@ fn series_to_values(
)),
Some(ca) => {
let it = ca.into_iter();
- let values: Vec =
- if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) {
- Either::Left(it.skip(from_row).take(size))
+ if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) {
+ Either::Left(it.skip(from_row).take(size))
+ } else {
+ Either::Right(it)
+ }
+ .map(|ca| {
+ let sublist: Vec = if let Some(ref s) = ca {
+ series_to_values(s, None, None, Span::unknown())?
} else {
- Either::Right(it)
- }
- .map(|ca| {
- let sublist = ca
- .map(|ref s| {
- match series_to_values(s, None, None, Span::unknown()) {
- Ok(v) => v,
- Err(e) => {
- eprintln!("Error list values: {e}");
- vec![]
- }
- }
- })
- .unwrap_or(vec![]);
- Value::list(sublist, span)
- })
- .collect::>();
- Ok(values)
+ // empty item
+ vec![]
+ };
+ Ok(Value::list(sublist, span))
+ })
+ .collect::, ShellError>>()
}
}
}
@@ -849,51 +850,16 @@ fn series_to_values(
}
.map(|v| match v {
Some(a) => {
- // elapsed time in day since 1970-01-01
- let seconds = a as i64 * SECS_PER_DAY;
- let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) {
- Some(val) => val,
- None => {
- return Value::error(
- ShellError::UnsupportedInput {
- msg: "The given local datetime representation is invalid."
- .to_string(),
- input: format!("timestamp is {a:?}"),
- msg_span: span,
- input_span: Span::unknown(),
- },
- span,
- )
- }
- };
- // Zero length offset
- let offset = match FixedOffset::east_opt(0) {
- Some(val) => val,
- None => {
- return Value::error(
- ShellError::UnsupportedInput {
- msg: "The given local datetime representation is invalid."
- .to_string(),
- input: format!("timestamp is {a:?}"),
- msg_span: span,
- input_span: Span::unknown(),
- },
- span,
- )
- }
- };
- let datetime =
- DateTime::::from_naive_utc_and_offset(naive_datetime, offset);
-
- Value::date(datetime, span)
+ let nanos = nanos_per_day(a);
+ let datetime = datetime_from_epoch_nanos(nanos, &None, span)?;
+ Ok(Value::date(datetime, span))
}
- None => Value::nothing(span),
+ None => Ok(Value::nothing(span)),
})
- .collect::>();
-
+ .collect::, ShellError>>()?;
Ok(values)
}
- DataType::Datetime(time_unit, _) => {
+ DataType::Datetime(time_unit, tz) => {
let casted = series.datetime().map_err(|e| {
ShellError::GenericError(
"Error casting column to datetime".into(),
@@ -912,55 +878,48 @@ fn series_to_values(
}
.map(|v| match v {
Some(a) => {
- let unit_divisor = match time_unit {
- TimeUnit::Nanoseconds => 1_000_000_000,
- TimeUnit::Microseconds => 1_000_000,
- TimeUnit::Milliseconds => 1_000,
- };
// elapsed time in nano/micro/milliseconds since 1970-01-01
- let seconds = a / unit_divisor;
- let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) {
- Some(val) => val,
- None => {
- return Value::error(
- ShellError::UnsupportedInput {
- msg: "The given local datetime representation is invalid."
- .to_string(),
- input: format!("timestamp is {a:?}"),
- msg_span: span,
- input_span: Span::unknown(),
- },
- span,
- )
- }
- };
- // Zero length offset
- let offset = match FixedOffset::east_opt(0) {
- Some(val) => val,
- None => {
- return Value::error(
- ShellError::UnsupportedInput {
- msg: "The given local datetime representation is invalid."
- .to_string(),
- input: format!("timestamp is {a:?}"),
- msg_span: span,
- input_span: Span::unknown(),
- },
- span,
- )
- }
- };
- let datetime =
- DateTime::::from_naive_utc_and_offset(naive_datetime, offset);
-
- Value::date(datetime, span)
+ let nanos = nanos_from_timeunit(a, *time_unit);
+ let datetime = datetime_from_epoch_nanos(nanos, tz, span)?;
+ Ok(Value::date(datetime, span))
}
- None => Value::nothing(span),
+ None => Ok(Value::nothing(span)),
})
- .collect::>();
-
+ .collect::, ShellError>>()?;
Ok(values)
}
+ DataType::Struct(polar_fields) => {
+ let casted = series.struct_().map_err(|e| {
+ ShellError::GenericError(
+ "Error casting column to struct".into(),
+ "".to_string(),
+ None,
+ Some(e.to_string()),
+ Vec::new(),
+ )
+ })?;
+ let it = casted.into_iter();
+ let values: Result, ShellError> =
+ if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) {
+ Either::Left(it.skip(from_row).take(size))
+ } else {
+ Either::Right(it)
+ }
+ .map(|any_values| {
+ let vals: Result, ShellError> = any_values
+ .iter()
+ .map(|v| any_value_to_value(v, span))
+ .collect();
+ let cols: Vec = polar_fields
+ .iter()
+ .map(|field| field.name.to_string())
+ .collect();
+ let record = Record { cols, vals: vals? };
+ Ok(Value::record(record, span))
+ })
+ .collect();
+ values
+ }
DataType::Time => {
let casted = series.timestamp(TimeUnit::Nanoseconds).map_err(|e| {
ShellError::GenericError(
@@ -996,10 +955,252 @@ fn series_to_values(
}
}
+fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result {
+ match any_value {
+ AnyValue::Null => Ok(Value::nothing(span)),
+ AnyValue::Boolean(b) => Ok(Value::bool(*b, span)),
+ AnyValue::Utf8(s) => Ok(Value::string(s.to_string(), span)),
+ AnyValue::UInt8(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::UInt16(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::UInt32(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::UInt64(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::Int8(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::Int16(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::Int32(i) => Ok(Value::int(*i as i64, span)),
+ AnyValue::Int64(i) => Ok(Value::int(*i, span)),
+ AnyValue::Float32(f) => Ok(Value::float(*f as f64, span)),
+ AnyValue::Float64(f) => Ok(Value::float(*f, span)),
+ AnyValue::Date(d) => {
+ let nanos = nanos_per_day(*d);
+ datetime_from_epoch_nanos(nanos, &None, span)
+ .map(|datetime| Value::date(datetime, span))
+ }
+ AnyValue::Datetime(a, time_unit, tz) => {
+ let nanos = nanos_from_timeunit(*a, *time_unit);
+ datetime_from_epoch_nanos(nanos, tz, span).map(|datetime| Value::date(datetime, span))
+ }
+ AnyValue::Duration(a, time_unit) => {
+ let nanos = match time_unit {
+ TimeUnit::Nanoseconds => *a,
+ TimeUnit::Microseconds => *a * 1_000,
+ TimeUnit::Milliseconds => *a * 1_000_000,
+ };
+ Ok(Value::duration(nanos, span))
+ }
+ // AnyValue::Time represents the current time since midnight.
+ // Unfortunately, there is no timezone related information.
+ // Given this, calculate the current date from UTC and add the time.
+ AnyValue::Time(nanos) => time_from_midnight(*nanos, span),
+ AnyValue::List(series) => {
+ series_to_values(series, None, None, span).map(|values| Value::list(values, span))
+ }
+ AnyValue::Struct(idx, struct_array, s_fields) => {
+ let cols: Vec = s_fields.iter().map(|f| f.name().to_string()).collect();
+ let vals: Result, ShellError> = struct_array
+ .values()
+ .iter()
+ .enumerate()
+ .map(|(pos, v)| {
+ let f = &s_fields[pos];
+ arr_to_value(&f.dtype, &**v, *idx, span)
+ })
+ .collect();
+ let record = Record { cols, vals: vals? };
+ Ok(Value::record(record, span))
+ }
+ AnyValue::StructOwned(struct_tuple) => {
+ let values: Result, ShellError> = struct_tuple
+ .0
+ .iter()
+ .map(|s| any_value_to_value(s, span))
+ .collect();
+ let fields = struct_tuple
+ .1
+ .iter()
+ .map(|f| f.name().to_string())
+ .collect();
+ Ok(Value::Record {
+ val: Record {
+ cols: fields,
+ vals: values?,
+ },
+ internal_span: span,
+ })
+ }
+ AnyValue::Utf8Owned(s) => Ok(Value::string(s.to_string(), span)),
+ AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)),
+ AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)),
+ e => Err(ShellError::GenericError(
+ "Error creating Value".into(),
+ "".to_string(),
+ None,
+ Some(format!("Value not supported in nushell: {e}")),
+ Vec::new(),
+ )),
+ }
+}
+
+#[inline]
+fn arr_to_value(
+ dt: &DataType,
+ arr: &dyn Array,
+ idx: usize,
+ span: Span,
+) -> Result {
+ macro_rules! downcast {
+ ($casttype:ident) => {{
+ let arr = &*(arr as *const dyn Array as *const $casttype);
+ arr.value_unchecked(idx)
+ }};
+ }
+
+ // Not loving the unsafe here, however this largely based off the one
+ // example I found for converting Array values in:
+ // polars_core::chunked_array::ops::any_value::arr_to_any_value
+ unsafe {
+ match dt {
+ DataType::Boolean => Ok(Value::bool(downcast!(BooleanArray), span)),
+ DataType::UInt8 => Ok(Value::int(downcast!(UInt8Array) as i64, span)),
+ DataType::UInt16 => Ok(Value::int(downcast!(UInt16Array) as i64, span)),
+ DataType::UInt32 => Ok(Value::int(downcast!(UInt32Array) as i64, span)),
+ DataType::UInt64 => Ok(Value::int(downcast!(UInt64Array) as i64, span)),
+ DataType::Int8 => Ok(Value::int(downcast!(Int8Array) as i64, span)),
+ DataType::Int16 => Ok(Value::int(downcast!(Int16Array) as i64, span)),
+ DataType::Int32 => Ok(Value::int(downcast!(Int32Array) as i64, span)),
+ DataType::Int64 => Ok(Value::int(downcast!(Int64Array), span)),
+ DataType::Float32 => Ok(Value::float(downcast!(Float32Array) as f64, span)),
+ DataType::Float64 => Ok(Value::float(downcast!(Float64Array), span)),
+ // DataType::Decimal(_, _) => {}
+ DataType::Utf8 => Ok(Value::string(downcast!(LargeStringArray).to_string(), span)),
+ DataType::Binary => Ok(Value::binary(downcast!(LargeBinaryArray).to_owned(), span)),
+ DataType::Date => {
+ let date = downcast!(Int32Array);
+ let nanos = nanos_per_day(date);
+ datetime_from_epoch_nanos(nanos, &None, span)
+ .map(|datetime| Value::date(datetime, span))
+ }
+ DataType::Datetime(time_unit, tz) => {
+ let nanos = nanos_from_timeunit(downcast!(Int64Array), *time_unit);
+ datetime_from_epoch_nanos(nanos, tz, span)
+ .map(|datetime| Value::date(datetime, span))
+ }
+ // DataType::Duration(_) => {}
+ DataType::Time => {
+ let t = downcast!(Int64Array);
+ time_from_midnight(t, span)
+ }
+ DataType::List(dt) => {
+ let v: ArrayRef = downcast!(LargeListArray);
+ let values_result = if dt.is_primitive() {
+ let s = Series::from_chunks_and_dtype_unchecked("", vec![v], dt);
+ series_to_values(&s, None, None, span)
+ } else {
+ let s = Series::from_chunks_and_dtype_unchecked("", vec![v], &dt.to_physical())
+ .cast_unchecked(dt)
+ .map_err(|e| {
+ ShellError::GenericError(
+ "Error creating Value from polars LargeListArray".into(),
+ e.to_string(),
+ Some(span),
+ None,
+ Vec::new(),
+ )
+ })?;
+ series_to_values(&s, None, None, span)
+ };
+ values_result.map(|values| Value::list(values, span))
+ }
+ DataType::Null => Ok(Value::nothing(span)),
+ DataType::Struct(fields) => {
+ let arr = &*(arr as *const dyn Array as *const StructArray);
+ let vals: Result, ShellError> = arr
+ .values()
+ .iter()
+ .enumerate()
+ .map(|(pos, v)| {
+ let f = &fields[pos];
+ arr_to_value(&f.dtype, &**v, 0, span)
+ })
+ .collect();
+ let cols = fields.iter().map(|f| f.name().to_string()).collect();
+ Ok(Value::record(Record { cols, vals: vals? }, span))
+ }
+ DataType::Unknown => Ok(Value::nothing(span)),
+ _ => Err(ShellError::CantConvert {
+ to_type: dt.to_string(),
+ from_type: "polars array".to_string(),
+ span,
+ help: Some(format!(
+ "Could not convert polars array of type {:?} to value",
+ dt
+ )),
+ }),
+ }
+ }
+}
+
+fn nanos_per_day(days: i32) -> i64 {
+ days as i64 * NANOS_PER_DAY
+}
+
+fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 {
+ a * match time_unit {
+ TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds
+ TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds
+ TimeUnit::Nanoseconds => 1, // Already in nanoseconds
+ }
+}
+
+fn datetime_from_epoch_nanos(
+ nanos: i64,
+ timezone: &Option,
+ span: Span,
+) -> Result, ShellError> {
+ let tz: Tz = if let Some(polars_tz) = timezone {
+ polars_tz.parse::().map_err(|_| {
+ ShellError::GenericError(
+ format!("Could not parse polars timezone: {polars_tz}"),
+ "".to_string(),
+ Some(span),
+ None,
+ vec![],
+ )
+ })?
+ } else {
+ Tz::UTC
+ };
+
+ Ok(tz.timestamp_nanos(nanos).fixed_offset())
+}
+
+fn time_from_midnight(nanos: i64, span: Span) -> Result {
+ let today = Utc::now().date_naive();
+ NaiveTime::from_hms_opt(0, 0, 0) // midnight
+ .map(|time| time + Duration::nanoseconds(nanos)) // current time
+ .map(|time| today.and_time(time)) // current date and time
+ .and_then(|datetime| {
+ FixedOffset::east_opt(0) // utc
+ .map(|offset| {
+ DateTime::::from_naive_utc_and_offset(datetime, offset)
+ })
+ })
+ .map(|datetime| Value::date(datetime, span)) // current date and time
+ .ok_or(ShellError::CantConvert {
+ to_type: "datetime".to_string(),
+ from_type: "polars time".to_string(),
+ span,
+ help: Some("Could not convert polars time of {nanos} to datetime".to_string()),
+ })
+}
+
#[cfg(test)]
mod tests {
- use super::*;
use indexmap::indexmap;
+ use polars::export::arrow::array::{ListArray, NullArray, PrimitiveArray};
+ use polars::export::arrow::buffer::Buffer;
+ use polars::prelude::Field;
+
+ use super::*;
#[test]
fn test_parsed_column_string_list() -> Result<(), Box> {
@@ -1034,4 +1235,375 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_any_value_to_value() -> Result<(), Box> {
+ let span = Span::test_data();
+ assert_eq!(
+ any_value_to_value(&AnyValue::Null, span)?,
+ Value::nothing(span)
+ );
+
+ let test_bool = true;
+ assert_eq!(
+ any_value_to_value(&AnyValue::Boolean(test_bool), span)?,
+ Value::bool(test_bool, span)
+ );
+
+ let test_str = "foo";
+ assert_eq!(
+ any_value_to_value(&AnyValue::Utf8(test_str), span)?,
+ Value::string(test_str.to_string(), span)
+ );
+ assert_eq!(
+ any_value_to_value(&AnyValue::Utf8Owned(test_str.into()), span)?,
+ Value::string(test_str.to_owned(), span)
+ );
+
+ let tests_uint8 = 4;
+ assert_eq!(
+ any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?,
+ Value::int(tests_uint8 as i64, span)
+ );
+
+ let tests_uint16 = 233;
+ assert_eq!(
+ any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?,
+ Value::int(tests_uint16 as i64, span)
+ );
+
+ let tests_uint32 = 897688233;
+ assert_eq!(
+ any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?,
+ Value::int(tests_uint32 as i64, span)
+ );
+
+ let tests_uint64 = 903225135897388233;
+ assert_eq!(
+ any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?,
+ Value::int(tests_uint64 as i64, span)
+ );
+
+ let tests_float32 = 903225135897388233.3223353;
+ assert_eq!(
+ any_value_to_value(&AnyValue::Float32(tests_float32), span)?,
+ Value::float(tests_float32 as f64, span)
+ );
+
+ let tests_float64 = 9064251358973882322333.64233533232;
+ assert_eq!(
+ any_value_to_value(&AnyValue::Float64(tests_float64), span)?,
+ Value::float(tests_float64, span)
+ );
+
+ let test_days = 10_957;
+ let comparison_date = Utc
+ .with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
+ .unwrap()
+ .fixed_offset();
+ assert_eq!(
+ any_value_to_value(&AnyValue::Date(test_days), span)?,
+ Value::date(comparison_date, span)
+ );
+
+ let test_millis = 946_684_800_000;
+ assert_eq!(
+ any_value_to_value(
+ &AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None),
+ span
+ )?,
+ Value::date(comparison_date, span)
+ );
+
+ let test_duration_millis = 99_999;
+ let test_duration_micros = 99_999_000;
+ let test_duration_nanos = 99_999_000_000;
+ assert_eq!(
+ any_value_to_value(
+ &AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds),
+ span
+ )?,
+ Value::duration(test_duration_nanos, span)
+ );
+ assert_eq!(
+ any_value_to_value(
+ &AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds),
+ span
+ )?,
+ Value::duration(test_duration_nanos, span)
+ );
+ assert_eq!(
+ any_value_to_value(
+ &AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds),
+ span
+ )?,
+ Value::duration(test_duration_nanos, span)
+ );
+
+ let test_binary = b"sdf2332f32q3f3afwaf3232f32";
+ assert_eq!(
+ any_value_to_value(&AnyValue::Binary(test_binary), span)?,
+ Value::binary(test_binary.to_vec(), span)
+ );
+ assert_eq!(
+ any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?,
+ Value::binary(test_binary.to_vec(), span)
+ );
+
+ let test_time_nanos = 54_000_000_000_000;
+ let test_time = DateTime::::from_naive_utc_and_offset(
+ Utc::now()
+ .date_naive()
+ .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()),
+ FixedOffset::east_opt(0).unwrap(),
+ );
+ assert_eq!(
+ any_value_to_value(&AnyValue::Time(test_time_nanos), span)?,
+ Value::date(test_time, span)
+ );
+
+ let test_list_series = Series::new("int series", &[1, 2, 3]);
+ let comparison_list_series = Value::list(
+ vec![
+ Value::int(1, span),
+ Value::int(2, span),
+ Value::int(3, span),
+ ],
+ span,
+ );
+ assert_eq!(
+ any_value_to_value(&AnyValue::List(test_list_series), span)?,
+ comparison_list_series
+ );
+
+ let field_value_0 = AnyValue::Int32(1);
+ let field_value_1 = AnyValue::Boolean(true);
+ let values = vec![field_value_0, field_value_1];
+ let field_name_0 = "num_field";
+ let field_name_1 = "bool_field";
+ let fields = vec![
+ Field::new(field_name_0, DataType::Int32),
+ Field::new(field_name_1, DataType::Boolean),
+ ];
+ let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone())));
+ let comparison_owned_record = Value::record(
+ Record {
+ cols: vec![field_name_0.to_owned(), field_name_1.to_owned()],
+ vals: vec![Value::int(1, span), Value::bool(true, span)],
+ },
+ span,
+ );
+ assert_eq!(
+ any_value_to_value(&test_owned_struct, span)?,
+ comparison_owned_record.clone()
+ );
+
+ let test_int_arr = PrimitiveArray::from([Some(1_i32)]);
+ let test_bool_arr = BooleanArray::from([Some(true)]);
+ let test_struct_arr = StructArray::new(
+ DataType::Struct(fields.clone()).to_arrow(),
+ vec![Box::new(test_int_arr), Box::new(test_bool_arr)],
+ None,
+ );
+ assert_eq!(
+ any_value_to_value(
+ &AnyValue::Struct(0, &test_struct_arr, fields.as_slice()),
+ span
+ )?,
+ comparison_owned_record
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_arr_to_value() -> Result<(), Box> {
+ let test_bool_arr = BooleanArray::from([Some(true)]);
+ assert_eq!(
+ arr_to_value(&DataType::Boolean, &test_bool_arr, 0, Span::test_data())?,
+ Value::bool(true, Span::test_data())
+ );
+
+ let test_uint8_arr = PrimitiveArray::from([Some(9_u8)]);
+ assert_eq!(
+ arr_to_value(&DataType::UInt8, &test_uint8_arr, 0, Span::test_data())?,
+ Value::int(9, Span::test_data())
+ );
+
+ let test_uint16_arr = PrimitiveArray::from([Some(3223_u16)]);
+ assert_eq!(
+ arr_to_value(&DataType::UInt16, &test_uint16_arr, 0, Span::test_data())?,
+ Value::int(3223, Span::test_data())
+ );
+
+ let test_uint32_arr = PrimitiveArray::from([Some(33_u32)]);
+ assert_eq!(
+ arr_to_value(&DataType::UInt32, &test_uint32_arr, 0, Span::test_data())?,
+ Value::int(33, Span::test_data())
+ );
+
+ let test_uint64_arr = PrimitiveArray::from([Some(33_3232_u64)]);
+ assert_eq!(
+ arr_to_value(&DataType::UInt64, &test_uint64_arr, 0, Span::test_data())?,
+ Value::int(33_3232, Span::test_data())
+ );
+
+ let test_int8_arr = PrimitiveArray::from([Some(9_i8)]);
+ assert_eq!(
+ arr_to_value(&DataType::Int8, &test_int8_arr, 0, Span::test_data())?,
+ Value::int(9, Span::test_data())
+ );
+
+ let test_int16_arr = PrimitiveArray::from([Some(3223_i16)]);
+ assert_eq!(
+ arr_to_value(&DataType::Int16, &test_int16_arr, 0, Span::test_data())?,
+ Value::int(3223, Span::test_data())
+ );
+
+ let test_int32_arr = PrimitiveArray::from([Some(33_i32)]);
+ assert_eq!(
+ arr_to_value(&DataType::Int32, &test_int32_arr, 0, Span::test_data())?,
+ Value::int(33, Span::test_data())
+ );
+
+ let test_int64_arr = PrimitiveArray::from([Some(33_3232_i64)]);
+ assert_eq!(
+ arr_to_value(&DataType::Int64, &test_int64_arr, 0, Span::test_data())?,
+ Value::int(33_3232, Span::test_data())
+ );
+
+ let test_float32_arr = PrimitiveArray::from([Some(33.32_f32)]);
+ assert_eq!(
+ arr_to_value(&DataType::Float32, &test_float32_arr, 0, Span::test_data())?,
+ Value::float(33.32_f32 as f64, Span::test_data())
+ );
+
+ let test_float64_arr = PrimitiveArray::from([Some(33_3232.999_f64)]);
+ assert_eq!(
+ arr_to_value(&DataType::Float64, &test_float64_arr, 0, Span::test_data())?,
+ Value::float(33_3232.999, Span::test_data())
+ );
+
+ let test_str = "hello world";
+ let test_str_arr = LargeStringArray::from(vec![Some(test_str.to_string())]);
+ assert_eq!(
+ arr_to_value(&DataType::Utf8, &test_str_arr, 0, Span::test_data())?,
+ Value::string(test_str.to_string(), Span::test_data())
+ );
+
+ let test_bin = b"asdlfkjadsf";
+ let test_bin_arr = LargeBinaryArray::from(vec![Some(test_bin.to_vec())]);
+ assert_eq!(
+ arr_to_value(&DataType::Binary, &test_bin_arr, 0, Span::test_data())?,
+ Value::binary(test_bin.to_vec(), Span::test_data())
+ );
+
+ let test_days = 10_957_i32;
+ let comparison_date = Utc
+ .with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
+ .unwrap()
+ .fixed_offset();
+ let test_date_arr = PrimitiveArray::from([Some(test_days)]);
+ assert_eq!(
+ arr_to_value(&DataType::Date, &test_date_arr, 0, Span::test_data())?,
+ Value::date(comparison_date, Span::test_data())
+ );
+
+ let test_dt_nanos = 1_357_488_900_000_000_000_i64;
+ let test_dt_arr = PrimitiveArray::from([Some(test_dt_nanos)]);
+ let test_dt = Utc.timestamp_nanos(test_dt_nanos).fixed_offset();
+ assert_eq!(
+ arr_to_value(
+ &DataType::Datetime(TimeUnit::Nanoseconds, Some("UTC".to_owned())),
+ &test_dt_arr,
+ 0,
+ Span::test_data()
+ )?,
+ Value::date(test_dt, Span::test_data())
+ );
+
+ let test_time_nanos = 54_000_000_000_000_i64;
+ let test_dt_arr = PrimitiveArray::from([Some(test_time_nanos)]);
+ let test_time = DateTime::::from_naive_utc_and_offset(
+ Utc::now()
+ .date_naive()
+ .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()),
+ FixedOffset::east_opt(0).unwrap(),
+ );
+ assert_eq!(
+ arr_to_value(&DataType::Time, &test_dt_arr, 0, Span::test_data())?,
+ Value::date(test_time, Span::test_data())
+ );
+
+ let values = Buffer::from(vec![1, 2, 3]);
+ let values = PrimitiveArray::::new(DataType::Int64.to_arrow(), values, None);
+ let data_type = ListArray::::default_datatype(DataType::Int64.to_arrow());
+ let array = ListArray::::new(
+ data_type,
+ vec![0, 3].try_into().unwrap(),
+ Box::new(values),
+ None,
+ );
+ let comparison_list_series = Value::list(
+ vec![
+ Value::int(1, Span::test_data()),
+ Value::int(2, Span::test_data()),
+ Value::int(3, Span::test_data()),
+ ],
+ Span::test_data(),
+ );
+ assert_eq!(
+ arr_to_value(
+ &DataType::List(Box::new(DataType::Int64)),
+ &array,
+ 0,
+ Span::test_data()
+ )?,
+ comparison_list_series
+ );
+
+ let field_name_0 = "num_field";
+ let field_name_1 = "bool_field";
+ let fields = vec![
+ Field::new(field_name_0, DataType::Int32),
+ Field::new(field_name_1, DataType::Boolean),
+ ];
+ let test_int_arr = PrimitiveArray::from([Some(1_i32)]);
+ let test_struct_arr = StructArray::new(
+ DataType::Struct(fields.clone()).to_arrow(),
+ vec![Box::new(test_int_arr), Box::new(test_bool_arr)],
+ None,
+ );
+ let comparison_owned_record = Value::record(
+ Record {
+ cols: vec![field_name_0.to_owned(), field_name_1.to_owned()],
+ vals: vec![
+ Value::int(1, Span::test_data()),
+ Value::bool(true, Span::test_data()),
+ ],
+ },
+ Span::test_data(),
+ );
+ assert_eq!(
+ arr_to_value(
+ &DataType::Struct(fields),
+ &test_struct_arr,
+ 0,
+ Span::test_data(),
+ )?,
+ comparison_owned_record
+ );
+
+ assert_eq!(
+ arr_to_value(
+ &DataType::Null,
+ &NullArray::new(DataType::Null.to_arrow(), 0),
+ 0,
+ Span::test_data()
+ )?,
+ Value::nothing(Span::test_data())
+ );
+
+ Ok(())
+ }
}