From fe92051bb377b17f3e9228d7be7755baffbfe55b Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Thu, 9 Nov 2023 17:00:59 -0800 Subject: [PATCH] Adding support for Polars structs (#10943) Provides support for reading Polars structs. This allows opening of supported files (jsonl, parquet, etc) that contain rows with structured data. The following attached json lines file([receipts.jsonl.gz](https://github.com/nushell/nushell/files/13311476/receipts.jsonl.gz)) contains a customer column with structured data. This json lines file can now be loaded via `dfr open` and will render as follows: Screenshot 2023-11-09 at 10 09 18 This also addresses some cleanup of date handling and utilizing timezones where provided. This pull request only addresses reading data from polars structs. I will address converting nushell data to polars structs in a future request as this change is large enough as it is. --------- Co-authored-by: Jack Wright --- Cargo.lock | 1 + crates/nu-cmd-dataframe/Cargo.toml | 1 + .../values/nu_dataframe/conversion.rs | 808 +++++++++++++++--- 3 files changed, 692 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f062d80f5..bf8e7f1cde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2764,6 +2764,7 @@ name = "nu-cmd-dataframe" version = "0.86.1" dependencies = [ "chrono", + "chrono-tz", "fancy-regex", "indexmap 2.1.0", "nu-cmd-lang", diff --git a/crates/nu-cmd-dataframe/Cargo.toml b/crates/nu-cmd-dataframe/Cargo.toml index d5e7be36fa..7388387adf 100644 --- a/crates/nu-cmd-dataframe/Cargo.toml +++ b/crates/nu-cmd-dataframe/Cargo.toml @@ -19,6 +19,7 @@ nu-protocol = { path = "../nu-protocol", version = "0.86.1" } # Potential dependencies for extras chrono = { version = "0.4", features = ["std", "unstable-locales"], default-features = false } +chrono-tz = "0.8" fancy-regex = "0.11" indexmap = { version = "2.1" } num = { version = "0.4", optional = true } diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs index 028d73a146..4d4516bb77 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs @@ -1,23 +1,31 @@ -use super::{DataFrameValue, NuDataFrame}; +use std::ops::{Deref, DerefMut}; -use chrono::{DateTime, FixedOffset, NaiveDateTime}; +use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc}; +use chrono_tz::Tz; use indexmap::map::{Entry, IndexMap}; -use nu_protocol::{Record, ShellError, Span, Value}; use polars::chunked_array::builder::AnonymousOwnedListBuilder; use polars::chunked_array::object::builder::ObjectChunkedBuilder; use polars::chunked_array::ChunkedArray; +use polars::datatypes::AnyValue; +use polars::export::arrow::array::{ + Array, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, +}; use polars::export::arrow::Either; use polars::prelude::{ - DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries, - ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder, ListType, - ListUtf8ChunkedBuilder, NamedFrom, NewChunkedArray, ObjectType, Series, TemporalMethods, - TimeUnit, + ArrayRef, DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries, + LargeBinaryArray, LargeListArray, LargeStringArray, ListBooleanChunkedBuilder, + ListBuilderTrait, ListPrimitiveChunkedBuilder, ListType, ListUtf8ChunkedBuilder, NamedFrom, + NewChunkedArray, ObjectType, Series, StructArray, TemporalMethods, TimeUnit, }; -use std::ops::{Deref, DerefMut}; -const SECS_PER_DAY: i64 = 86_400; +use nu_protocol::{Record, ShellError, Span, Value}; -// The values capacity is for the size of an internal vec. +use super::{DataFrameValue, NuDataFrame}; + +const NANOS_PER_DAY: i64 = 86_400_000_000_000; + +// The values capacity is for the size of an vec. // Since this is impossible to determine without traversing every value // I just picked one. Since this is for converting back and forth // between nushell tables the values shouldn't be too extremely large for @@ -199,7 +207,7 @@ fn value_to_input_type(value: &Value) -> InputType { Value::Filesize { .. } => InputType::Filesize, Value::List { vals, .. } => { // We need to determined the type inside of the list. - // Since Value::List does not have any kind of internal + // Since Value::List does not have any kind of // type information, we need to look inside the list. // This will cause errors if lists have inconsistent types. // Basically, if a list column needs to be converted to dataframe, @@ -805,28 +813,21 @@ fn series_to_values( )), Some(ca) => { let it = ca.into_iter(); - let values: Vec = - if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|ca| { + let sublist: Vec = if let Some(ref s) = ca { + series_to_values(s, None, None, Span::unknown())? } else { - Either::Right(it) - } - .map(|ca| { - let sublist = ca - .map(|ref s| { - match series_to_values(s, None, None, Span::unknown()) { - Ok(v) => v, - Err(e) => { - eprintln!("Error list values: {e}"); - vec![] - } - } - }) - .unwrap_or(vec![]); - Value::list(sublist, span) - }) - .collect::>(); - Ok(values) + // empty item + vec![] + }; + Ok(Value::list(sublist, span)) + }) + .collect::, ShellError>>() } } } @@ -849,51 +850,16 @@ fn series_to_values( } .map(|v| match v { Some(a) => { - // elapsed time in day since 1970-01-01 - let seconds = a as i64 * SECS_PER_DAY; - let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - // Zero length offset - let offset = match FixedOffset::east_opt(0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - let datetime = - DateTime::::from_naive_utc_and_offset(naive_datetime, offset); - - Value::date(datetime, span) + let nanos = nanos_per_day(a); + let datetime = datetime_from_epoch_nanos(nanos, &None, span)?; + Ok(Value::date(datetime, span)) } - None => Value::nothing(span), + None => Ok(Value::nothing(span)), }) - .collect::>(); - + .collect::, ShellError>>()?; Ok(values) } - DataType::Datetime(time_unit, _) => { + DataType::Datetime(time_unit, tz) => { let casted = series.datetime().map_err(|e| { ShellError::GenericError( "Error casting column to datetime".into(), @@ -912,55 +878,48 @@ fn series_to_values( } .map(|v| match v { Some(a) => { - let unit_divisor = match time_unit { - TimeUnit::Nanoseconds => 1_000_000_000, - TimeUnit::Microseconds => 1_000_000, - TimeUnit::Milliseconds => 1_000, - }; // elapsed time in nano/micro/milliseconds since 1970-01-01 - let seconds = a / unit_divisor; - let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - // Zero length offset - let offset = match FixedOffset::east_opt(0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - let datetime = - DateTime::::from_naive_utc_and_offset(naive_datetime, offset); - - Value::date(datetime, span) + let nanos = nanos_from_timeunit(a, *time_unit); + let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; + Ok(Value::date(datetime, span)) } - None => Value::nothing(span), + None => Ok(Value::nothing(span)), }) - .collect::>(); - + .collect::, ShellError>>()?; Ok(values) } + DataType::Struct(polar_fields) => { + let casted = series.struct_().map_err(|e| { + ShellError::GenericError( + "Error casting column to struct".into(), + "".to_string(), + None, + Some(e.to_string()), + Vec::new(), + ) + })?; + let it = casted.into_iter(); + let values: Result, ShellError> = + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|any_values| { + let vals: Result, ShellError> = any_values + .iter() + .map(|v| any_value_to_value(v, span)) + .collect(); + let cols: Vec = polar_fields + .iter() + .map(|field| field.name.to_string()) + .collect(); + let record = Record { cols, vals: vals? }; + Ok(Value::record(record, span)) + }) + .collect(); + values + } DataType::Time => { let casted = series.timestamp(TimeUnit::Nanoseconds).map_err(|e| { ShellError::GenericError( @@ -996,10 +955,252 @@ fn series_to_values( } } +fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { + match any_value { + AnyValue::Null => Ok(Value::nothing(span)), + AnyValue::Boolean(b) => Ok(Value::bool(*b, span)), + AnyValue::Utf8(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::UInt8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt64(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int64(i) => Ok(Value::int(*i, span)), + AnyValue::Float32(f) => Ok(Value::float(*f as f64, span)), + AnyValue::Float64(f) => Ok(Value::float(*f, span)), + AnyValue::Date(d) => { + let nanos = nanos_per_day(*d); + datetime_from_epoch_nanos(nanos, &None, span) + .map(|datetime| Value::date(datetime, span)) + } + AnyValue::Datetime(a, time_unit, tz) => { + let nanos = nanos_from_timeunit(*a, *time_unit); + datetime_from_epoch_nanos(nanos, tz, span).map(|datetime| Value::date(datetime, span)) + } + AnyValue::Duration(a, time_unit) => { + let nanos = match time_unit { + TimeUnit::Nanoseconds => *a, + TimeUnit::Microseconds => *a * 1_000, + TimeUnit::Milliseconds => *a * 1_000_000, + }; + Ok(Value::duration(nanos, span)) + } + // AnyValue::Time represents the current time since midnight. + // Unfortunately, there is no timezone related information. + // Given this, calculate the current date from UTC and add the time. + AnyValue::Time(nanos) => time_from_midnight(*nanos, span), + AnyValue::List(series) => { + series_to_values(series, None, None, span).map(|values| Value::list(values, span)) + } + AnyValue::Struct(idx, struct_array, s_fields) => { + let cols: Vec = s_fields.iter().map(|f| f.name().to_string()).collect(); + let vals: Result, ShellError> = struct_array + .values() + .iter() + .enumerate() + .map(|(pos, v)| { + let f = &s_fields[pos]; + arr_to_value(&f.dtype, &**v, *idx, span) + }) + .collect(); + let record = Record { cols, vals: vals? }; + Ok(Value::record(record, span)) + } + AnyValue::StructOwned(struct_tuple) => { + let values: Result, ShellError> = struct_tuple + .0 + .iter() + .map(|s| any_value_to_value(s, span)) + .collect(); + let fields = struct_tuple + .1 + .iter() + .map(|f| f.name().to_string()) + .collect(); + Ok(Value::Record { + val: Record { + cols: fields, + vals: values?, + }, + internal_span: span, + }) + } + AnyValue::Utf8Owned(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)), + AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)), + e => Err(ShellError::GenericError( + "Error creating Value".into(), + "".to_string(), + None, + Some(format!("Value not supported in nushell: {e}")), + Vec::new(), + )), + } +} + +#[inline] +fn arr_to_value( + dt: &DataType, + arr: &dyn Array, + idx: usize, + span: Span, +) -> Result { + macro_rules! downcast { + ($casttype:ident) => {{ + let arr = &*(arr as *const dyn Array as *const $casttype); + arr.value_unchecked(idx) + }}; + } + + // Not loving the unsafe here, however this largely based off the one + // example I found for converting Array values in: + // polars_core::chunked_array::ops::any_value::arr_to_any_value + unsafe { + match dt { + DataType::Boolean => Ok(Value::bool(downcast!(BooleanArray), span)), + DataType::UInt8 => Ok(Value::int(downcast!(UInt8Array) as i64, span)), + DataType::UInt16 => Ok(Value::int(downcast!(UInt16Array) as i64, span)), + DataType::UInt32 => Ok(Value::int(downcast!(UInt32Array) as i64, span)), + DataType::UInt64 => Ok(Value::int(downcast!(UInt64Array) as i64, span)), + DataType::Int8 => Ok(Value::int(downcast!(Int8Array) as i64, span)), + DataType::Int16 => Ok(Value::int(downcast!(Int16Array) as i64, span)), + DataType::Int32 => Ok(Value::int(downcast!(Int32Array) as i64, span)), + DataType::Int64 => Ok(Value::int(downcast!(Int64Array), span)), + DataType::Float32 => Ok(Value::float(downcast!(Float32Array) as f64, span)), + DataType::Float64 => Ok(Value::float(downcast!(Float64Array), span)), + // DataType::Decimal(_, _) => {} + DataType::Utf8 => Ok(Value::string(downcast!(LargeStringArray).to_string(), span)), + DataType::Binary => Ok(Value::binary(downcast!(LargeBinaryArray).to_owned(), span)), + DataType::Date => { + let date = downcast!(Int32Array); + let nanos = nanos_per_day(date); + datetime_from_epoch_nanos(nanos, &None, span) + .map(|datetime| Value::date(datetime, span)) + } + DataType::Datetime(time_unit, tz) => { + let nanos = nanos_from_timeunit(downcast!(Int64Array), *time_unit); + datetime_from_epoch_nanos(nanos, tz, span) + .map(|datetime| Value::date(datetime, span)) + } + // DataType::Duration(_) => {} + DataType::Time => { + let t = downcast!(Int64Array); + time_from_midnight(t, span) + } + DataType::List(dt) => { + let v: ArrayRef = downcast!(LargeListArray); + let values_result = if dt.is_primitive() { + let s = Series::from_chunks_and_dtype_unchecked("", vec![v], dt); + series_to_values(&s, None, None, span) + } else { + let s = Series::from_chunks_and_dtype_unchecked("", vec![v], &dt.to_physical()) + .cast_unchecked(dt) + .map_err(|e| { + ShellError::GenericError( + "Error creating Value from polars LargeListArray".into(), + e.to_string(), + Some(span), + None, + Vec::new(), + ) + })?; + series_to_values(&s, None, None, span) + }; + values_result.map(|values| Value::list(values, span)) + } + DataType::Null => Ok(Value::nothing(span)), + DataType::Struct(fields) => { + let arr = &*(arr as *const dyn Array as *const StructArray); + let vals: Result, ShellError> = arr + .values() + .iter() + .enumerate() + .map(|(pos, v)| { + let f = &fields[pos]; + arr_to_value(&f.dtype, &**v, 0, span) + }) + .collect(); + let cols = fields.iter().map(|f| f.name().to_string()).collect(); + Ok(Value::record(Record { cols, vals: vals? }, span)) + } + DataType::Unknown => Ok(Value::nothing(span)), + _ => Err(ShellError::CantConvert { + to_type: dt.to_string(), + from_type: "polars array".to_string(), + span, + help: Some(format!( + "Could not convert polars array of type {:?} to value", + dt + )), + }), + } + } +} + +fn nanos_per_day(days: i32) -> i64 { + days as i64 * NANOS_PER_DAY +} + +fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { + a * match time_unit { + TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds + TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds + TimeUnit::Nanoseconds => 1, // Already in nanoseconds + } +} + +fn datetime_from_epoch_nanos( + nanos: i64, + timezone: &Option, + span: Span, +) -> Result, ShellError> { + let tz: Tz = if let Some(polars_tz) = timezone { + polars_tz.parse::().map_err(|_| { + ShellError::GenericError( + format!("Could not parse polars timezone: {polars_tz}"), + "".to_string(), + Some(span), + None, + vec![], + ) + })? + } else { + Tz::UTC + }; + + Ok(tz.timestamp_nanos(nanos).fixed_offset()) +} + +fn time_from_midnight(nanos: i64, span: Span) -> Result { + let today = Utc::now().date_naive(); + NaiveTime::from_hms_opt(0, 0, 0) // midnight + .map(|time| time + Duration::nanoseconds(nanos)) // current time + .map(|time| today.and_time(time)) // current date and time + .and_then(|datetime| { + FixedOffset::east_opt(0) // utc + .map(|offset| { + DateTime::::from_naive_utc_and_offset(datetime, offset) + }) + }) + .map(|datetime| Value::date(datetime, span)) // current date and time + .ok_or(ShellError::CantConvert { + to_type: "datetime".to_string(), + from_type: "polars time".to_string(), + span, + help: Some("Could not convert polars time of {nanos} to datetime".to_string()), + }) +} + #[cfg(test)] mod tests { - use super::*; use indexmap::indexmap; + use polars::export::arrow::array::{ListArray, NullArray, PrimitiveArray}; + use polars::export::arrow::buffer::Buffer; + use polars::prelude::Field; + + use super::*; #[test] fn test_parsed_column_string_list() -> Result<(), Box> { @@ -1034,4 +1235,375 @@ mod tests { Ok(()) } + + #[test] + fn test_any_value_to_value() -> Result<(), Box> { + let span = Span::test_data(); + assert_eq!( + any_value_to_value(&AnyValue::Null, span)?, + Value::nothing(span) + ); + + let test_bool = true; + assert_eq!( + any_value_to_value(&AnyValue::Boolean(test_bool), span)?, + Value::bool(test_bool, span) + ); + + let test_str = "foo"; + assert_eq!( + any_value_to_value(&AnyValue::Utf8(test_str), span)?, + Value::string(test_str.to_string(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::Utf8Owned(test_str.into()), span)?, + Value::string(test_str.to_owned(), span) + ); + + let tests_uint8 = 4; + assert_eq!( + any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?, + Value::int(tests_uint8 as i64, span) + ); + + let tests_uint16 = 233; + assert_eq!( + any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?, + Value::int(tests_uint16 as i64, span) + ); + + let tests_uint32 = 897688233; + assert_eq!( + any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?, + Value::int(tests_uint32 as i64, span) + ); + + let tests_uint64 = 903225135897388233; + assert_eq!( + any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?, + Value::int(tests_uint64 as i64, span) + ); + + let tests_float32 = 903225135897388233.3223353; + assert_eq!( + any_value_to_value(&AnyValue::Float32(tests_float32), span)?, + Value::float(tests_float32 as f64, span) + ); + + let tests_float64 = 9064251358973882322333.64233533232; + assert_eq!( + any_value_to_value(&AnyValue::Float64(tests_float64), span)?, + Value::float(tests_float64, span) + ); + + let test_days = 10_957; + let comparison_date = Utc + .with_ymd_and_hms(2000, 1, 1, 0, 0, 0) + .unwrap() + .fixed_offset(); + assert_eq!( + any_value_to_value(&AnyValue::Date(test_days), span)?, + Value::date(comparison_date, span) + ); + + let test_millis = 946_684_800_000; + assert_eq!( + any_value_to_value( + &AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None), + span + )?, + Value::date(comparison_date, span) + ); + + let test_duration_millis = 99_999; + let test_duration_micros = 99_999_000; + let test_duration_nanos = 99_999_000_000; + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + + let test_binary = b"sdf2332f32q3f3afwaf3232f32"; + assert_eq!( + any_value_to_value(&AnyValue::Binary(test_binary), span)?, + Value::binary(test_binary.to_vec(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?, + Value::binary(test_binary.to_vec(), span) + ); + + let test_time_nanos = 54_000_000_000_000; + let test_time = DateTime::::from_naive_utc_and_offset( + Utc::now() + .date_naive() + .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()), + FixedOffset::east_opt(0).unwrap(), + ); + assert_eq!( + any_value_to_value(&AnyValue::Time(test_time_nanos), span)?, + Value::date(test_time, span) + ); + + let test_list_series = Series::new("int series", &[1, 2, 3]); + let comparison_list_series = Value::list( + vec![ + Value::int(1, span), + Value::int(2, span), + Value::int(3, span), + ], + span, + ); + assert_eq!( + any_value_to_value(&AnyValue::List(test_list_series), span)?, + comparison_list_series + ); + + let field_value_0 = AnyValue::Int32(1); + let field_value_1 = AnyValue::Boolean(true); + let values = vec![field_value_0, field_value_1]; + let field_name_0 = "num_field"; + let field_name_1 = "bool_field"; + let fields = vec![ + Field::new(field_name_0, DataType::Int32), + Field::new(field_name_1, DataType::Boolean), + ]; + let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone()))); + let comparison_owned_record = Value::record( + Record { + cols: vec![field_name_0.to_owned(), field_name_1.to_owned()], + vals: vec![Value::int(1, span), Value::bool(true, span)], + }, + span, + ); + assert_eq!( + any_value_to_value(&test_owned_struct, span)?, + comparison_owned_record.clone() + ); + + let test_int_arr = PrimitiveArray::from([Some(1_i32)]); + let test_bool_arr = BooleanArray::from([Some(true)]); + let test_struct_arr = StructArray::new( + DataType::Struct(fields.clone()).to_arrow(), + vec![Box::new(test_int_arr), Box::new(test_bool_arr)], + None, + ); + assert_eq!( + any_value_to_value( + &AnyValue::Struct(0, &test_struct_arr, fields.as_slice()), + span + )?, + comparison_owned_record + ); + + Ok(()) + } + + #[test] + fn test_arr_to_value() -> Result<(), Box> { + let test_bool_arr = BooleanArray::from([Some(true)]); + assert_eq!( + arr_to_value(&DataType::Boolean, &test_bool_arr, 0, Span::test_data())?, + Value::bool(true, Span::test_data()) + ); + + let test_uint8_arr = PrimitiveArray::from([Some(9_u8)]); + assert_eq!( + arr_to_value(&DataType::UInt8, &test_uint8_arr, 0, Span::test_data())?, + Value::int(9, Span::test_data()) + ); + + let test_uint16_arr = PrimitiveArray::from([Some(3223_u16)]); + assert_eq!( + arr_to_value(&DataType::UInt16, &test_uint16_arr, 0, Span::test_data())?, + Value::int(3223, Span::test_data()) + ); + + let test_uint32_arr = PrimitiveArray::from([Some(33_u32)]); + assert_eq!( + arr_to_value(&DataType::UInt32, &test_uint32_arr, 0, Span::test_data())?, + Value::int(33, Span::test_data()) + ); + + let test_uint64_arr = PrimitiveArray::from([Some(33_3232_u64)]); + assert_eq!( + arr_to_value(&DataType::UInt64, &test_uint64_arr, 0, Span::test_data())?, + Value::int(33_3232, Span::test_data()) + ); + + let test_int8_arr = PrimitiveArray::from([Some(9_i8)]); + assert_eq!( + arr_to_value(&DataType::Int8, &test_int8_arr, 0, Span::test_data())?, + Value::int(9, Span::test_data()) + ); + + let test_int16_arr = PrimitiveArray::from([Some(3223_i16)]); + assert_eq!( + arr_to_value(&DataType::Int16, &test_int16_arr, 0, Span::test_data())?, + Value::int(3223, Span::test_data()) + ); + + let test_int32_arr = PrimitiveArray::from([Some(33_i32)]); + assert_eq!( + arr_to_value(&DataType::Int32, &test_int32_arr, 0, Span::test_data())?, + Value::int(33, Span::test_data()) + ); + + let test_int64_arr = PrimitiveArray::from([Some(33_3232_i64)]); + assert_eq!( + arr_to_value(&DataType::Int64, &test_int64_arr, 0, Span::test_data())?, + Value::int(33_3232, Span::test_data()) + ); + + let test_float32_arr = PrimitiveArray::from([Some(33.32_f32)]); + assert_eq!( + arr_to_value(&DataType::Float32, &test_float32_arr, 0, Span::test_data())?, + Value::float(33.32_f32 as f64, Span::test_data()) + ); + + let test_float64_arr = PrimitiveArray::from([Some(33_3232.999_f64)]); + assert_eq!( + arr_to_value(&DataType::Float64, &test_float64_arr, 0, Span::test_data())?, + Value::float(33_3232.999, Span::test_data()) + ); + + let test_str = "hello world"; + let test_str_arr = LargeStringArray::from(vec![Some(test_str.to_string())]); + assert_eq!( + arr_to_value(&DataType::Utf8, &test_str_arr, 0, Span::test_data())?, + Value::string(test_str.to_string(), Span::test_data()) + ); + + let test_bin = b"asdlfkjadsf"; + let test_bin_arr = LargeBinaryArray::from(vec![Some(test_bin.to_vec())]); + assert_eq!( + arr_to_value(&DataType::Binary, &test_bin_arr, 0, Span::test_data())?, + Value::binary(test_bin.to_vec(), Span::test_data()) + ); + + let test_days = 10_957_i32; + let comparison_date = Utc + .with_ymd_and_hms(2000, 1, 1, 0, 0, 0) + .unwrap() + .fixed_offset(); + let test_date_arr = PrimitiveArray::from([Some(test_days)]); + assert_eq!( + arr_to_value(&DataType::Date, &test_date_arr, 0, Span::test_data())?, + Value::date(comparison_date, Span::test_data()) + ); + + let test_dt_nanos = 1_357_488_900_000_000_000_i64; + let test_dt_arr = PrimitiveArray::from([Some(test_dt_nanos)]); + let test_dt = Utc.timestamp_nanos(test_dt_nanos).fixed_offset(); + assert_eq!( + arr_to_value( + &DataType::Datetime(TimeUnit::Nanoseconds, Some("UTC".to_owned())), + &test_dt_arr, + 0, + Span::test_data() + )?, + Value::date(test_dt, Span::test_data()) + ); + + let test_time_nanos = 54_000_000_000_000_i64; + let test_dt_arr = PrimitiveArray::from([Some(test_time_nanos)]); + let test_time = DateTime::::from_naive_utc_and_offset( + Utc::now() + .date_naive() + .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()), + FixedOffset::east_opt(0).unwrap(), + ); + assert_eq!( + arr_to_value(&DataType::Time, &test_dt_arr, 0, Span::test_data())?, + Value::date(test_time, Span::test_data()) + ); + + let values = Buffer::from(vec![1, 2, 3]); + let values = PrimitiveArray::::new(DataType::Int64.to_arrow(), values, None); + let data_type = ListArray::::default_datatype(DataType::Int64.to_arrow()); + let array = ListArray::::new( + data_type, + vec![0, 3].try_into().unwrap(), + Box::new(values), + None, + ); + let comparison_list_series = Value::list( + vec![ + Value::int(1, Span::test_data()), + Value::int(2, Span::test_data()), + Value::int(3, Span::test_data()), + ], + Span::test_data(), + ); + assert_eq!( + arr_to_value( + &DataType::List(Box::new(DataType::Int64)), + &array, + 0, + Span::test_data() + )?, + comparison_list_series + ); + + let field_name_0 = "num_field"; + let field_name_1 = "bool_field"; + let fields = vec![ + Field::new(field_name_0, DataType::Int32), + Field::new(field_name_1, DataType::Boolean), + ]; + let test_int_arr = PrimitiveArray::from([Some(1_i32)]); + let test_struct_arr = StructArray::new( + DataType::Struct(fields.clone()).to_arrow(), + vec![Box::new(test_int_arr), Box::new(test_bool_arr)], + None, + ); + let comparison_owned_record = Value::record( + Record { + cols: vec![field_name_0.to_owned(), field_name_1.to_owned()], + vals: vec![ + Value::int(1, Span::test_data()), + Value::bool(true, Span::test_data()), + ], + }, + Span::test_data(), + ); + assert_eq!( + arr_to_value( + &DataType::Struct(fields), + &test_struct_arr, + 0, + Span::test_data(), + )?, + comparison_owned_record + ); + + assert_eq!( + arr_to_value( + &DataType::Null, + &NullArray::new(DataType::Null.to_arrow(), 0), + 0, + Span::test_data() + )?, + Value::nothing(Span::test_data()) + ); + + Ok(()) + } }