From 5fa9d7650073c1dc764c16ea565bd26c0fdec747 Mon Sep 17 00:00:00 2001 From: Skyler Hawthorne Date: Mon, 23 Sep 2024 07:28:41 -0400 Subject: [PATCH] polars: add binary type support (#13830) # Description This adds support for reading and writing binary types in the polars commands. The `BinaryOffset` type can be read into a Nushell native `Value` type no problem, but unfortunately this is a lossy conversion, as there's no Nushell-native semantic equivalent to the fixed size binary type in Arrow. # User-Facing Changes `polars open` and `polars save` now work with binary types. --- .../values/nu_dataframe/conversion.rs | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index 2e988a08b3..c01b0ae834 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -12,8 +12,8 @@ use polars::prelude::{ DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, - ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type, - UInt64Type, UInt8Type, + ObjectType, PolarsError, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, + UInt32Type, UInt64Type, UInt8Type, }; use nu_protocol::{Record, ShellError, Span, Value}; @@ -237,6 +237,7 @@ pub fn insert_value( | (Value::Bool { .. }, Value::Bool { .. }) | (Value::Date { .. }, Value::Date { .. }) | (Value::Filesize { .. }, Value::Filesize { .. }) + | (Value::Binary { .. }, Value::Binary { .. }) | (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value), (_, Value::Nothing { .. }) => col_val.values.push(value), (Value::List { .. }, _) => { @@ -261,6 +262,7 @@ fn value_to_data_type(value: &Value) -> DataType { Value::Date { .. } => DataType::Date, Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds), Value::Filesize { .. } => DataType::Int64, + Value::Binary { .. } => DataType::Binary, Value::List { vals, .. } => { // We need to determined the type inside of the list. // Since Value::List does not have any kind of @@ -404,6 +406,11 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result { + let series_values: Result, _> = + column.values.iter().map(|v| v.coerce_binary()).collect(); + Ok(Series::new(name, series_values?)) + } DataType::Object(_, _) => value_to_series(name, &column.values), DataType::Duration(time_unit) => { let series_values: Result, _> = column @@ -965,6 +972,34 @@ fn series_to_values( Ok(values) } + t @ (DataType::Binary | DataType::BinaryOffset) => { + let make_err = |e: PolarsError| ShellError::GenericError { + error: "Error casting column to binary".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + }; + + let it = match t { + DataType::Binary => series.binary().map_err(make_err)?.into_iter(), + DataType::BinaryOffset => series.binary_offset().map_err(make_err)?.into_iter(), + _ => unreachable!(), + }; + + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(b) => Value::binary(b, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } DataType::Object(x, _) => { let casted = series .as_any()