From 0f6996b70d9dd1dc99b45c9996e779e939c8eee0 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 12 Mar 2025 14:11:00 -0700 Subject: [PATCH] Support for reading Categorical and Enum types (#15292) # fixes https://github.com/nushell/nushell/issues/15281 # Description Provides the ability to read dataframes with Categorical and Enum data. The ability to write Categorical and Enum data will be provided in a future PR. --- .../values/nu_dataframe/conversion.rs | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index ad488b02d4..82051f7b1b 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -18,6 +18,7 @@ use polars::prelude::{ }; use nu_protocol::{Record, ShellError, Span, Value}; +use polars_arrow::array::Utf8ViewArray; use polars_arrow::Either; use crate::dataframe::values::NuSchema; @@ -1196,6 +1197,14 @@ fn series_to_values( })?; series_to_values(&casted, maybe_from_row, maybe_size, span) } + DataType::Categorical(maybe_rev_mapping, _categorical_ordering) + | DataType::Enum(maybe_rev_mapping, _categorical_ordering) => { + if let Some(rev_mapping) = maybe_rev_mapping { + Ok(utf8_view_array_to_value(rev_mapping.get_categories())) + } else { + Ok(vec![]) + } + } e => Err(ShellError::GenericError { error: "Error creating Dataframe".into(), msg: "".to_string(), @@ -1266,10 +1275,44 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result Ok(Value::string(s.to_string(), span)), AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)), AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)), + AnyValue::Categorical(_, rev_mapping, utf8_array_pointer) + | AnyValue::Enum(_, rev_mapping, utf8_array_pointer) => { + let 
value: Vec = if utf8_array_pointer.is_null() { + utf8_view_array_to_value(rev_mapping.get_categories()) + } else { + // This is no good way around having an unsafe block here + // as polars is using a raw pointer to the utf8 array + unsafe { + utf8_array_pointer + .get() + .as_ref() + .map(utf8_view_array_to_value) + .unwrap_or_else(Vec::new) + } + }; + Ok(Value::list(value, span)) + } + AnyValue::CategoricalOwned(_, rev_mapping, utf8_array_pointer) + | AnyValue::EnumOwned(_, rev_mapping, utf8_array_pointer) => { + let value: Vec = if utf8_array_pointer.is_null() { + utf8_view_array_to_value(rev_mapping.get_categories()) + } else { + // This is no good way around having an unsafe block here + // as polars is using a raw pointer to the utf8 array + unsafe { + utf8_array_pointer + .get() + .as_ref() + .map(utf8_view_array_to_value) + .unwrap_or_else(Vec::new) + } + }; + Ok(Value::list(value, span)) + } e => Err(ShellError::GenericError { error: "Error creating Value".into(), msg: "".to_string(), - span: None, + span: Some(span), help: Some(format!("Value not supported in nushell: {e}")), inner: Vec::new(), }), @@ -1355,6 +1398,16 @@ where } } +fn utf8_view_array_to_value(array: &Utf8ViewArray) -> Vec { + array + .iter() + .map(|x| match x { + Some(s) => Value::string(s.to_string(), Span::unknown()), + None => Value::nothing(Span::unknown()), + }) + .collect::>() +} + #[cfg(test)] mod tests { use indexmap::indexmap;