Support for reading Categorical and Enum types (#15292)

# fixes https://github.com/nushell/nushell/issues/15281

# Description
Provides the ability read dataframes with Categorical and Enum data

The ability to write Categorical and Enum data will provided in a future
PR
This commit is contained in:
Jack Wright 2025-03-12 14:11:00 -07:00 committed by GitHub
parent 9160f36ea5
commit 0f6996b70d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -18,6 +18,7 @@ use polars::prelude::{
};
use nu_protocol::{Record, ShellError, Span, Value};
use polars_arrow::array::Utf8ViewArray;
use polars_arrow::Either;
use crate::dataframe::values::NuSchema;
@ -1196,6 +1197,14 @@ fn series_to_values(
})?;
series_to_values(&casted, maybe_from_row, maybe_size, span)
}
DataType::Categorical(maybe_rev_mapping, _categorical_ordering)
| DataType::Enum(maybe_rev_mapping, _categorical_ordering) => {
if let Some(rev_mapping) = maybe_rev_mapping {
Ok(utf8_view_array_to_value(rev_mapping.get_categories()))
} else {
Ok(vec![])
}
}
e => Err(ShellError::GenericError {
error: "Error creating Dataframe".into(),
msg: "".to_string(),
@ -1266,10 +1275,44 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result<Value, ShellEr
AnyValue::StringOwned(s) => Ok(Value::string(s.to_string(), span)),
AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)),
AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)),
AnyValue::Categorical(_, rev_mapping, utf8_array_pointer)
| AnyValue::Enum(_, rev_mapping, utf8_array_pointer) => {
let value: Vec<Value> = if utf8_array_pointer.is_null() {
utf8_view_array_to_value(rev_mapping.get_categories())
} else {
// This is no good way around having an unsafe block here
// as polars is using a raw pointer to the utf8 array
unsafe {
utf8_array_pointer
.get()
.as_ref()
.map(utf8_view_array_to_value)
.unwrap_or_else(Vec::new)
}
};
Ok(Value::list(value, span))
}
AnyValue::CategoricalOwned(_, rev_mapping, utf8_array_pointer)
| AnyValue::EnumOwned(_, rev_mapping, utf8_array_pointer) => {
let value: Vec<Value> = if utf8_array_pointer.is_null() {
utf8_view_array_to_value(rev_mapping.get_categories())
} else {
// This is no good way around having an unsafe block here
// as polars is using a raw pointer to the utf8 array
unsafe {
utf8_array_pointer
.get()
.as_ref()
.map(utf8_view_array_to_value)
.unwrap_or_else(Vec::new)
}
};
Ok(Value::list(value, span))
}
e => Err(ShellError::GenericError {
error: "Error creating Value".into(),
msg: "".to_string(),
span: None,
span: Some(span),
help: Some(format!("Value not supported in nushell: {e}")),
inner: Vec::new(),
}),
@ -1355,6 +1398,16 @@ where
}
}
fn utf8_view_array_to_value(array: &Utf8ViewArray) -> Vec<Value> {
array
.iter()
.map(|x| match x {
Some(s) => Value::string(s.to_string(), Span::unknown()),
None => Value::nothing(Span::unknown()),
})
.collect::<Vec<Value>>()
}
#[cfg(test)]
mod tests {
use indexmap::indexmap;