mirror of
https://github.com/nushell/nushell.git
synced 2025-03-13 15:08:43 +01:00
Support for reading Categorical and Enum types (#15292)
# fixes https://github.com/nushell/nushell/issues/15281 # Description Provides the ability read dataframes with Categorical and Enum data The ability to write Categorical and Enum data will provided in a future PR
This commit is contained in:
parent
9160f36ea5
commit
0f6996b70d
@ -18,6 +18,7 @@ use polars::prelude::{
|
||||
};
|
||||
|
||||
use nu_protocol::{Record, ShellError, Span, Value};
|
||||
use polars_arrow::array::Utf8ViewArray;
|
||||
use polars_arrow::Either;
|
||||
|
||||
use crate::dataframe::values::NuSchema;
|
||||
@ -1196,6 +1197,14 @@ fn series_to_values(
|
||||
})?;
|
||||
series_to_values(&casted, maybe_from_row, maybe_size, span)
|
||||
}
|
||||
DataType::Categorical(maybe_rev_mapping, _categorical_ordering)
|
||||
| DataType::Enum(maybe_rev_mapping, _categorical_ordering) => {
|
||||
if let Some(rev_mapping) = maybe_rev_mapping {
|
||||
Ok(utf8_view_array_to_value(rev_mapping.get_categories()))
|
||||
} else {
|
||||
Ok(vec![])
|
||||
}
|
||||
}
|
||||
e => Err(ShellError::GenericError {
|
||||
error: "Error creating Dataframe".into(),
|
||||
msg: "".to_string(),
|
||||
@ -1266,10 +1275,44 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result<Value, ShellEr
|
||||
AnyValue::StringOwned(s) => Ok(Value::string(s.to_string(), span)),
|
||||
AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)),
|
||||
AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)),
|
||||
AnyValue::Categorical(_, rev_mapping, utf8_array_pointer)
|
||||
| AnyValue::Enum(_, rev_mapping, utf8_array_pointer) => {
|
||||
let value: Vec<Value> = if utf8_array_pointer.is_null() {
|
||||
utf8_view_array_to_value(rev_mapping.get_categories())
|
||||
} else {
|
||||
// This is no good way around having an unsafe block here
|
||||
// as polars is using a raw pointer to the utf8 array
|
||||
unsafe {
|
||||
utf8_array_pointer
|
||||
.get()
|
||||
.as_ref()
|
||||
.map(utf8_view_array_to_value)
|
||||
.unwrap_or_else(Vec::new)
|
||||
}
|
||||
};
|
||||
Ok(Value::list(value, span))
|
||||
}
|
||||
AnyValue::CategoricalOwned(_, rev_mapping, utf8_array_pointer)
|
||||
| AnyValue::EnumOwned(_, rev_mapping, utf8_array_pointer) => {
|
||||
let value: Vec<Value> = if utf8_array_pointer.is_null() {
|
||||
utf8_view_array_to_value(rev_mapping.get_categories())
|
||||
} else {
|
||||
// This is no good way around having an unsafe block here
|
||||
// as polars is using a raw pointer to the utf8 array
|
||||
unsafe {
|
||||
utf8_array_pointer
|
||||
.get()
|
||||
.as_ref()
|
||||
.map(utf8_view_array_to_value)
|
||||
.unwrap_or_else(Vec::new)
|
||||
}
|
||||
};
|
||||
Ok(Value::list(value, span))
|
||||
}
|
||||
e => Err(ShellError::GenericError {
|
||||
error: "Error creating Value".into(),
|
||||
msg: "".to_string(),
|
||||
span: None,
|
||||
span: Some(span),
|
||||
help: Some(format!("Value not supported in nushell: {e}")),
|
||||
inner: Vec::new(),
|
||||
}),
|
||||
@ -1355,6 +1398,16 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn utf8_view_array_to_value(array: &Utf8ViewArray) -> Vec<Value> {
|
||||
array
|
||||
.iter()
|
||||
.map(|x| match x {
|
||||
Some(s) => Value::string(s.to_string(), Span::unknown()),
|
||||
None => Value::nothing(Span::unknown()),
|
||||
})
|
||||
.collect::<Vec<Value>>()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use indexmap::indexmap;
|
||||
|
Loading…
Reference in New Issue
Block a user