mirror of
https://github.com/nushell/nushell.git
synced 2025-01-22 06:08:47 +01:00
Improved null handling when converting from nu -> dataframe. (#13855)
# Description Fixes: #12726 and #13185 Previously converting columns that contained null caused polars to force a dtype of object even when using a schema. Now: 1. When using a schema, the type the schema defines for the column will always be used. 2. When a schema is not used, the previous type is used when a value is null. # User-Facing Changes - The type defined by the schema we be respected when passing in a null value `[a]; [null] | polars into-df -s {a: str}` will create a df with an str dtype column with one null value versus a column of type object. - *BREAKING CHANGE* If you define a schema, all columns must be in the schema.
This commit is contained in:
parent
9ca0fb772d
commit
af77bc60e2
@ -32,12 +32,9 @@ impl PluginCommand for ExprCount {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
// to add an example with a result that contains a null we will need to be able to
|
||||
// allow null values to be entered into the dataframe from nushell
|
||||
// and retain the correct dtype. Right now null values cause the dtype to be object
|
||||
vec![Example {
|
||||
description: "Count the number of non-null values in a column",
|
||||
example: r#"[[a]; ["foo"] ["bar"]] | polars into-df
|
||||
example: r#"[[a]; ["foo"] ["bar"] [null]] | polars into-df
|
||||
| polars select (polars col a | polars count)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
|
@ -71,8 +71,11 @@ pub struct Column {
|
||||
}
|
||||
|
||||
impl Column {
|
||||
pub fn new(name: String, values: Vec<Value>) -> Self {
|
||||
Self { name, values }
|
||||
pub fn new(name: impl Into<String>, values: Vec<Value>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_empty(name: String) -> Self {
|
||||
@ -149,7 +152,7 @@ pub fn create_column(
|
||||
) -> Result<Column, ShellError> {
|
||||
let size = to_row - from_row;
|
||||
let values = series_to_values(series, Some(from_row), Some(size), span)?;
|
||||
Ok(Column::new(series.name().into(), values))
|
||||
Ok(Column::new(series.name(), values))
|
||||
}
|
||||
|
||||
// Adds a separator to the vector of values using the column names from the
|
||||
@ -194,9 +197,25 @@ pub fn insert_value(
|
||||
Entry::Occupied(entry) => entry.into_mut(),
|
||||
};
|
||||
|
||||
// If we have a schema, use that for determining how things should be added to each column
|
||||
if let Some(schema) = maybe_schema {
|
||||
if let Some(field) = schema.schema.get_field(&key) {
|
||||
col_val.column_type = Some(field.data_type().clone());
|
||||
col_val.values.push(value);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ShellError::GenericError {
|
||||
error: format!("Schema does not contain column: {key}"),
|
||||
msg: "".into(),
|
||||
span: Some(value.span()),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
// Checking that the type for the value is the same
|
||||
// for the previous value in the column
|
||||
if col_val.values.is_empty() {
|
||||
else if col_val.values.is_empty() {
|
||||
if let Some(schema) = maybe_schema {
|
||||
if let Some(field) = schema.schema.get_field(&key) {
|
||||
col_val.column_type = Some(field.data_type().clone());
|
||||
@ -206,8 +225,8 @@ pub fn insert_value(
|
||||
if col_val.column_type.is_none() {
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
}
|
||||
|
||||
col_val.values.push(value);
|
||||
Ok(())
|
||||
} else {
|
||||
let prev_value = &col_val.values[col_val.values.len() - 1];
|
||||
|
||||
@ -219,6 +238,7 @@ pub fn insert_value(
|
||||
| (Value::Date { .. }, Value::Date { .. })
|
||||
| (Value::Filesize { .. }, Value::Filesize { .. })
|
||||
| (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value),
|
||||
(_, Value::Nothing { .. }) => col_val.values.push(value),
|
||||
(Value::List { .. }, _) => {
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
col_val.values.push(value);
|
||||
@ -228,9 +248,8 @@ pub fn insert_value(
|
||||
col_val.values.push(value);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn value_to_data_type(value: &Value) -> DataType {
|
||||
@ -269,16 +288,18 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value::Float { val, .. } => Ok(*val as f32),
|
||||
Value::Int { val, .. } => Ok(*val as f32),
|
||||
x => Err(ShellError::GenericError {
|
||||
error: "Error converting to f32".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(format!("Unexpected type: {x:?}")),
|
||||
inner: vec![],
|
||||
}),
|
||||
.map(|v| {
|
||||
value_to_option(v, |v| match v {
|
||||
Value::Float { val, .. } => Ok(*val as f32),
|
||||
Value::Int { val, .. } => Ok(*val as f32),
|
||||
x => Err(ShellError::GenericError {
|
||||
error: "Error converting to f32".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(format!("Unexpected type: {x:?}")),
|
||||
inner: vec![],
|
||||
}),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
@ -287,16 +308,18 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value::Float { val, .. } => Ok(*val),
|
||||
Value::Int { val, .. } => Ok(*val as f64),
|
||||
x => Err(ShellError::GenericError {
|
||||
error: "Error converting to f64".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(format!("Unexpected type: {x:?}")),
|
||||
inner: vec![],
|
||||
}),
|
||||
.map(|v| {
|
||||
value_to_option(v, |v| match v {
|
||||
Value::Float { val, .. } => Ok(*val),
|
||||
Value::Int { val, .. } => Ok(*val as f64),
|
||||
x => Err(ShellError::GenericError {
|
||||
error: "Error converting to f64".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(format!("Unexpected type: {x:?}")),
|
||||
inner: vec![],
|
||||
}),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
@ -305,7 +328,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u8))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as u8)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -313,7 +336,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u16))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as u16)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -321,7 +344,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u32))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as u32)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -329,7 +352,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as u64))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as u64)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -337,7 +360,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i8))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as i8)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -345,7 +368,7 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i16))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as i16)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -353,23 +376,32 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| v as i32))
|
||||
.map(|v| value_to_option(v, |v| v.as_i64().map(|v| v as i32)))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_i64()).collect();
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| value_to_option(v, |v| v.as_i64()))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Boolean => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_bool()).collect();
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| value_to_option(v, |v| v.as_bool()))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::String => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.coerce_string()).collect();
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| value_to_option(v, |v| v.coerce_string()))
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
DataType::Object(_, _) => value_to_series(name, &column.values),
|
||||
@ -377,7 +409,11 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit)))
|
||||
.map(|v| {
|
||||
value_to_option(v, |v| {
|
||||
v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit))
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
Ok(Series::new(name, series_values?))
|
||||
}
|
||||
@ -1241,6 +1277,17 @@ fn time_from_midnight(nanos: i64, span: Span) -> Result<Value, ShellError> {
|
||||
})
|
||||
}
|
||||
|
||||
fn value_to_option<T, F>(value: &Value, func: F) -> Result<Option<T>, ShellError>
|
||||
where
|
||||
F: FnOnce(&Value) -> Result<T, ShellError>,
|
||||
{
|
||||
if value.is_nothing() {
|
||||
Ok(None)
|
||||
} else {
|
||||
func(value).map(|v| Some(v))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use indexmap::indexmap;
|
||||
@ -1461,4 +1508,187 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_f32() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new(
|
||||
"foo",
|
||||
vec![
|
||||
Value::test_float(1.1),
|
||||
Value::test_int(2),
|
||||
Value::test_nothing(),
|
||||
],
|
||||
),
|
||||
column_type: Some(DataType::Float32),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1.1f32), Some(2.0), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_f64() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new(
|
||||
"foo",
|
||||
vec![
|
||||
Value::test_float(1.1),
|
||||
Value::test_int(2),
|
||||
Value::test_nothing(),
|
||||
],
|
||||
),
|
||||
column_type: Some(DataType::Float64),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1.1f64), Some(2.0), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_u8() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::UInt8),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1u8), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_u16() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::UInt16),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1u16), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_u32() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::UInt32),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1u32), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_u64() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::UInt64),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1u64), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_i8() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::Int8),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1i8), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_i16() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::Int16),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1i16), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_i32() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::Int32),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1i32), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_i64() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new("foo", vec![Value::test_int(1), Value::test_nothing()]),
|
||||
column_type: Some(DataType::Int64),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(1i64), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_bool() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new(
|
||||
"foo",
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_nothing(),
|
||||
],
|
||||
),
|
||||
column_type: Some(DataType::Boolean),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(result, Series::new("name", [Some(true), Some(false), None]));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typed_column_to_series_string() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let column = TypedColumn {
|
||||
column: Column::new(
|
||||
"foo",
|
||||
vec![Value::test_string("barbaz"), Value::test_nothing()],
|
||||
),
|
||||
column_type: Some(DataType::String),
|
||||
};
|
||||
|
||||
let result = typed_column_to_series("foo", column)?;
|
||||
|
||||
assert_eq!(
|
||||
result,
|
||||
Series::new("name", [Some("barbaz".to_string()), None])
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user