mirror of
https://github.com/nushell/nushell.git
synced 2025-04-29 15:44:28 +02:00
feat(polars): enable parsing decimals in polars schemas (#15632)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> This PR enables the option to set a column type to `decimal` in the `--schema` parameter of `polars into-df` and `polars into-lazy` commands. This option was already available in `polars open`, which used the underlying polars io commands that already accounted for decimal types when specified in the schema. See below for a comparison of the current and proposed implementation. ```nushell # Current Implementation > [[a b]; [1 1.618]]| polars into-df -s {a: u8, b: 'decimal<4,3>'} Error: × Error creating dataframe: Unsupported type: Decimal(Some(4), Some(3)) # Proposed Implementation > [[a b]; [1 1.618]]| polars into-df -s {a: u8, b: 'decimal<4,3>'} | polars schema ╭───┬──────────────╮ │ a │ u8 │ │ b │ decimal<4,3> │ ╰───┴──────────────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> No breaking changes. Users now have the option to specify `decimal` in the `--schema` parameter of `polars into-df` and `polars into-lazy`. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> An example in `polars into-df` was modified to showcase the decimal type. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
b33f4b7f55
commit
208ebeefab
@ -160,7 +160,7 @@ impl PluginCommand for ToDataFrame {
|
|||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Convert to a dataframe and provide a schema",
|
description: "Convert to a dataframe and provide a schema",
|
||||||
example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
|
example: "[[a b c e]; [1 {d: [1 2 3]} [10 11 12] 1.618]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>, e: 'decimal<4,3>'}",
|
||||||
result: Some(
|
result: Some(
|
||||||
NuDataFrame::try_from_series_vec(vec![
|
NuDataFrame::try_from_series_vec(vec![
|
||||||
Series::new("a".into(), &[1u8]),
|
Series::new("a".into(), &[1u8]),
|
||||||
@ -172,11 +172,12 @@ impl PluginCommand for ToDataFrame {
|
|||||||
.expect("Struct series should not fail")
|
.expect("Struct series should not fail")
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
let dtype = DataType::List(Box::new(DataType::String));
|
let dtype = DataType::List(Box::new(DataType::UInt8));
|
||||||
let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
|
let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
|
||||||
Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
|
Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
|
||||||
.expect("List series should not fail")
|
.expect("List series should not fail")
|
||||||
}
|
},
|
||||||
|
Series::new("e".into(), &[1.618]),
|
||||||
], Span::test_data())
|
], Span::test_data())
|
||||||
.expect("simple df for test should not fail")
|
.expect("simple df for test should not fail")
|
||||||
.into_value(Span::test_data()),
|
.into_value(Span::test_data()),
|
||||||
|
@ -320,6 +320,34 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
|
|||||||
.collect();
|
.collect();
|
||||||
Ok(Series::new(name, series_values?))
|
Ok(Series::new(name, series_values?))
|
||||||
}
|
}
|
||||||
|
DataType::Decimal(precision, scale) => {
|
||||||
|
let series_values: Result<Vec<_>, _> = column
|
||||||
|
.values
|
||||||
|
.iter()
|
||||||
|
.map(|v| {
|
||||||
|
value_to_option(v, |v| match v {
|
||||||
|
Value::Float { val, .. } => Ok(*val),
|
||||||
|
Value::Int { val, .. } => Ok(*val as f64),
|
||||||
|
x => Err(ShellError::GenericError {
|
||||||
|
error: "Error converting to decimal".into(),
|
||||||
|
msg: "".into(),
|
||||||
|
span: None,
|
||||||
|
help: Some(format!("Unexpected type: {x:?}")),
|
||||||
|
inner: vec![],
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Series::new(name, series_values?)
|
||||||
|
.cast_with_options(&DataType::Decimal(*precision, *scale), Default::default())
|
||||||
|
.map_err(|e| ShellError::GenericError {
|
||||||
|
error: "Error parsing decimal".into(),
|
||||||
|
msg: "".into(),
|
||||||
|
span: None,
|
||||||
|
help: Some(e.to_string()),
|
||||||
|
inner: vec![],
|
||||||
|
})
|
||||||
|
}
|
||||||
DataType::UInt8 => {
|
DataType::UInt8 => {
|
||||||
let series_values: Result<Vec<_>, _> = column
|
let series_values: Result<Vec<_>, _> = column
|
||||||
.values
|
.values
|
||||||
|
Loading…
Reference in New Issue
Block a user