mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 17:25:15 +02:00
Merge branch 'main' into polars_categorical_2
This commit is contained in:
@ -39,14 +39,14 @@ impl PluginCommand for ToRepr {
|
||||
result: Some(Value::string(
|
||||
r#"
|
||||
shape: (2, 2)
|
||||
┌─────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns] ┆ i64 │
|
||||
╞═════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 ┆ 4 │
|
||||
└─────────────────────┴─────┘"#
|
||||
┌─────────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns, UTC] ┆ i64 │
|
||||
╞═════════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 UTC ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 UTC ┆ 4 │
|
||||
└─────────────────────────┴─────┘"#
|
||||
.trim(),
|
||||
Span::test_data(),
|
||||
)),
|
||||
@ -54,18 +54,18 @@ shape: (2, 2)
|
||||
Example {
|
||||
description: "Shows lazy dataframe in repr format",
|
||||
example:
|
||||
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr",
|
||||
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-lazy | polars into-repr",
|
||||
result: Some(Value::string(
|
||||
r#"
|
||||
shape: (2, 2)
|
||||
┌─────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns] ┆ i64 │
|
||||
╞═════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 ┆ 4 │
|
||||
└─────────────────────┴─────┘"#
|
||||
┌─────────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns, UTC] ┆ i64 │
|
||||
╞═════════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 UTC ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 UTC ┆ 4 │
|
||||
└─────────────────────────┴─────┘"#
|
||||
.trim(),
|
||||
Span::test_data(),
|
||||
)),
|
||||
|
@ -1,6 +1,7 @@
|
||||
use crate::{values::CustomValueSupport, PolarsPlugin};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
use super::super::super::values::{Column, NuDataFrame, NuSchema};
|
||||
|
||||
use chrono::DateTime;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
@ -8,7 +9,7 @@ use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
|
||||
use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDateTime;
|
||||
@ -43,6 +44,7 @@ impl PluginCommand for AsDateTime {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formatting date time string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
@ -54,7 +56,7 @@ impl PluginCommand for AsDateTime {
|
||||
vec![
|
||||
Example {
|
||||
description: "Converts string to datetime",
|
||||
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
|
||||
example: r#"["2021-12-30 00:00:00 -0400" "2021-12-31 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -62,7 +64,7 @@ impl PluginCommand for AsDateTime {
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"2021-12-30 00:00:00 -0400",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
@ -70,7 +72,7 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"2021-12-31 00:00:00 -0400",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
@ -86,7 +88,7 @@ impl PluginCommand for AsDateTime {
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime with high resolutions",
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f" --naive"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -110,7 +112,15 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
None
|
||||
),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -118,7 +128,7 @@ impl PluginCommand for AsDateTime {
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime using the `--not-exact` flag even with excessive symbols",
|
||||
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact"#,
|
||||
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact --naive"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -134,7 +144,15 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
None
|
||||
),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -162,6 +180,7 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(0)?;
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
let tz_aware = !call.has_flag("naive")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
@ -177,7 +196,7 @@ fn command(
|
||||
casted.as_datetime_not_exact(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
tz_aware,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
@ -186,7 +205,7 @@ fn command(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
false,
|
||||
tz_aware,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
|
@ -1,7 +1,8 @@
|
||||
use crate::values::NuExpression;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
dataframe::values::{Column, NuDataFrame, NuSchema},
|
||||
values::CustomValueSupport,
|
||||
PolarsPlugin,
|
||||
};
|
||||
@ -13,7 +14,7 @@ use nu_protocol::{
|
||||
};
|
||||
use polars::{
|
||||
datatypes::{DataType, TimeUnit},
|
||||
prelude::NamedFrom,
|
||||
prelude::{Field, NamedFrom, Schema},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
@ -54,14 +55,20 @@ impl PluginCommand for ExprDatePart {
|
||||
vec![
|
||||
Example {
|
||||
description: "Creates an expression to capture the year date part",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
|
||||
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
|
||||
],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(TimeUnit::Nanoseconds, None),
|
||||
),
|
||||
Field::new("datetime_year".into(), DataType::Int64),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -69,7 +76,7 @@ impl PluginCommand for ExprDatePart {
|
||||
},
|
||||
Example {
|
||||
description: "Creates an expression to capture multiple date parts",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive |
|
||||
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
|
||||
(polars col datetime | polars datepart month | polars as datetime_month ),
|
||||
(polars col datetime | polars datepart day | polars as datetime_day ),
|
||||
|
@ -245,7 +245,10 @@ fn value_to_data_type(value: &Value) -> Option<DataType> {
|
||||
Value::Float { .. } => Some(DataType::Float64),
|
||||
Value::String { .. } => Some(DataType::String),
|
||||
Value::Bool { .. } => Some(DataType::Boolean),
|
||||
Value::Date { .. } => Some(DataType::Date),
|
||||
Value::Date { .. } => Some(DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
Some(PlSmallStr::from_static("UTC")),
|
||||
)),
|
||||
Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)),
|
||||
Value::Filesize { .. } => Some(DataType::Int64),
|
||||
Value::Binary { .. } => Some(DataType::Binary),
|
||||
@ -447,24 +450,28 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::Date { val, .. } = &v {
|
||||
// If there is a timezone specified, make sure
|
||||
// the value is converted to it
|
||||
Ok(maybe_tz
|
||||
.as_ref()
|
||||
.map(|tz| tz.parse::<Tz>().map(|tz| val.with_timezone(&tz)))
|
||||
.transpose()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error parsing timezone".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
inner: vec![],
|
||||
})?
|
||||
.and_then(|dt| dt.timestamp_nanos_opt())
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
|
||||
} else {
|
||||
Ok(None)
|
||||
match (maybe_tz, &v) {
|
||||
(Some(tz), Value::Date { val, .. }) => {
|
||||
// If there is a timezone specified, make sure
|
||||
// the value is converted to it
|
||||
Ok(tz
|
||||
.parse::<Tz>()
|
||||
.map(|tz| val.with_timezone(&tz))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error parsing timezone".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
inner: vec![],
|
||||
})?
|
||||
.timestamp_nanos_opt()
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
|
||||
}
|
||||
(None, Value::Date { val, .. }) => Ok(val
|
||||
.timestamp_nanos_opt()
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu))),
|
||||
|
||||
_ => Ok(None),
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<Option<i64>>, ShellError>>()?;
|
||||
|
@ -71,7 +71,7 @@ impl Default for DataFrameValue {
|
||||
|
||||
impl PartialEq for DataFrameValue {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
|
||||
self.0.partial_cmp(&other.0).is_some_and(Ordering::is_eq)
|
||||
}
|
||||
}
|
||||
impl Eq for DataFrameValue {}
|
||||
|
@ -169,6 +169,67 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result<DataType, ShellError> {
|
||||
let time_unit = str_to_time_unit(next, span)?;
|
||||
Ok(DataType::Duration(time_unit))
|
||||
}
|
||||
_ if dtype.starts_with("decimal") => {
|
||||
let dtype = dtype
|
||||
.trim_start_matches("decimal")
|
||||
.trim_start_matches('<')
|
||||
.trim_end_matches('>');
|
||||
let mut split = dtype.split(',');
|
||||
let next = split
|
||||
.next()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Missing decimal precision".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.trim();
|
||||
let precision = match next {
|
||||
"*" => None, // infer
|
||||
_ => Some(
|
||||
next.parse::<usize>()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: format!("Error in parsing decimal precision: {e}"),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
let next = split
|
||||
.next()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "Missing decimal scale".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.trim();
|
||||
let scale = match next {
|
||||
"*" => Err(ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: "`*` is not a permitted value for scale".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
_ => next
|
||||
.parse::<usize>()
|
||||
.map(Some)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: format!("Error in parsing decimal precision: {e}"),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
Ok(DataType::Decimal(precision, scale))
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Invalid polars data type".into(),
|
||||
msg: format!("Unknown type: {dtype}"),
|
||||
@ -367,6 +428,24 @@ mod test {
|
||||
assert_eq!(schema, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dtype_str_schema_decimal() {
|
||||
let dtype = "decimal<7,2>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
|
||||
let expected = DataType::Decimal(Some(7usize), Some(2usize));
|
||||
assert_eq!(schema, expected);
|
||||
|
||||
// "*" is not a permitted value for scale
|
||||
let dtype = "decimal<7,*>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown());
|
||||
assert!(matches!(schema, Err(ShellError::GenericError { .. })));
|
||||
|
||||
let dtype = "decimal<*,2>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
|
||||
let expected = DataType::Decimal(None, Some(2usize));
|
||||
assert_eq!(schema, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dtype_str_to_schema_list_types() {
|
||||
let dtype = "list<i32>";
|
||||
@ -383,5 +462,19 @@ mod test {
|
||||
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
|
||||
let expected = DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None)));
|
||||
assert_eq!(schema, expected);
|
||||
|
||||
let dtype = "list<decimal<7,2>>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
|
||||
let expected = DataType::List(Box::new(DataType::Decimal(Some(7usize), Some(2usize))));
|
||||
assert_eq!(schema, expected);
|
||||
|
||||
let dtype = "list<decimal<*,2>>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
|
||||
let expected = DataType::List(Box::new(DataType::Decimal(None, Some(2usize))));
|
||||
assert_eq!(schema, expected);
|
||||
|
||||
let dtype = "list<decimal<7,*>>";
|
||||
let schema = str_to_dtype(dtype, Span::unknown());
|
||||
assert!(matches!(schema, Err(ShellError::GenericError { .. })));
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user