Polars update (#4875)

* update to polars 0.20

* add to-date parser for series
This commit is contained in:
Fernando Herrera
2022-03-19 11:13:34 +00:00
committed by GitHub
parent 3db608eb5c
commit d6669d3f33
17 changed files with 255 additions and 161 deletions

View File

@@ -162,7 +162,7 @@ fn command(
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let names = ChunkedArray::<Utf8Type>::new_from_opt_slice("descriptor", &labels).into_series();
let names = ChunkedArray::<Utf8Type>::from_slice_options("descriptor", &labels).into_series();
let head = std::iter::once(names);
@@ -235,7 +235,7 @@ fn command(
descriptors.push(max);
let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::new_from_opt_slice(&name, &descriptors).into_series()
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
});
let res = head.chain(tail).collect::<Vec<Series>>();

View File

@@ -4,6 +4,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::DistinctKeepStrategy;
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
@@ -28,6 +29,11 @@ impl Command for DropDuplicates {
"subset of columns to drop duplicates",
)
.switch("maintain", "maintain order", Some('m'))
.switch(
"last",
"keeps last duplicate value (by default keeps first)",
Some('l'),
)
.category(Category::Custom("dataframe".into()))
}
@@ -82,8 +88,14 @@ fn command(
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let keep_strategy = if call.has_flag("last") {
DistinctKeepStrategy::Last
} else {
DistinctKeepStrategy::First
};
df.as_ref()
.drop_duplicates(call.has_flag("maintain"), subset_slice)
.distinct(subset_slice, keep_strategy)
.map_err(|e| {
ShellError::SpannedLabeledError(
"Error dropping duplicates".into(),

View File

@@ -71,7 +71,7 @@ fn command(
let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
let no_header: bool = call.has_flag("no-header");
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut file = File::create(&file_name.item).map_err(|e| {
ShellError::SpannedLabeledError(
@@ -109,7 +109,7 @@ fn command(
}
};
writer.finish(df.as_ref()).map_err(|e| {
writer.finish(df.as_mut()).map_err(|e| {
ShellError::SpannedLabeledError(
"Error writing to file".into(),
e.to_string(),

View File

@@ -55,7 +55,7 @@ fn command(
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| {
ShellError::SpannedLabeledError(
@@ -65,7 +65,7 @@ fn command(
)
})?;
ParquetWriter::new(file).finish(df.as_ref()).map_err(|e| {
ParquetWriter::new(file).finish(df.as_mut()).map_err(|e| {
ShellError::SpannedLabeledError("Error saving file".into(), e.to_string(), file_name.span)
})?;