New take command (#3722)

* Typo in command description

* filter name change

* Clean column name

* Clippy error and updated polars version

* Lint correction in file

* CSV Infer schema optional

* Correct float operations

* changes in series castings to allow other types

* Clippy error correction

* Removed lists from command signatures

* Added not command for series

* take command with args

* set with idx command
This commit is contained in:
Fernando Herrera 2021-07-05 00:46:53 +01:00 committed by GitHub
parent c94c87eec0
commit af2b2c668d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 319 additions and 57 deletions

43
Cargo.lock generated
View File

@ -208,9 +208,11 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f3334cea4f209440350d00ae1dab237ced49d80b664cc4b0e984893d583890"
dependencies = [
"cfg_aliases",
"chrono",
"csv",
"flatbuffers",
@ -753,6 +755,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "chrono"
version = "0.4.19"
@ -4352,8 +4360,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "265044e41d674fad4c7860a3e245e53138e926fe83cad8d45193a7a354c56a54"
dependencies = [
"arrow",
"base64 0.13.0",
@ -4364,7 +4373,6 @@ dependencies = [
"lz4",
"num-bigint 0.4.0",
"parquet-format",
"rand 0.8.4",
"snap",
"thrift",
"zstd",
@ -4592,8 +4600,9 @@ dependencies = [
[[package]]
name = "polars"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f254b79757346a86a8371ea4a087ce6a56e604c82d61093a1b85bfd0df99aeb"
dependencies = [
"polars-core",
"polars-io",
@ -4602,8 +4611,9 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec1ef88e60b660c51644a5b098570519948d95f389b67ef690a0f1187395d7bf"
dependencies = [
"arrow",
"num 0.4.0",
@ -4612,8 +4622,9 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5e6ee23eb50845501c8c31368051af75801185cf4bedf9e7b3ec945a49af9c"
dependencies = [
"ahash",
"anyhow",
@ -4638,8 +4649,9 @@ dependencies = [
[[package]]
name = "polars-io"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e8719cdb70555e0492dd24e8f09f637cc112bac438be754bad8dca75f466ab"
dependencies = [
"ahash",
"anyhow",
@ -4661,8 +4673,9 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=adc358b437f93bc7f844a94d68c064616e9d2ac2#adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ca6b2fb59bbe6725a84c48df12f509b4655d173cd113e5fb51f971cff1f93bc"
dependencies = [
"ahash",
"itertools",

View File

@ -99,9 +99,7 @@ which = { version="4.1.0", optional=true }
zip = { version="0.5.9", optional=true }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
version = "0.14.5"
optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in"]

View File

@ -165,8 +165,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(value))
}
_ => Err(ShellError::labeled_error(
"No groupby or dataframe",
"no groupby or found in input stream",
"No groupby, dataframe or series in stream",
"no groupby, dataframe or series found in input stream",
&value.tag.span,
)),
}

View File

@ -21,6 +21,7 @@ pub mod select;
pub mod show;
pub mod slice;
pub mod sort;
pub mod take;
pub mod to_csv;
pub mod to_df;
pub mod to_parquet;
@ -52,6 +53,7 @@ pub use select::DataFrame as DataFrameSelect;
pub use show::DataFrame as DataFrameShow;
pub use slice::DataFrame as DataFrameSlice;
pub use sort::DataFrame as DataFrameSort;
pub use take::DataFrame as DataFrameTake;
pub use to_csv::DataFrame as DataFrameToCsv;
pub use to_df::DataFrame as DataFrameToDF;
pub use to_parquet::DataFrame as DataFrameToParquet;
@ -77,6 +79,7 @@ pub use series::DataFrameNUnique;
pub use series::DataFrameNot;
pub use series::DataFrameSeriesRename;
pub use series::DataFrameSet;
pub use series::DataFrameSetWithIdx;
pub use series::DataFrameShift;
pub use series::DataFrameUnique;
pub use series::DataFrameValueCounts;

View File

@ -1,9 +1,9 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
pub struct DataFrame;
@ -40,18 +40,12 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let res = series.as_ref().arg_max();
let value = match res {
Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)),
None => UntaggedValue::Primitive(Primitive::Nothing),
let chunked = match res {
Some(index) => UInt32Chunked::new_from_slice("arg_max", &[index as u32]),
None => UInt32Chunked::new_from_slice("arg_max", &[]),
};
let value = Value {
value,
tag: tag.clone(),
};
let res = chunked.into_series();
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("arg-max", value);
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}

View File

@ -1,9 +1,9 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
pub struct DataFrame;
@ -40,18 +40,12 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let res = series.as_ref().arg_min();
let value = match res {
Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)),
None => UntaggedValue::Primitive(Primitive::Nothing),
let chunked = match res {
Some(index) => UInt32Chunked::new_from_slice("arg_min", &[index as u32]),
None => UInt32Chunked::new_from_slice("arg_min", &[]),
};
let value = Value {
value,
tag: tag.clone(),
};
let res = chunked.into_series();
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("arg-min", value);
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}

View File

@ -15,6 +15,7 @@ pub mod n_unique;
pub mod not;
pub mod rename;
pub mod set;
pub mod set_with_idx;
pub mod shift;
pub mod unique;
pub mod value_counts;
@ -36,6 +37,7 @@ pub use n_unique::DataFrame as DataFrameNUnique;
pub use not::DataFrame as DataFrameNot;
pub use rename::DataFrame as DataFrameSeriesRename;
pub use set::DataFrame as DataFrameSet;
pub use set_with_idx::DataFrame as DataFrameSetWithIdx;
pub use shift::DataFrame as DataFrameShift;
pub use unique::DataFrame as DataFrameUnique;
pub use value_counts::DataFrame as DataFrameValueCounts;

View File

@ -0,0 +1,153 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
/// Marker type implementing the `dataframe set-with-idx` command.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dataframe set-with-idx"
    }

    /// Declares one required positional `value` and one required named
    /// `indices` argument (short flag `-i`); both accept any syntax shape.
    fn signature(&self) -> Signature {
        let with_value = Signature::build("dataframe set-with-idx").required(
            "value",
            SyntaxShape::Any,
            "value to be inserted in series",
        );

        with_value.required_named(
            "indices",
            SyntaxShape::Any,
            "list of indices indicating where to set the value",
            Some('i'),
        )
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[Series] Sets value in the given index"
    }

    /// Usage examples; no expected result is attached to the example.
    fn examples(&self) -> Vec<Example> {
        let set_selected_rows = Example {
            description: "Set value in selected rows from series",
            example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
let indices = ([0 2] | dataframe to-series);
$series | dataframe set-with-idx 6 -i $indices"#,
            result: None,
        };

        vec![set_selected_rows]
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }
}
/// Executes `dataframe set-with-idx`.
///
/// Reads the replacement value (first positional argument) and the named
/// `indices` argument, pulls the target series from the input stream, and
/// returns a single series value in which the replacement has been written
/// at every given index.
///
/// # Errors
///
/// Returns a `ShellError` when:
/// * `indices` is not a series, or its dtype is not one of `UInt32`,
///   `UInt64`, `Int32`, `Int64` (labelled at the `indices` argument);
/// * `NuSeries::try_from_stream` cannot obtain a series from the input;
/// * the input series cannot be viewed as the chunked type selected by the
///   value (int -> `i64`, decimal -> `f64`, string -> `utf8`);
/// * the value is not an int, decimal or string primitive;
/// * polars reports an error while casting or setting.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let value: Value = args.req(0)?;
    let indices: Value = args.req_named("indices")?;

    // Diagnostics about the index series must underline the `indices`
    // argument itself rather than the value that is being inserted.
    let indices_span = indices.tag.span;

    let indices = match &indices.value {
        UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => Ok(series),
        _ => Err(ShellError::labeled_error(
            "Incorrect type",
            "can only use a series for set command",
            indices_span,
        )),
    }?;

    // Only integer-typed index series are accepted; they are normalised to
    // UInt32 before being iterated.
    let casted = match indices.as_ref().dtype() {
        DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
            .as_ref()
            .cast_with_dtype(&DataType::UInt32)
            .map_err(|e| parse_polars_error::<&str>(&e, &indices_span, None)),
        _ => Err(ShellError::labeled_error_with_secondary(
            "Incorrect type",
            "Series with incorrect type",
            &indices_span,
            "Consider using a Series with type int type",
            &indices_span,
        )),
    }?;

    // Null entries in the index series are skipped.
    let indices = casted
        .u32()
        .map_err(|e| parse_polars_error::<&str>(&e, &indices_span, None))?
        .into_iter()
        .filter_map(|val| val.map(|v| v as usize));

    let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;

    // The primitive type of the replacement value selects which chunked view
    // of the series is used for the set operation.
    match &value.value {
        UntaggedValue::Primitive(Primitive::Int(val)) => {
            let chunked = series.as_ref().i64().map_err(|e| {
                parse_polars_error::<&str>(
                    &e,
                    &value.tag.span,
                    Some("The value has to match the set value type"),
                )
            })?;

            let res = chunked
                .set_at_idx(indices, Some(*val))
                .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;

            Ok(OutputStream::one(NuSeries::series_to_value(
                res.into_series(),
                tag,
            )))
        }
        UntaggedValue::Primitive(Primitive::Decimal(val)) => {
            let chunked = series.as_ref().f64().map_err(|e| {
                parse_polars_error::<&str>(
                    &e,
                    &value.tag.span,
                    Some("The value has to match the series type"),
                )
            })?;

            let res = chunked
                .set_at_idx(
                    indices,
                    Some(
                        val.to_f64()
                            .expect("internal error: expected f64-compatible decimal"),
                    ),
                )
                .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;

            Ok(OutputStream::one(NuSeries::series_to_value(
                res.into_series(),
                tag,
            )))
        }
        UntaggedValue::Primitive(Primitive::String(val)) => {
            let chunked = series.as_ref().utf8().map_err(|e| {
                parse_polars_error::<&str>(
                    &e,
                    &value.tag.span,
                    Some("The value has to match the series type"),
                )
            })?;

            let res = chunked
                .set_at_idx(indices, Some(val.as_ref()))
                .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;

            // The utf8 result is renamed to "string"; the numeric branches
            // perform no rename.
            let mut res = res.into_series();
            res.rename("string");

            Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
        }
        _ => Err(ShellError::labeled_error(
            "Incorrect type",
            format!(
                "this value cannot be set in a series of type '{}'",
                series.as_ref().dtype()
            ),
            value.tag.span,
        )),
    }
}

View File

@ -0,0 +1,107 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::DataType;
use super::utils::parse_polars_error;
/// Marker type implementing the `dataframe take` command.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dataframe take"
    }

    /// Declares a single required positional `indices` argument that accepts
    /// any syntax shape.
    fn signature(&self) -> Signature {
        let base = Signature::build("dataframe take");

        base.required(
            "indices",
            SyntaxShape::Any,
            "list of indices used to take data",
        )
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[DataFrame, Series] Creates new dataframe using the given indices"
    }

    /// Usage examples: one taking rows from a dataframe, one from a series.
    /// No expected results are attached.
    fn examples(&self) -> Vec<Example> {
        let from_dataframe = Example {
            description: "Takes selected rows from dataframe",
            example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dataframe to-df);
let indices = ([0 2] | dataframe to-series);
$df | dataframe take $indices"#,
            result: None,
        };

        let from_series = Example {
            description: "Takes selected rows from series",
            example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
let indices = ([0 2] | dataframe to-series);
$series | dataframe take $indices"#,
            result: None,
        };

        vec![from_dataframe, from_series]
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let series = match &value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use a series for take command",
value.tag.span,
)),
}?;
let casted = match series.as_ref().dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
.as_ref()
.cast_with_dtype(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type",
"Series with incorrect type",
&value.tag.span,
"Consider using a Series with type int type",
&value.tag.span,
)),
}?;
let indices = casted
.u32()
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
let value = args.input.next().ok_or_else(|| {
ShellError::labeled_error("Empty stream", "No value found in the stream", &tag)
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let res = df.as_ref().take(indices);
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().take(indices);
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"No dataframe or series in stream",
"no dataframe or series found in input stream",
&value.tag.span,
)),
}
}

View File

@ -32,9 +32,9 @@ pub use dataframe::{
DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin,
DataFrameLast, DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFrameNot,
DataFrameOpen, DataFramePivot, DataFrameSample, DataFrameSelect, DataFrameSeriesRename,
DataFrameSet, DataFrameShift, DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameToCsv,
DataFrameToDF, DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts,
DataFrameWhere, DataFrameWithColumn,
DataFrameSet, DataFrameSetWithIdx, DataFrameShift, DataFrameShow, DataFrameSlice,
DataFrameSort, DataFrameTake, DataFrameToCsv, DataFrameToDF, DataFrameToParquet,
DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn,
};
pub use env::*;
pub use filesystem::*;

View File

@ -315,6 +315,8 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(DataFrameShift),
whole_stream_command(DataFrameSet),
whole_stream_command(DataFrameNot),
whole_stream_command(DataFrameTake),
whole_stream_command(DataFrameSetWithIdx),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -38,9 +38,7 @@ nu-value-ext = { version="0.33.1", path="../nu-value-ext" }
nu-ansi-term = { version="0.33.1", path="../nu-ansi-term" }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
version = "0.14.5"
optional = true
features = ["strings", "checked_arithmetic"]

View File

@ -31,9 +31,7 @@ serde_yaml = "0.8.16"
toml = "0.5.8"
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "adc358b437f93bc7f844a94d68c064616e9d2ac2"
version = "0.14.2"
version = "0.14.5"
optional = true
features = ["serde", "rows"]