forked from extern/nushell
uniq-by
command (#7295)
New command `uniq-by` to get unique results by column. Closes https://github.com/nushell/nushell/issues/7109
This commit is contained in:
parent
f491d3e1e1
commit
fcdc474731
@ -132,6 +132,7 @@ pub fn create_default_context() -> EngineState {
|
||||
SplitList,
|
||||
Transpose,
|
||||
Uniq,
|
||||
UniqBy,
|
||||
Upsert,
|
||||
Update,
|
||||
UpdateCells,
|
||||
|
@ -41,6 +41,7 @@ mod split_by;
|
||||
mod take;
|
||||
mod transpose;
|
||||
mod uniq;
|
||||
mod uniq_by;
|
||||
mod update;
|
||||
mod update_cells;
|
||||
mod upsert;
|
||||
@ -93,6 +94,7 @@ pub use split_by::SplitBy;
|
||||
pub use take::*;
|
||||
pub use transpose::Transpose;
|
||||
pub use uniq::*;
|
||||
pub use uniq_by::UniqBy;
|
||||
pub use update::Update;
|
||||
pub use update_cells::UpdateCells;
|
||||
pub use upsert::Upsert;
|
||||
|
@ -1,7 +1,8 @@
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Category, Example, IntoPipelineData, PipelineData, Signature, Span, Type, Value,
|
||||
Category, Example, IntoPipelineData, PipelineData, PipelineMetadata, Signature, Span, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -63,7 +64,19 @@ impl Command for Uniq {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<nu_protocol::PipelineData, nu_protocol::ShellError> {
|
||||
uniq(engine_state, stack, call, input)
|
||||
let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
|
||||
item_mapper(ms.item, ms.flag_ignore_case)
|
||||
});
|
||||
|
||||
let metadata = input.metadata();
|
||||
uniq(
|
||||
engine_state,
|
||||
stack,
|
||||
call,
|
||||
input.into_iter().collect(),
|
||||
mapper,
|
||||
metadata,
|
||||
)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
@ -123,7 +136,16 @@ impl Command for Uniq {
|
||||
}
|
||||
}
|
||||
|
||||
struct ValueCounter {
|
||||
pub struct ItemMapperState {
|
||||
pub item: Value,
|
||||
pub flag_ignore_case: bool,
|
||||
}
|
||||
|
||||
fn item_mapper(item: Value, flag_ignore_case: bool) -> ValueCounter {
|
||||
ValueCounter::new(item, flag_ignore_case)
|
||||
}
|
||||
|
||||
pub struct ValueCounter {
|
||||
val: Value,
|
||||
val_to_compare: Value,
|
||||
count: i64,
|
||||
@ -137,12 +159,15 @@ impl PartialEq<Self> for ValueCounter {
|
||||
|
||||
impl ValueCounter {
|
||||
fn new(val: Value, flag_ignore_case: bool) -> Self {
|
||||
Self::new_vals_to_compare(val.clone(), flag_ignore_case, val)
|
||||
}
|
||||
pub fn new_vals_to_compare(val: Value, flag_ignore_case: bool, vals_to_compare: Value) -> Self {
|
||||
ValueCounter {
|
||||
val: val.clone(),
|
||||
val,
|
||||
val_to_compare: if flag_ignore_case {
|
||||
clone_to_lowercase(&val)
|
||||
clone_to_lowercase(&vals_to_compare)
|
||||
} else {
|
||||
val
|
||||
vals_to_compare
|
||||
},
|
||||
count: 1,
|
||||
}
|
||||
@ -193,22 +218,29 @@ fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Ve
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn uniq(
|
||||
pub fn uniq(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
input: Vec<Value>,
|
||||
item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
|
||||
metadata: Option<PipelineMetadata>,
|
||||
) -> Result<nu_protocol::PipelineData, nu_protocol::ShellError> {
|
||||
let head = call.head;
|
||||
let flag_show_count = call.has_flag("count");
|
||||
let flag_show_repeated = call.has_flag("repeated");
|
||||
let flag_ignore_case = call.has_flag("ignore-case");
|
||||
let flag_only_uniques = call.has_flag("unique");
|
||||
let metadata = input.metadata();
|
||||
// let metadata = input.metadata();
|
||||
|
||||
let mut uniq_values = input
|
||||
.into_iter()
|
||||
.map(|item| ValueCounter::new(item, flag_ignore_case))
|
||||
.map(|item| {
|
||||
item_mapper(ItemMapperState {
|
||||
item,
|
||||
flag_ignore_case,
|
||||
})
|
||||
})
|
||||
.fold(Vec::<ValueCounter>::new(), |mut counter, item| {
|
||||
match counter
|
||||
.iter_mut()
|
||||
|
174
crates/nu-command/src/filters/uniq_by.rs
Normal file
174
crates/nu-command/src/filters/uniq_by.rs
Normal file
@ -0,0 +1,174 @@
|
||||
pub use super::uniq;
|
||||
use nu_engine::column::nonexistent_column;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
/// Unit struct representing the `uniq-by` command; its behavior lives in the
/// `Command` implementation for this type.
#[derive(Clone)]
|
||||
pub struct UniqBy;
|
||||
|
||||
impl Command for UniqBy {
|
||||
fn name(&self) -> &str {
|
||||
"uniq-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("uniq-by")
|
||||
.input_output_types(vec![(Type::Table(vec![]), Type::Table(vec![]))])
|
||||
.rest("columns", SyntaxShape::Any, "the column(s) to filter by")
|
||||
.switch(
|
||||
"count",
|
||||
"Return a table containing the distinct input values together with their counts",
|
||||
Some('c'),
|
||||
)
|
||||
.switch(
|
||||
"repeated",
|
||||
"Return the input values that occur more than once",
|
||||
Some('d'),
|
||||
)
|
||||
.switch(
|
||||
"ignore-case",
|
||||
"Ignore differences in case when comparing input values",
|
||||
Some('i'),
|
||||
)
|
||||
.switch(
|
||||
"unique",
|
||||
"Return the input values that occur once only",
|
||||
Some('u'),
|
||||
)
|
||||
.category(Category::Filters)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Return the distinct values in the input by the given column(s)."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["distinct", "deduplicate"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<nu_protocol::PipelineData, nu_protocol::ShellError> {
|
||||
let columns: Vec<String> = call.rest(engine_state, stack, 0)?;
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ShellError::MissingParameter("columns".into(), call.head));
|
||||
}
|
||||
|
||||
let metadata = input.metadata();
|
||||
|
||||
let vec: Vec<_> = input.into_iter().collect();
|
||||
match validate(vec.clone(), &columns, call.head) {
|
||||
Ok(_) => {}
|
||||
Err(err) => {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
|
||||
let mapper = Box::new(item_mapper_by_col(columns));
|
||||
|
||||
uniq(engine_state, stack, call, vec, mapper, metadata)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Get rows from table filtered by column uniqueness ",
|
||||
example: "[[fruit count]; [apple 9] [apple 2] [pear 3] [orange 7]] | uniq-by fruit",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::test_record(
|
||||
vec!["fruit", "count"],
|
||||
vec![Value::test_string("apple"), Value::test_int(9)],
|
||||
),
|
||||
Value::test_record(
|
||||
vec!["fruit", "count"],
|
||||
vec![Value::test_string("pear"), Value::test_int(3)],
|
||||
),
|
||||
Value::test_record(
|
||||
vec!["fruit", "count"],
|
||||
vec![Value::test_string("orange"), Value::test_int(7)],
|
||||
),
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn validate(vec: Vec<Value>, columns: &Vec<String>, span: Span) -> Result<(), ShellError> {
|
||||
if vec.is_empty() {
|
||||
return Err(ShellError::GenericError(
|
||||
"no values to work with".to_string(),
|
||||
"".to_string(),
|
||||
None,
|
||||
Some("no values to work with".to_string()),
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
if let Value::Record {
|
||||
cols,
|
||||
vals: _input_vals,
|
||||
span: val_span,
|
||||
} = &vec[0]
|
||||
{
|
||||
if columns.is_empty() {
|
||||
// This uses the same format as the 'requires a column name' error in split_by.rs
|
||||
return Err(ShellError::GenericError(
|
||||
"expected name".into(),
|
||||
"requires a column name to filter table data".into(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(nonexistent) = nonexistent_column(columns.clone(), cols.to_vec()) {
|
||||
return Err(ShellError::CantFindColumn(nonexistent, span, *val_span));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gathers, in the order of `columns`, every value that `item.get_data_by_key`
/// yields for the given column names.
///
/// Columns for which the lookup returns `None` are skipped, so the result may be
/// shorter than `columns`.
// NOTE(review): because missing columns are dropped rather than kept as a
// placeholder, rows missing different columns could produce identical key
// lists — confirm whether that is intended.
fn get_data_by_columns(columns: &[String], item: &Value) -> Vec<Value> {
    let mut found = Vec::new();
    for name in columns {
        if let Some(value) = item.get_data_by_key(name) {
            found.push(value);
        }
    }
    found
}
|
||||
|
||||
fn item_mapper_by_col(cols: Vec<String>) -> impl Fn(crate::ItemMapperState) -> crate::ValueCounter {
|
||||
let columns = cols;
|
||||
|
||||
Box::new(move |ms: crate::ItemMapperState| -> crate::ValueCounter {
|
||||
let item_column_values = get_data_by_columns(&columns, &ms.item);
|
||||
|
||||
let col_vals = Value::List {
|
||||
vals: item_column_values,
|
||||
span: Span { start: 0, end: 0 },
|
||||
};
|
||||
|
||||
crate::ValueCounter::new_vals_to_compare(ms.item, ms.flag_ignore_case, col_vals)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the shared example checker against the examples declared by `UniqBy`.
    #[test]
    fn test_examples() {
        // Fully qualified so the call resolves to the crate-level helper rather
        // than to this test function of the same name.
        crate::test_examples(UniqBy {})
    }
}
|
@ -88,6 +88,7 @@ mod touch;
|
||||
mod transpose;
|
||||
mod try_;
|
||||
mod uniq;
|
||||
mod uniq_by;
|
||||
mod update;
|
||||
mod upsert;
|
||||
mod url;
|
||||
|
222
crates/nu-command/tests/commands/uniq_by.rs
Normal file
222
crates/nu-command/tests/commands/uniq_by.rs
Normal file
@ -0,0 +1,222 @@
|
||||
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
|
||||
use nu_test_support::playground::Playground;
|
||||
use nu_test_support::{nu, pipeline};
|
||||
|
||||
/// Rows sharing a `last_name` collapse into one: four CSV rows with three
/// distinct last names leave three rows.
#[test]
fn removes_duplicate_rows() {
    // The playground topic is named after `uniq-by` (it was `uniq_test_1`) so it
    // cannot clash with an identically named playground belonging to the `uniq`
    // tests — presumably the topic names the sandbox directory; verify against
    // `Playground::setup`.
    Playground::setup("uniq_by_test_1", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
                first_name,last_name,rusty_at,type
                Andrés,Robalino,10/11/2013,A
                Afonso,Turner,10/12/2013,B
                Yehuda,Katz,10/11/2013,A
                Jonathan,Turner,11/12/2011,O
            "#,
        )]);

        let actual = nu!(
            cwd: dirs.test(), pipeline(
            r#"
                open los_tres_caballeros.csv
                | uniq-by last_name
                | length

            "#
        ));

        assert_eq!(actual.out, "3");
    })
}
|
||||
|
||||
/// Two records that agree on column `a` are treated as duplicates even when
/// their keys are written in a different order; the first record is kept.
#[test]
fn uniq_when_keys_out_of_order() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            [{"a": "a", "b": [1,2,3]}, {"b": [1,2,3,4], "a": "a"}]
            | uniq-by a
        "#
    ));
    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo [{"a": "a", "b": [1,2,3]}]
        "#
    ));

    // `assert_eq!` already reports both sides on failure, so no extra printing
    // is needed.
    assert_eq!(actual.out, expected.out);
}
|
||||
|
||||
/// With `--count`, the `count` column holds the number of occurrences of each
/// distinct `item`: "A" appears twice and "B" once.
#[test]
fn uniq_counting() {
    let count_of_a = nu!(
        cwd: "tests/fixtures/formats",
        pipeline(
            r#"
                ["A", "B", "A"]
                | wrap item
                | uniq-by item --count
                | flatten
                | where item == A
                | get count
                | get 0
            "#
        )
    );
    assert_eq!(count_of_a.out, "2");

    let count_of_b = nu!(
        cwd: "tests/fixtures/formats",
        pipeline(
            r#"
                echo ["A", "B", "A"]
                | wrap item
                | uniq-by item --count
                | flatten
                | where item == B
                | get count
                | get 0
            "#
        )
    );
    assert_eq!(count_of_b.out, "1");
}
|
||||
|
||||
/// With `--unique`, only values that occur exactly once survive: `1` appears
/// twice and is dropped.
#[test]
fn uniq_unique() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo [1 2 3 4 1 5]
            | wrap item
            | uniq-by item --unique
            | get item
        "#
    ));
    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo [2 3 4 5]
        "#
    ));

    // `assert_eq!` already reports both sides on failure, so no extra printing
    // is needed.
    assert_eq!(actual.out, expected.out);
}
|
||||
|
||||
/// De-duplicating by `fruit` is case-sensitive by default and keeps the first
/// row seen for each distinct fruit.
#[test]
fn table() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            [[fruit day]; [apple monday] [apple friday] [Apple friday] [apple monday] [pear monday] [orange tuesday]]
            | uniq-by fruit
        "#
    ));

    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo [[fruit day]; [apple monday] [Apple friday] [pear monday] [orange tuesday]]
        "#
    ));

    // `assert_eq!` already reports both sides on failure, so no extra printing
    // is needed.
    assert_eq!(actual.out, expected.out);
}
|
||||
|
||||
/// With several columns, rows are duplicates only when they agree on every
/// listed column; here only the second `[apple monday]` row is removed.
#[test]
fn uniq_by_multiple_columns() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            [[fruit day]; [apple monday] [apple friday] [Apple friday] [apple monday] [pear monday] [orange tuesday]]
            | uniq-by fruit day
        "#
    ));

    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo [[fruit day]; [apple monday] [apple friday] [Apple friday] [pear monday] [orange tuesday]]
        "#
    ));

    // `assert_eq!` already reports both sides on failure, so no extra printing
    // is needed.
    assert_eq!(actual.out, expected.out);
}
|
||||
|
||||
/// With `-i`, nested values that differ only in letter case ('bitoque' vs
/// 'Bitoque') count as duplicates, so the third row is removed.
#[test]
fn table_with_ignore_case() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            [[origin, people];
                [World, (
                    [[name, meal];
                        ['Geremias', {plate: 'bitoque', carbs: 100}]
                    ]
                )],
                [World, (
                    [[name, meal];
                        ['Martin', {plate: 'bitoque', carbs: 100}]
                    ]
                )],
                [World, (
                    [[name, meal];
                        ['Geremias', {plate: 'Bitoque', carbs: 100}]
                    ]
                )],
            ] | uniq-by people -i
        "#
    ));

    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
        echo [[origin, people];
                [World, (
                    [[name, meal];
                        ['Geremias', {plate: 'bitoque', carbs: 100}]
                    ]
                )],
                [World, (
                    [[name, meal];
                        ['Martin', {plate: 'bitoque', carbs: 100}]
                    ]
                )],
            ]
        "#
    ));

    // `assert_eq!` already reports both sides on failure, so no extra printing
    // is needed.
    assert_eq!(actual.out, expected.out);
}
|
||||
|
||||
/// Calling `uniq-by` without any column name reports a missing-parameter error.
#[test]
fn missing_parameter() {
    let outcome = nu!(
        cwd: "tests/fixtures/formats",
        pipeline(
            r#"
                [11 22 33] | uniq-by
            "#
        )
    );

    let reported = outcome.err.contains("missing parameter");
    assert!(reported);
}
|
||||
|
||||
/// Asking for a column that the table does not have reports which column could
/// not be found.
#[test]
fn wrong_column() {
    let outcome = nu!(
        cwd: "tests/fixtures/formats",
        pipeline(
            r#"
                [[fruit day]; [apple monday] [apple friday]]
                | uniq-by column1
            "#
        )
    );

    let reported = outcome.err.contains("cannot find column 'column1'");
    assert!(reported);
}
|
Loading…
Reference in New Issue
Block a user