diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 4fd26832c..9fab7c550 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -132,6 +132,7 @@ pub fn create_default_context() -> EngineState {
             SplitList,
             Transpose,
             Uniq,
+            UniqBy,
             Upsert,
             Update,
             UpdateCells,
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 4d8fcba09..7aac8d7d5 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -41,6 +41,7 @@ mod split_by;
 mod take;
 mod transpose;
 mod uniq;
+mod uniq_by;
 mod update;
 mod update_cells;
 mod upsert;
@@ -93,6 +94,7 @@ pub use split_by::SplitBy;
 pub use take::*;
 pub use transpose::Transpose;
 pub use uniq::*;
+pub use uniq_by::UniqBy;
 pub use update::Update;
 pub use update_cells::UpdateCells;
 pub use upsert::Upsert;
diff --git a/crates/nu-command/src/filters/uniq.rs b/crates/nu-command/src/filters/uniq.rs
index 68fe5b4a7..bcae597af 100644
--- a/crates/nu-command/src/filters/uniq.rs
+++ b/crates/nu-command/src/filters/uniq.rs
@@ -1,7 +1,8 @@
 use nu_protocol::ast::Call;
 use nu_protocol::engine::{Command, EngineState, Stack};
 use nu_protocol::{
-    Category, Example, IntoPipelineData, PipelineData, Signature, Span, Type, Value,
+    Category, Example, IntoPipelineData, PipelineData, PipelineMetadata, Signature, Span, Type,
+    Value,
 };
 
 #[derive(Clone)]
@@ -63,7 +64,19 @@ impl Command for Uniq {
         call: &Call,
         input: PipelineData,
     ) -> Result<nu_protocol::PipelineData, nu_protocol::ShellError> {
-        uniq(engine_state, stack, call, input)
+        let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
+            item_mapper(ms.item, ms.flag_ignore_case)
+        });
+
+        let metadata = input.metadata();
+        uniq(
+            engine_state,
+            stack,
+            call,
+            input.into_iter().collect(),
+            mapper,
+            metadata,
+        )
     }
 
     fn examples(&self) -> Vec<Example> {
@@ -123,7 +136,19 @@ impl Command for Uniq {
     }
 }
 
-struct ValueCounter {
+/// Per-item input for a `uniq` item mapper: the value and the `ignore-case` flag state.
+pub struct ItemMapperState {
+    pub item: Value,
+    pub flag_ignore_case: bool,
+}
+
+/// Default mapper used by `uniq`: the whole value doubles as its own comparison key.
+fn item_mapper(item: Value, flag_ignore_case: bool) -> ValueCounter {
+    ValueCounter::new(item, flag_ignore_case)
+}
+
+/// Pairs a value with the key derived for comparing it and a count that starts at 1.
+pub struct ValueCounter {
     val: Value,
     val_to_compare: Value,
     count: i64,
@@ -137,12 +162,18 @@ impl PartialEq<Self> for ValueCounter {
 
 impl ValueCounter {
     fn new(val: Value, flag_ignore_case: bool) -> Self {
+        Self::new_vals_to_compare(val.clone(), flag_ignore_case, val)
+    }
+
+    /// Builds a counter that holds `val` but uses `vals_to_compare` as its comparison key
+    /// (passed through `clone_to_lowercase` when `flag_ignore_case` is set).
+    pub fn new_vals_to_compare(val: Value, flag_ignore_case: bool, vals_to_compare: Value) -> Self {
         ValueCounter {
-            val: val.clone(),
+            val,
             val_to_compare: if flag_ignore_case {
-                clone_to_lowercase(&val)
+                clone_to_lowercase(&vals_to_compare)
             } else {
-                val
+                vals_to_compare
             },
             count: 1,
         }
@@ -193,22 +224,32 @@ fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Ve
         .collect()
 }
 
-fn uniq(
+/// Shared implementation behind `uniq` and `uniq-by`.
+///
+/// `item_mapper` turns each input value into the `ValueCounter` it is compared by;
+/// `metadata` is the pipeline metadata captured by the caller before collecting `input`.
+pub fn uniq(
     _engine_state: &EngineState,
     _stack: &mut Stack,
     call: &Call,
-    input: PipelineData,
+    input: Vec<Value>,
+    item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
+    metadata: Option<PipelineMetadata>,
 ) -> Result<nu_protocol::PipelineData, nu_protocol::ShellError> {
     let head = call.head;
     let flag_show_count = call.has_flag("count");
     let flag_show_repeated = call.has_flag("repeated");
     let flag_ignore_case = call.has_flag("ignore-case");
     let flag_only_uniques = call.has_flag("unique");
-    let metadata = input.metadata();
 
     let mut uniq_values = input
         .into_iter()
-        .map(|item| ValueCounter::new(item, flag_ignore_case))
+        .map(|item| {
+            item_mapper(ItemMapperState {
+                item,
+                flag_ignore_case,
+            })
+        })
         .fold(Vec::<ValueCounter>::new(), |mut counter, item| {
             match counter
                 .iter_mut()
diff --git a/crates/nu-command/src/filters/uniq_by.rs b/crates/nu-command/src/filters/uniq_by.rs
new file mode 100644
index 000000000..2358c0144
--- /dev/null
+++ b/crates/nu-command/src/filters/uniq_by.rs
@@ -0,0 +1,179 @@
+pub use super::uniq;
+use nu_engine::column::nonexistent_column;
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
+};
+
+/// The `uniq-by` filter: `uniq`, but rows are compared on the given column(s) only.
+#[derive(Clone)]
+pub struct UniqBy;
+
+impl Command for UniqBy {
+    fn name(&self) -> &str {
+        "uniq-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("uniq-by")
+            .input_output_types(vec![(Type::Table(vec![]), Type::Table(vec![]))])
+            .rest("columns", SyntaxShape::Any, "the column(s) to filter by")
+            .switch(
+                "count",
+                "Return a table containing the distinct input values together with their counts",
+                Some('c'),
+            )
+            .switch(
+                "repeated",
+                "Return the input values that occur more than once",
+                Some('d'),
+            )
+            .switch(
+                "ignore-case",
+                "Ignore differences in case when comparing input values",
+                Some('i'),
+            )
+            .switch(
+                "unique",
+                "Return the input values that occur once only",
+                Some('u'),
+            )
+            .category(Category::Filters)
+    }
+
+    fn usage(&self) -> &str {
+        "Return the distinct values in the input by the given column(s)."
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["distinct", "deduplicate"]
+    }
+
+    /// Collects the input, validates the requested columns, then delegates to `uniq` with a
+    /// mapper that compares rows by those columns only.
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let columns: Vec<String> = call.rest(engine_state, stack, 0)?;
+
+        if columns.is_empty() {
+            return Err(ShellError::MissingParameter("columns".into(), call.head));
+        }
+
+        // Metadata must be read before `into_iter` consumes the pipeline.
+        let metadata = input.metadata();
+
+        let vec: Vec<_> = input.into_iter().collect();
+        validate(&vec, &columns, call.head)?;
+
+        let mapper = Box::new(item_mapper_by_col(columns));
+
+        uniq(engine_state, stack, call, vec, mapper, metadata)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Get rows from table filtered by column uniqueness",
+            example: "[[fruit count]; [apple 9] [apple 2] [pear 3] [orange 7]] | uniq-by fruit",
+            result: Some(Value::List {
+                vals: vec![
+                    Value::test_record(
+                        vec!["fruit", "count"],
+                        vec![Value::test_string("apple"), Value::test_int(9)],
+                    ),
+                    Value::test_record(
+                        vec!["fruit", "count"],
+                        vec![Value::test_string("pear"), Value::test_int(3)],
+                    ),
+                    Value::test_record(
+                        vec!["fruit", "count"],
+                        vec![Value::test_string("orange"), Value::test_int(7)],
+                    ),
+                ],
+                span: Span::test_data(),
+            }),
+        }]
+    }
+}
+
+/// Rejects empty input and, when the first row is a record, any requested column it lacks.
+///
+/// Only the first row is inspected; later rows are not checked here.
+fn validate(vec: &[Value], columns: &[String], span: Span) -> Result<(), ShellError> {
+    if vec.is_empty() {
+        return Err(ShellError::GenericError(
+            "no values to work with".to_string(),
+            "".to_string(),
+            None,
+            Some("no values to work with".to_string()),
+            Vec::new(),
+        ));
+    }
+
+    if let Value::Record {
+        cols,
+        vals: _input_vals,
+        span: val_span,
+    } = &vec[0]
+    {
+        if columns.is_empty() {
+            // This uses the same format as the 'requires a column name' error in split_by.rs
+            return Err(ShellError::GenericError(
+                "expected name".into(),
+                "requires a column name to filter table data".into(),
+                Some(span),
+                None,
+                Vec::new(),
+            ));
+        }
+
+        if let Some(nonexistent) = nonexistent_column(columns.to_vec(), cols.to_vec()) {
+            return Err(ShellError::CantFindColumn(nonexistent, span, *val_span));
+        }
+    }
+
+    Ok(())
+}
+
+/// Returns the values `item` holds under `columns`, in the order the columns were given.
+///
+/// NOTE(review): columns for which `get_data_by_key` yields `None` are skipped, so rows
+/// missing different columns can end up with the same key; confirm this is intended.
+fn get_data_by_columns(columns: &[String], item: &Value) -> Vec<Value> {
+    columns
+        .iter()
+        .filter_map(|col| item.get_data_by_key(col))
+        .collect::<Vec<Value>>()
+}
+
+/// Builds the mapper passed to `uniq`: each row is compared by a list of its `cols` values.
+fn item_mapper_by_col(cols: Vec<String>) -> impl Fn(crate::ItemMapperState) -> crate::ValueCounter {
+    move |ms: crate::ItemMapperState| -> crate::ValueCounter {
+        let item_column_values = get_data_by_columns(&cols, &ms.item);
+
+        let col_vals = Value::List {
+            vals: item_column_values,
+            span: Span { start: 0, end: 0 },
+        };
+
+        crate::ValueCounter::new_vals_to_compare(ms.item, ms.flag_ignore_case, col_vals)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(UniqBy {})
+    }
+}
diff --git a/crates/nu-command/tests/commands/mod.rs b/crates/nu-command/tests/commands/mod.rs
index 63d68eca3..ee8b4a8af 100644
--- a/crates/nu-command/tests/commands/mod.rs
+++ b/crates/nu-command/tests/commands/mod.rs
@@ -88,6 +88,7 @@ mod touch;
 mod transpose;
 mod try_;
 mod uniq;
+mod uniq_by;
 mod update;
 mod upsert;
 mod url;
diff --git a/crates/nu-command/tests/commands/uniq_by.rs b/crates/nu-command/tests/commands/uniq_by.rs
new file mode 100644
index 000000000..d94601ef7
--- /dev/null
+++ b/crates/nu-command/tests/commands/uniq_by.rs
@@ -0,0 +1,220 @@
+use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
+use nu_test_support::playground::Playground;
+use nu_test_support::{nu, pipeline};
+
+// Four CSV rows with three distinct `last_name` values leave three rows.
+#[test]
+fn removes_duplicate_rows() {
+    // Sandbox name is specific to this file so it cannot clash with another test's sandbox.
+    Playground::setup("uniq_by_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_at,type
+                Andrés,Robalino,10/11/2013,A
+                Afonso,Turner,10/12/2013,B
+                Yehuda,Katz,10/11/2013,A
+                Jonathan,Turner,11/12/2011,O
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | uniq-by last_name
+                | length
+
+            "#
+        ));
+
+        assert_eq!(actual.out, "3");
+    })
+}
+
+// Records whose keys appear in a different order still compare equal on column `a`.
+#[test]
+fn uniq_when_keys_out_of_order() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [{"a": "a", "b": [1,2,3]}, {"b": [1,2,3,4], "a": "a"}]
+            | uniq-by a
+        "#
+    ));
+    let expected = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [{"a": "a", "b": [1,2,3]}]
+        "#
+    ));
+
+    assert_eq!(actual.out, expected.out);
+}
+
+// `--count` reports how often each key occurred: A twice, B once.
+#[test]
+fn uniq_counting() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            ["A", "B", "A"]
+            | wrap item
+            | uniq-by item --count
+            | flatten
+            | where item == A
+            | get count
+            | get 0
+        "#
+    ));
+    assert_eq!(actual.out, "2");
+
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo ["A", "B", "A"]
+            | wrap item
+            | uniq-by item --count
+            | flatten
+            | where item == B
+            | get count
+            | get 0
+        "#
+    ));
+    assert_eq!(actual.out, "1");
+}
+
+// `--unique` keeps only rows whose key occurs exactly once.
+#[test]
+fn uniq_unique() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [1 2 3 4 1 5]
+            | wrap item
+            | uniq-by item --unique
+            | get item
+        "#
+    ));
+    let expected = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [2 3 4 5]
+        "#
+    ));
+    assert_eq!(actual.out, expected.out);
+}
+
+// Without `--ignore-case`, `apple` and `Apple` are distinct keys.
+#[test]
+fn table() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [[fruit day]; [apple monday] [apple friday] [Apple friday] [apple monday] [pear monday] [orange tuesday]]
+            | uniq-by fruit
+        "#
+    ));
+
+    let expected = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [[fruit day]; [apple monday] [Apple friday] [pear monday] [orange tuesday]]
+        "#
+    ));
+    assert_eq!(actual.out, expected.out);
+}
+
+// With several columns, rows are duplicates only when all of them match.
+#[test]
+fn uniq_by_multiple_columns() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [[fruit day]; [apple monday] [apple friday] [Apple friday] [apple monday] [pear monday] [orange tuesday]]
+            | uniq-by fruit day
+        "#
+    ));
+
+    let expected = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [[fruit day]; [apple monday] [apple friday] [Apple friday] [pear monday] [orange tuesday]]
+        "#
+    ));
+    assert_eq!(actual.out, expected.out);
+}
+
+// With `-i`, nested `bitoque` and `Bitoque` compare equal, so the third row is a duplicate.
+#[test]
+fn table_with_ignore_case() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [[origin, people];
+                [World, (
+                    [[name, meal];
+                        ['Geremias', {plate: 'bitoque', carbs: 100}]
+                    ]
+                )],
+                [World, (
+                    [[name, meal];
+                        ['Martin', {plate: 'bitoque', carbs: 100}]
+                    ]
+                )],
+                [World, (
+                    [[name, meal];
+                        ['Geremias', {plate: 'Bitoque', carbs: 100}]
+                    ]
+                )],
+            ] | uniq-by people -i
+        "#
+    ));
+
+    let expected = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            echo [[origin, people];
+                [World, (
+                    [[name, meal];
+                        ['Geremias', {plate: 'bitoque', carbs: 100}]
+                    ]
+                )],
+                [World, (
+                    [[name, meal];
+                        ['Martin', {plate: 'bitoque', carbs: 100}]
+                    ]
+                )],
+            ]
+        "#
+    ));
+
+    assert_eq!(actual.out, expected.out);
+}
+
+// Calling `uniq-by` without any column is an error.
+#[test]
+fn missing_parameter() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [11 22 33] | uniq-by
+        "#
+    ));
+
+    assert!(actual.err.contains("missing parameter"));
+}
+
+// Naming a column the table does not have is an error.
+#[test]
+fn wrong_column() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            [[fruit day]; [apple monday] [apple friday]]
+            | uniq-by column1
+        "#
+    ));
+
+    assert!(actual.err.contains("cannot find column 'column1'"));
+}
+