Uniq: --count flag to count occurrences (#2017)

* uniq: Add counting option (WIP!)

Usage:

fetch https://raw.githubusercontent.com/timbray/topfew/master/test/data/access-1k | lines | wrap item | uniq | sort-by count | last 10

* uniq: Add first test

* uniq: Re-enable the non-counting variant.

* uniq: Also handle primitive lines.

* uniq: Update documentation

* uniq: Final comment about error handling. Let's get some feedback

* uniq: Address review comments.

Not happy with the way I create a TypeError. There must be a cleaner
way. Anyway, good for shipping.

* uniq: Use `ShellError::labeled_error` as suggested by jturner in chat.

* uniq: Return error directly.

Co-authored-by: Christoph Siedentop <christoph@siedentop.name>
This commit is contained in:
siedentop 2020-06-20 17:22:06 -07:00 committed by GitHub
parent 480600c465
commit bce6f5a3e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 10 deletions

View File

@ -1,9 +1,9 @@
use crate::commands::WholeStreamCommand; use crate::commands::WholeStreamCommand;
use crate::context::CommandRegistry; use crate::context::CommandRegistry;
use crate::prelude::*; use crate::prelude::*;
use indexmap::set::IndexSet; use indexmap::map::IndexMap;
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature}; use nu_protocol::Signature;
pub struct Uniq; pub struct Uniq;
@ -14,7 +14,7 @@ impl WholeStreamCommand for Uniq {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("uniq") Signature::build("uniq").switch("count", "Count the unique rows", Some('c'))
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -30,17 +30,66 @@ impl WholeStreamCommand for Uniq {
} }
} }
async fn uniq(args: CommandArgs, _registry: &CommandRegistry) -> Result<OutputStream, ShellError> { async fn uniq(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
let args = args.evaluate_once(&registry).await?;
let should_show_count = args.has("count");
let input = args.input; let input = args.input;
let uniq_values: IndexSet<_> = input.collect().await; let uniq_values = {
let mut counter = IndexMap::<nu_protocol::Value, usize>::new();
for line in input.into_vec().await {
*counter.entry(line).or_insert(0) += 1;
}
counter
};
let mut values_vec_deque = VecDeque::new(); let mut values_vec_deque = VecDeque::new();
for item in uniq_values if should_show_count {
.iter() for item in uniq_values {
.map(|row| ReturnSuccess::value(row.clone())) use nu_protocol::{UntaggedValue, Value};
{ let value = {
values_vec_deque.push_back(item); match item.0.value {
UntaggedValue::Row(mut row) => {
row.entries.insert(
"count".to_string(),
UntaggedValue::int(item.1).into_untagged_value(),
);
Value {
value: UntaggedValue::Row(row),
tag: item.0.tag,
}
}
UntaggedValue::Primitive(p) => {
let mut map = IndexMap::<String, Value>::new();
map.insert(
"value".to_string(),
UntaggedValue::Primitive(p).into_untagged_value(),
);
map.insert(
"count".to_string(),
UntaggedValue::int(item.1).into_untagged_value(),
);
Value {
value: UntaggedValue::row(map),
tag: item.0.tag,
}
}
UntaggedValue::Table(_) => {
return Err(ShellError::labeled_error(
"uniq -c cannot operate on tables.",
"source",
item.0.tag.span,
))
}
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
}
};
values_vec_deque.push_back(value);
}
} else {
for item in uniq_values {
values_vec_deque.push_back(item.0);
}
} }
Ok(futures::stream::iter(values_vec_deque).to_output_stream()) Ok(futures::stream::iter(values_vec_deque).to_output_stream())

View File

@ -140,3 +140,26 @@ fn uniq_when_keys_out_of_order() {
assert_eq!(actual.out, "1"); assert_eq!(actual.out, "1");
} }
/// Checks that `uniq --count` emits one row per distinct input row with an
/// added `count` column holding the number of occurrences.
#[test]
fn uniq_counting() {
    // Pipeline under test: three wrapped items, two of which are identical.
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
echo '["A", "B", "A"]'
| from json
| wrap item
| uniq --count
"#
    ));
    // Reference output: the same table spelled out literally as JSON.
    let expected = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
echo '[{"item": "A", "count": 2}, {"item": "B", "count": 1}]'
| from json
"#
    ));
    // `assert_eq!` prints both operands on failure, so no extra debug
    // printing of `actual.out` / `expected.out` is needed.
    assert_eq!(actual.out, expected.out);
}

View File

@ -34,3 +34,16 @@ Yehuda,Katz,10/11/2013,A
1 │ B 1 │ B
━━━┷━━━━━━━━━ ━━━┷━━━━━━━━━
``` ```
### Counting
`--count` or `-c` is the flag to output a `count` column.
```
> open test.csv | get type | uniq -c
───┬───────┬───────
# │ value │ count
───┼───────┼───────
0 │ A │ 3
1 │ B │ 2
───┴───────┴───────
```