mirror of
https://github.com/nushell/nushell.git
synced 2025-01-22 14:18:55 +01:00
Uniq: --count
flag to count occurrences (#2017)
* uniq: Add counting option (WIP!) Usage: fetch https://raw.githubusercontent.com/timbray/topfew/master/test/data/access-1k | lines | wrap item | uniq | sort-by count | last 10
* uniq: Add first test
* uniq: Re-enable the non-counting variant.
* uniq: Also handle primitive lines.
* uniq: Update documentation
* uniq: Final comment about error handling. Let's get some feedback
* uniq: Address review comments. Not happy with the way I create a TypeError. There must be a cleaner way. Anyway, good for shipping.
* uniq: Use Labeled_error as suggested by jturner in chat.
* uniq: Return error directly.

Co-authored-by: Christoph Siedentop <christoph@siedentop.name>
This commit is contained in:
parent
480600c465
commit
bce6f5a3e6
@ -1,9 +1,9 @@
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::context::CommandRegistry;
|
||||
use crate::prelude::*;
|
||||
use indexmap::set::IndexSet;
|
||||
use indexmap::map::IndexMap;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{ReturnSuccess, Signature};
|
||||
use nu_protocol::Signature;
|
||||
|
||||
pub struct Uniq;
|
||||
|
||||
@ -14,7 +14,7 @@ impl WholeStreamCommand for Uniq {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("uniq")
|
||||
Signature::build("uniq").switch("count", "Count the unique rows", Some('c'))
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -30,17 +30,66 @@ impl WholeStreamCommand for Uniq {
|
||||
}
|
||||
}
|
||||
|
||||
async fn uniq(args: CommandArgs, _registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
||||
async fn uniq(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
||||
let args = args.evaluate_once(&registry).await?;
|
||||
let should_show_count = args.has("count");
|
||||
let input = args.input;
|
||||
let uniq_values: IndexSet<_> = input.collect().await;
|
||||
let uniq_values = {
|
||||
let mut counter = IndexMap::<nu_protocol::Value, usize>::new();
|
||||
for line in input.into_vec().await {
|
||||
*counter.entry(line).or_insert(0) += 1;
|
||||
}
|
||||
counter
|
||||
};
|
||||
|
||||
let mut values_vec_deque = VecDeque::new();
|
||||
|
||||
for item in uniq_values
|
||||
.iter()
|
||||
.map(|row| ReturnSuccess::value(row.clone()))
|
||||
{
|
||||
values_vec_deque.push_back(item);
|
||||
if should_show_count {
|
||||
for item in uniq_values {
|
||||
use nu_protocol::{UntaggedValue, Value};
|
||||
let value = {
|
||||
match item.0.value {
|
||||
UntaggedValue::Row(mut row) => {
|
||||
row.entries.insert(
|
||||
"count".to_string(),
|
||||
UntaggedValue::int(item.1).into_untagged_value(),
|
||||
);
|
||||
Value {
|
||||
value: UntaggedValue::Row(row),
|
||||
tag: item.0.tag,
|
||||
}
|
||||
}
|
||||
UntaggedValue::Primitive(p) => {
|
||||
let mut map = IndexMap::<String, Value>::new();
|
||||
map.insert(
|
||||
"value".to_string(),
|
||||
UntaggedValue::Primitive(p).into_untagged_value(),
|
||||
);
|
||||
map.insert(
|
||||
"count".to_string(),
|
||||
UntaggedValue::int(item.1).into_untagged_value(),
|
||||
);
|
||||
Value {
|
||||
value: UntaggedValue::row(map),
|
||||
tag: item.0.tag,
|
||||
}
|
||||
}
|
||||
UntaggedValue::Table(_) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"uniq -c cannot operate on tables.",
|
||||
"source",
|
||||
item.0.tag.span,
|
||||
))
|
||||
}
|
||||
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
|
||||
}
|
||||
};
|
||||
values_vec_deque.push_back(value);
|
||||
}
|
||||
} else {
|
||||
for item in uniq_values {
|
||||
values_vec_deque.push_back(item.0);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(futures::stream::iter(values_vec_deque).to_output_stream())
|
||||
|
@ -140,3 +140,26 @@ fn uniq_when_keys_out_of_order() {
|
||||
|
||||
assert_eq!(actual.out, "1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uniq_counting() {
|
||||
let actual = nu!(
|
||||
cwd: "tests/fixtures/formats", pipeline(
|
||||
r#"
|
||||
echo '["A", "B", "A"]'
|
||||
| from json
|
||||
| wrap item
|
||||
| uniq --count
|
||||
"#
|
||||
));
|
||||
let expected = nu!(
|
||||
cwd: "tests/fixtures/formats", pipeline(
|
||||
r#"
|
||||
echo '[{"item": "A", "count": 2}, {"item": "B", "count": 1}]'
|
||||
| from json
|
||||
"#
|
||||
));
|
||||
print!("{}", actual.out);
|
||||
print!("{}", expected.out);
|
||||
assert_eq!(actual.out, expected.out);
|
||||
}
|
||||
|
@ -34,3 +34,16 @@ Yehuda,Katz,10/11/2013,A
|
||||
1 │ B
|
||||
━━━┷━━━━━━━━━
|
||||
```
|
||||
|
||||
### Counting
|
||||
`--count` or `-c` is the flag to output a `count` column.
|
||||
|
||||
```
|
||||
> `open test.csv | get type | uniq -c`
|
||||
───┬───────┬───────
|
||||
# │ value │ count
|
||||
───┼───────┼───────
|
||||
0 │ A │ 3
|
||||
1 │ B │ 2
|
||||
───┴───────┴───────
|
||||
```
|
Loading…
Reference in New Issue
Block a user