mirror of
https://github.com/nushell/nushell.git
synced 2025-01-22 22:29:10 +01:00
Uniq: --count
flag to count occurrences (#2017)
* uniq: Add counting option (WIP!) Usage: fetch https://raw.githubusercontent.com/timbray/topfew/master/test/data/access-1k | lines | wrap item | uniq | sort-by count | last 10 * uniq: Add first test * uniq: Re-enable the non-counting variant. * uniq: Also handle primitive lines. * uniq: Update documentation * uniq: Final comment about error handling. Let's get some feedback * uniq: Address review comments. Not happy with the way I create a TypeError. There must be a cleaner way. Anyway, good for shipping. * uniq: Use Labeled_error as suggested by jturner in chat. * uniq: Return error directly. Co-authored-by: Christoph Siedentop <christoph@siedentop.name>
This commit is contained in:
parent
480600c465
commit
bce6f5a3e6
@ -1,9 +1,9 @@
|
|||||||
use crate::commands::WholeStreamCommand;
|
use crate::commands::WholeStreamCommand;
|
||||||
use crate::context::CommandRegistry;
|
use crate::context::CommandRegistry;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use indexmap::set::IndexSet;
|
use indexmap::map::IndexMap;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{ReturnSuccess, Signature};
|
use nu_protocol::Signature;
|
||||||
|
|
||||||
pub struct Uniq;
|
pub struct Uniq;
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ impl WholeStreamCommand for Uniq {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("uniq")
|
Signature::build("uniq").switch("count", "Count the unique rows", Some('c'))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
@ -30,17 +30,66 @@ impl WholeStreamCommand for Uniq {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn uniq(args: CommandArgs, _registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
async fn uniq(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
||||||
|
let args = args.evaluate_once(&registry).await?;
|
||||||
|
let should_show_count = args.has("count");
|
||||||
let input = args.input;
|
let input = args.input;
|
||||||
let uniq_values: IndexSet<_> = input.collect().await;
|
let uniq_values = {
|
||||||
|
let mut counter = IndexMap::<nu_protocol::Value, usize>::new();
|
||||||
|
for line in input.into_vec().await {
|
||||||
|
*counter.entry(line).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
counter
|
||||||
|
};
|
||||||
|
|
||||||
let mut values_vec_deque = VecDeque::new();
|
let mut values_vec_deque = VecDeque::new();
|
||||||
|
|
||||||
for item in uniq_values
|
if should_show_count {
|
||||||
.iter()
|
for item in uniq_values {
|
||||||
.map(|row| ReturnSuccess::value(row.clone()))
|
use nu_protocol::{UntaggedValue, Value};
|
||||||
{
|
let value = {
|
||||||
values_vec_deque.push_back(item);
|
match item.0.value {
|
||||||
|
UntaggedValue::Row(mut row) => {
|
||||||
|
row.entries.insert(
|
||||||
|
"count".to_string(),
|
||||||
|
UntaggedValue::int(item.1).into_untagged_value(),
|
||||||
|
);
|
||||||
|
Value {
|
||||||
|
value: UntaggedValue::Row(row),
|
||||||
|
tag: item.0.tag,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UntaggedValue::Primitive(p) => {
|
||||||
|
let mut map = IndexMap::<String, Value>::new();
|
||||||
|
map.insert(
|
||||||
|
"value".to_string(),
|
||||||
|
UntaggedValue::Primitive(p).into_untagged_value(),
|
||||||
|
);
|
||||||
|
map.insert(
|
||||||
|
"count".to_string(),
|
||||||
|
UntaggedValue::int(item.1).into_untagged_value(),
|
||||||
|
);
|
||||||
|
Value {
|
||||||
|
value: UntaggedValue::row(map),
|
||||||
|
tag: item.0.tag,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UntaggedValue::Table(_) => {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"uniq -c cannot operate on tables.",
|
||||||
|
"source",
|
||||||
|
item.0.tag.span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
values_vec_deque.push_back(value);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for item in uniq_values {
|
||||||
|
values_vec_deque.push_back(item.0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(futures::stream::iter(values_vec_deque).to_output_stream())
|
Ok(futures::stream::iter(values_vec_deque).to_output_stream())
|
||||||
|
@ -140,3 +140,26 @@ fn uniq_when_keys_out_of_order() {
|
|||||||
|
|
||||||
assert_eq!(actual.out, "1");
|
assert_eq!(actual.out, "1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn uniq_counting() {
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: "tests/fixtures/formats", pipeline(
|
||||||
|
r#"
|
||||||
|
echo '["A", "B", "A"]'
|
||||||
|
| from json
|
||||||
|
| wrap item
|
||||||
|
| uniq --count
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
let expected = nu!(
|
||||||
|
cwd: "tests/fixtures/formats", pipeline(
|
||||||
|
r#"
|
||||||
|
echo '[{"item": "A", "count": 2}, {"item": "B", "count": 1}]'
|
||||||
|
| from json
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
print!("{}", actual.out);
|
||||||
|
print!("{}", expected.out);
|
||||||
|
assert_eq!(actual.out, expected.out);
|
||||||
|
}
|
||||||
|
@ -34,3 +34,16 @@ Yehuda,Katz,10/11/2013,A
|
|||||||
1 │ B
|
1 │ B
|
||||||
━━━┷━━━━━━━━━
|
━━━┷━━━━━━━━━
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Counting
|
||||||
|
`--count` or `-c` is the flag to output a `count` column.
|
||||||
|
|
||||||
|
```
|
||||||
|
> `open test.csv | get type | uniq -c`
|
||||||
|
───┬───────┬───────
|
||||||
|
# │ value │ count
|
||||||
|
───┼───────┼───────
|
||||||
|
0 │ A │ 3
|
||||||
|
1 │ B │ 2
|
||||||
|
───┴───────┴───────
|
||||||
|
```
|
Loading…
Reference in New Issue
Block a user