mirror of
https://github.com/nushell/nushell.git
synced 2025-01-11 00:38:23 +01:00
Add uniq command (#1132)
* start playing with ways to use the uniq command * WIP * Got uniq working, but still need to figure out args issue and add tests * Add some tests for uniq * fmt * remove commented out code * Add documentation and some additional tests showing uniq values and rows. Also removed args TODO * add changes that didn't get committed * whoops, I didn't save the docs correctly... * fmt * Add a test for uniq with nested json * Add another test * Fix unique-ness when json keys are out of order and make the test json more complicated
This commit is contained in:
parent
dba82ac530
commit
f37f29b441
@ -23,7 +23,7 @@ use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub enum UntaggedValue {
|
||||
Primitive(Primitive),
|
||||
Row(Dictionary),
|
||||
@ -182,7 +182,7 @@ impl UntaggedValue {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct Value {
|
||||
pub value: UntaggedValue,
|
||||
pub tag: Tag,
|
||||
|
@ -7,6 +7,7 @@ use indexmap::IndexMap;
|
||||
use nu_source::{b, DebugDocBuilder, PrettyDebug, Spanned, Tag};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::cmp::{Ord, Ordering, PartialOrd};
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq, Clone, Getters, new)]
|
||||
pub struct Dictionary {
|
||||
@ -14,6 +15,15 @@ pub struct Dictionary {
|
||||
pub entries: IndexMap<String, Value>,
|
||||
}
|
||||
|
||||
impl Hash for Dictionary {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
let mut entries = self.entries.clone();
|
||||
entries.sort_keys();
|
||||
entries.keys().collect::<Vec<&String>>().hash(state);
|
||||
entries.values().collect::<Vec<&Value>>().hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Dictionary {
|
||||
fn partial_cmp(&self, other: &Dictionary) -> Option<Ordering> {
|
||||
let this: Vec<&String> = self.entries.keys().collect();
|
||||
|
@ -12,7 +12,7 @@ use num_traits::cast::{FromPrimitive, ToPrimitive};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Deserialize, Serialize)]
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Deserialize, Serialize)]
|
||||
pub enum Primitive {
|
||||
Nothing,
|
||||
#[serde(with = "serde_bigint")]
|
||||
|
@ -3,7 +3,7 @@ use derive_new::new;
|
||||
use nu_source::{b, DebugDocBuilder, Spanned};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize, Hash)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub enum RangeInclusion {
|
||||
Inclusive,
|
||||
Exclusive,
|
||||
@ -25,7 +25,7 @@ impl RangeInclusion {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize, new)]
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize, new)]
|
||||
pub struct Range {
|
||||
pub from: (Spanned<Primitive>, RangeInclusion),
|
||||
pub to: (Spanned<Primitive>, RangeInclusion),
|
||||
|
36
docs/commands/uniq.md
Normal file
36
docs/commands/uniq.md
Normal file
@ -0,0 +1,36 @@
|
||||
# uniq
|
||||
|
||||
Returns unique rows or values from a dataset.
|
||||
|
||||
## Examples
|
||||
|
||||
Given a file `test.csv`
|
||||
|
||||
```
|
||||
first_name,last_name,rusty_at,type
|
||||
Andrés,Robalino,10/11/2013,A
|
||||
Andrés,Robalino,10/11/2013,A
|
||||
Jonathan,Turner,10/12/2013,B
|
||||
Yehuda,Katz,10/11/2013,A
|
||||
```
|
||||
|
||||
```
|
||||
> `open test.csv | uniq`
|
||||
━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━
|
||||
# │ first_name │ last_name │ rusty_at │ type
|
||||
───┼────────────┼───────────┼────────────┼──────
|
||||
0 │ Andrés │ Robalino │ 10/11/2013 │ A
|
||||
1 │ Jonathan │ Turner │ 10/12/2013 │ B
|
||||
2 │ Yehuda │ Katz │ 10/11/2013 │ A
|
||||
━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━
|
||||
```
|
||||
|
||||
```
|
||||
> `open test.csv | get type | uniq`
|
||||
━━━┯━━━━━━━━━
|
||||
# │ <value>
|
||||
───┼─────────
|
||||
0 │ A
|
||||
1 │ B
|
||||
━━━┷━━━━━━━━━
|
||||
```
|
@ -294,6 +294,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
||||
whole_stream_command(Default),
|
||||
whole_stream_command(SkipWhile),
|
||||
whole_stream_command(Range),
|
||||
whole_stream_command(Uniq),
|
||||
// Table manipulation
|
||||
whole_stream_command(Wrap),
|
||||
whole_stream_command(Pivot),
|
||||
|
@ -90,6 +90,7 @@ pub(crate) mod to_tsv;
|
||||
pub(crate) mod to_url;
|
||||
pub(crate) mod to_yaml;
|
||||
pub(crate) mod trim;
|
||||
pub(crate) mod uniq;
|
||||
pub(crate) mod version;
|
||||
pub(crate) mod what;
|
||||
pub(crate) mod where_;
|
||||
@ -185,6 +186,7 @@ pub(crate) use to_tsv::ToTSV;
|
||||
pub(crate) use to_url::ToURL;
|
||||
pub(crate) use to_yaml::ToYAML;
|
||||
pub(crate) use trim::Trim;
|
||||
pub(crate) use uniq::Uniq;
|
||||
pub(crate) use version::Version;
|
||||
pub(crate) use what::What;
|
||||
pub(crate) use where_::Where;
|
||||
|
48
src/commands/uniq.rs
Normal file
48
src/commands/uniq.rs
Normal file
@ -0,0 +1,48 @@
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::context::CommandRegistry;
|
||||
use crate::prelude::*;
|
||||
use indexmap::set::IndexSet;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{ReturnSuccess, Signature};
|
||||
|
||||
/// Arguments deserialized for the `uniq` command; the struct has no fields.
#[derive(Deserialize)]
|
||||
struct UniqArgs {}
|
||||
|
||||
/// The `uniq` command; its behavior is supplied by its `WholeStreamCommand` implementation.
pub struct Uniq;
|
||||
|
||||
impl WholeStreamCommand for Uniq {
|
||||
fn name(&self) -> &str {
|
||||
"uniq"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("uniq")
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Return the unique rows"
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, uniq)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
fn uniq(
|
||||
UniqArgs {}: UniqArgs,
|
||||
RunnableContext { input, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let uniq_values: IndexSet<_> = input.values.collect().await;
|
||||
|
||||
for item in uniq_values.iter().map(|row| ReturnSuccess::value(row.clone())) {
|
||||
yield item;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
@ -325,6 +325,10 @@ mod tests {
|
||||
loc: fixtures().join("jonathan.xml"),
|
||||
at: 0
|
||||
},
|
||||
Res {
|
||||
loc: fixtures().join("nested_uniq.json"),
|
||||
at: 0
|
||||
},
|
||||
Res {
|
||||
loc: fixtures().join("sample.bson"),
|
||||
at: 0
|
||||
|
@ -26,5 +26,6 @@ mod save;
|
||||
mod sort_by;
|
||||
mod split_by;
|
||||
mod split_column;
|
||||
mod uniq;
|
||||
mod where_;
|
||||
mod wrap;
|
||||
|
118
tests/commands/uniq.rs
Normal file
118
tests/commands/uniq.rs
Normal file
@ -0,0 +1,118 @@
|
||||
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
|
||||
use nu_test_support::playground::Playground;
|
||||
use nu_test_support::{nu, pipeline};
|
||||
|
||||
/// Duplicate CSV rows collapse into one: five data rows, three distinct.
#[test]
fn uniq_rows() {
    Playground::setup("uniq_test_1", |dirs, sandbox| {
        let csv = r#"
            first_name,last_name,rusty_at,type
            Andrés,Robalino,10/11/2013,A
            Jonathan,Turner,10/12/2013,B
            Yehuda,Katz,10/11/2013,A
            Jonathan,Turner,10/12/2013,B
            Yehuda,Katz,10/11/2013,A
        "#;
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            csv,
        )]);

        let row_count = nu!(
            cwd: dirs.test(), pipeline(
            r#"
                open los_tres_caballeros.csv
                | uniq
                | count
                | echo $it
            "#
        ));

        assert_eq!(row_count, "3");
    })
}
|
||||
|
||||
/// After narrowing to `rusty_at` and `type`, only two distinct rows remain.
#[test]
fn uniq_columns() {
    Playground::setup("uniq_test_2", |dirs, sandbox| {
        let csv = r#"
            first_name,last_name,rusty_at,type
            Andrés,Robalino,10/11/2013,A
            Jonathan,Turner,10/12/2013,B
            Yehuda,Katz,10/11/2013,A
            Jonathan,Turner,10/12/2013,B
            Yehuda,Katz,10/11/2013,A
        "#;
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            csv,
        )]);

        let row_count = nu!(
            cwd: dirs.test(), pipeline(
            r#"
                open los_tres_caballeros.csv
                | pick rusty_at type
                | uniq
                | count
                | echo $it
            "#
        ));

        assert_eq!(row_count, "2");
    })
}
|
||||
|
||||
/// Plain values (not rows) are de-duplicated: the `type` column holds only `A` and `B`.
#[test]
fn uniq_values() {
    Playground::setup("uniq_test_3", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
                first_name,last_name,rusty_at,type
                Andrés,Robalino,10/11/2013,A
                Jonathan,Turner,10/12/2013,B
                Yehuda,Katz,10/11/2013,A
                Jonathan,Turner,10/12/2013,B
                Yehuda,Katz,10/11/2013,A
            "#,
        )]);

        // `get type` extracts the column's values. The earlier `pick get type`
        // selected columns named `get` and `type`, so the pipeline was still
        // de-duplicating rows rather than values.
        let actual = nu!(
            cwd: dirs.test(), pipeline(
            r#"
                open los_tres_caballeros.csv
                | get type
                | uniq
                | count
                | echo $it
            "#
        ));

        assert_eq!(actual, "2");
    })
}
|
||||
|
||||
/// Two JSON objects with the same entries in a different key order count as one.
#[test]
fn uniq_when_keys_out_of_order() {
    let unique_count = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
            echo '[{"a": "a", "b": [1,2,3]},{"b": [1,2,3], "a": "a"}]'
            | from-json
            | uniq
            | count
            | echo $it
        "#
    ));

    assert_eq!(unique_count, "1");
}
|
||||
|
||||
/// The `nested_uniq.json` fixture holds four objects, two of which are duplicates of each other.
#[test]
fn uniq_nested_json_structures() {
    let unique_count = nu!(
        cwd: "tests/fixtures/formats",
        "open nested_uniq.json | uniq | count | echo $it"
    );

    assert_eq!(unique_count, "3");
}
|
@ -7,7 +7,7 @@ fn filters_by_unit_size_comparison() {
|
||||
"ls | where size > 1kb | sort-by size | get name | first 1 | trim | echo $it"
|
||||
);
|
||||
|
||||
assert_eq!(actual, "cargo_sample.toml");
|
||||
assert_eq!(actual, "nested_uniq.json");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
72
tests/fixtures/formats/nested_uniq.json
vendored
Normal file
72
tests/fixtures/formats/nested_uniq.json
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
[
|
||||
{
|
||||
"name": "this is duplicated",
|
||||
"nesting": [
|
||||
{
|
||||
"a": "a",
|
||||
"b": "b"
|
||||
},
|
||||
{
|
||||
"c": "c",
|
||||
"d": "d"
|
||||
}
|
||||
],
|
||||
"can_be_ordered_differently": {
|
||||
"array": [1, 2, 3, 4, 5],
|
||||
"something": { "else": "works" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"can_be_ordered_differently": {
|
||||
"something": { "else": "works" },
|
||||
"array": [1, 2, 3, 4, 5]
|
||||
},
|
||||
"nesting": [
|
||||
{
|
||||
"b": "b",
|
||||
"a": "a"
|
||||
},
|
||||
{
|
||||
"d": "d",
|
||||
"c": "c"
|
||||
}
|
||||
],
|
||||
"name": "this is duplicated"
|
||||
},
|
||||
{
|
||||
"name": "this is unique",
|
||||
"nesting": [
|
||||
{
|
||||
"a": "b",
|
||||
"b": "a"
|
||||
},
|
||||
{
|
||||
"c": "d",
|
||||
"d": "c"
|
||||
}
|
||||
],
|
||||
"can_be_ordered_differently": {
|
||||
"array": [],
|
||||
"something": { "else": "does not work" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "this is unique",
|
||||
"nesting": [
|
||||
{
|
||||
"a": "a",
|
||||
"b": "b",
|
||||
"c": "c"
|
||||
},
|
||||
{
|
||||
"d": "d",
|
||||
"e": "e",
|
||||
"f": "f"
|
||||
}
|
||||
],
|
||||
"can_be_ordered_differently": {
|
||||
"array": [],
|
||||
"something": { "else": "works" }
|
||||
}
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue
Block a user