allow group-by and split-by to work with other values (#14086)

# Description

This PR updates `group-by` and `split-by` to allow other nushell Values
to be used, namely bools.

### Before
```nushell
❯ [false, false, true, false, true, false] | group-by | table -e
Error: nu:🐚:cant_convert

  × Can't convert to string.
   ╭─[entry #1:1:2]
 1 │ [false, false, true, false, true, false] | group-by | table -e
   ·  ──┬──
   ·    ╰── can't convert bool to string
   ╰────
```
### After
```nushell
❯ [false, false, true, false, true, false] | group-by | table -e
╭───────┬───────────────╮
│       │ ╭───┬───────╮ │
│ false │ │ 0 │ false │ │
│       │ │ 1 │ false │ │
│       │ │ 2 │ false │ │
│       │ │ 3 │ false │ │
│       │ ╰───┴───────╯ │
│       │ ╭───┬──────╮  │
│ true  │ │ 0 │ true │  │
│       │ │ 1 │ true │  │
│       │ ╰───┴──────╯  │
╰───────┴───────────────╯
```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2024-10-17 16:14:01 -05:00 committed by GitHub
parent 5e784d38eb
commit 299d199150
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 43 deletions

View File

@ -38,6 +38,14 @@ impl Command for GroupBy {
"Splits a list or table into groups, and returns a record containing those groups."
}
fn extra_description(&self) -> &str {
r#"the group-by command makes some assumptions:
- if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
- datetime is formatted based on your configuration setting. use `format date` to change the format.
- filesize is formatted based on your configuration setting. use `format filesize` to change the format.
- some nushell values are not supported, such as closures."#
}
fn run(
&self,
engine_state: &EngineState,
@ -114,6 +122,20 @@ impl Command for GroupBy {
}),
])),
},
Example {
description: "Group bools, whether they are strings or actual bools",
example: r#"[true "true" false "false"] | group-by"#,
result: Some(Value::test_record(record! {
"true" => Value::test_list(vec![
Value::test_bool(true),
Value::test_string("true"),
]),
"false" => Value::test_list(vec![
Value::test_bool(false),
Value::test_string("false"),
]),
})),
}
]
}
}
@ -127,6 +149,7 @@ pub fn group_by(
let head = call.head;
let grouper: Option<Value> = call.opt(engine_state, stack, 0)?;
let to_table = call.has_flag(engine_state, stack, "to-table")?;
let config = engine_state.get_config();
let values: Vec<Value> = input.into_iter().collect();
if values.is_empty() {
@ -137,7 +160,7 @@ pub fn group_by(
Some(grouper) => {
let span = grouper.span();
match grouper {
Value::CellPath { val, .. } => group_cell_path(val, values)?,
Value::CellPath { val, .. } => group_cell_path(val, values, config)?,
Value::Closure { val, .. } => {
group_closure(values, span, *val, engine_state, stack)?
}
@ -149,7 +172,7 @@ pub fn group_by(
}
}
}
None => group_no_grouper(values)?,
None => group_no_grouper(values, config)?,
};
let value = if to_table {
@ -164,6 +187,7 @@ pub fn group_by(
fn group_cell_path(
column_name: CellPath,
values: Vec<Value>,
config: &nu_protocol::Config,
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups = IndexMap::<_, Vec<_>>::new();
@ -176,18 +200,21 @@ fn group_cell_path(
continue; // likely the result of a failed optional access, ignore this value
}
let key = key.coerce_string()?;
let key = key.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}
Ok(groups)
}
fn group_no_grouper(values: Vec<Value>) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
fn group_no_grouper(
values: Vec<Value>,
config: &nu_protocol::Config,
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups = IndexMap::<_, Vec<_>>::new();
for value in values.into_iter() {
let key = value.coerce_string()?;
let key = value.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}
@ -203,12 +230,13 @@ fn group_closure(
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups = IndexMap::<_, Vec<_>>::new();
let mut closure = ClosureEval::new(engine_state, stack, closure);
let config = engine_state.get_config();
for value in values {
let key = closure
.run_with_value(value.clone())?
.into_value(span)?
.coerce_into_string()?;
.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}

View File

@ -84,16 +84,16 @@ pub fn split_by(
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let name = call.head;
let config = engine_state.get_config();
let splitter: Option<Value> = call.opt(engine_state, stack, 0)?;
match splitter {
Some(v) => {
let splitter = Some(Spanned {
item: v.coerce_into_string()?,
item: v.to_abbreviated_string(config),
span: name,
});
Ok(split(splitter.as_ref(), input, name)?)
Ok(split(splitter.as_ref(), input, name, config)?)
}
// This uses the same format as the 'requires a column name' error in sort_utils.rs
None => Err(ShellError::GenericError {
@ -110,6 +110,7 @@ pub fn split(
column_name: Option<&Spanned<String>>,
values: PipelineData,
span: Span,
config: &nu_protocol::Config,
) -> Result<PipelineData, ShellError> {
let grouper = if let Some(column_name) = column_name {
Grouper::ByColumn(Some(column_name.clone()))
@ -127,7 +128,7 @@ pub fn split(
};
match group_key {
Some(group_key) => Ok(group_key.coerce_string()?),
Some(group_key) => Ok(group_key.to_abbreviated_string(config)),
None => Err(ShellError::CantFindColumn {
col_name: column_name.item.to_string(),
span: Some(column_name.span),
@ -136,12 +137,12 @@ pub fn split(
}
};
data_split(values, Some(&block), span)
data_split(values, Some(&block), span, config)
}
Grouper::ByColumn(None) => {
let block = move |_, row: &Value| row.coerce_string();
let block = move |_, row: &Value| Ok(row.to_abbreviated_string(config));
data_split(values, Some(&block), span)
data_split(values, Some(&block), span, config)
}
}
}
@ -151,6 +152,7 @@ fn data_group(
values: &Value,
grouper: Option<&dyn Fn(usize, &Value) -> Result<String, ShellError>>,
span: Span,
config: &nu_protocol::Config,
) -> Result<Value, ShellError> {
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
@ -158,7 +160,7 @@ fn data_group(
let group_key = if let Some(ref grouper) = grouper {
grouper(idx, &value)
} else {
value.coerce_string()
Ok(value.to_abbreviated_string(config))
};
let group = groups.entry(group_key?).or_default();
@ -179,6 +181,7 @@ pub fn data_split(
value: PipelineData,
splitter: Option<&dyn Fn(usize, &Value) -> Result<String, ShellError>>,
dst_span: Span,
config: &nu_protocol::Config,
) -> Result<PipelineData, ShellError> {
let mut splits = indexmap::IndexMap::new();
@ -188,7 +191,7 @@ pub fn data_split(
match v {
Value::Record { val: grouped, .. } => {
for (outer_key, list) in grouped.into_owned() {
match data_group(&list, splitter, span) {
match data_group(&list, splitter, span, config) {
Ok(grouped_vals) => {
if let Value::Record { val: sub, .. } = grouped_vals {
for (inner_key, subset) in sub.into_owned() {

View File

@ -23,37 +23,33 @@ fn groups() {
#[test]
fn errors_if_given_unknown_column_name() {
let sample = r#"
{
"nu": {
"committers": [
{"name": "Andrés N. Robalino"},
{"name": "JT Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"}
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#;
let sample = r#"{
"nu": {
"committers": [
{"name": "Andrés N. Robalino"},
{"name": "JT Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"}
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#;
let actual = nu!(pipeline(&format!(
r#"
'{sample}'
| from json
| group-by {{|| get nu.releases.version }}
"#
r#"'{sample}'
| from json
| group-by {{|| get nu.releases.missing_column }}"#
)));
assert!(actual.err.contains("can't convert list<string> to string"));
assert!(actual.err.contains("cannot find column"));
}
#[test]