Deprecate group in favor of chunks (#13377)

# Description
The name of the `group` command is a little unclear/ambiguous.
Everything I look at it, I think of `group-by`. I think `chunks` more
clearly conveys what the `group` command does. Namely, it divides the
input list into chunks of a certain size. For example,
[`slice::chunks`](https://doc.rust-lang.org/std/primitive.slice.html#method.chunks)
has the same name. So, this PR adds a new `chunks` command to replace
the now deprecated `group` command.

The `chunks` command is a refactored version of `group`. As such, there
is a small performance improvement:
```nushell
# $data is a very large list
> bench { $data | chunks 2 } --rounds 30 | get mean
474ms 921µs 190ns

# deprecation warning was disabled here for fairness
> bench { $data | group 2 } --rounds 30 | get mean
592ms 702µs 440ns



> bench { $data | chunks 200 } --rounds 30 | get mean
374ms 188µs 318ns

> bench { $data | group 200 } --rounds 30 | get mean
481ms 264µs 869ns 



> bench { $data | chunks 1 } --rounds 30 | get mean
642ms 574µs 42ns

> bench { $data | group 1 } --rounds 30 | get mean
981ms 602µs 513ns
```

# User-Facing Changes
- `group` command has been deprecated in favor of new `chunks` command.
- `chunks` errors when given a chunk size of `0` whereas `group` returns
chunks with one element.

# Tests + Formatting
Added tests for `chunks`, since `group` did not have any tests.

# After Submitting
Update book if necessary.
This commit is contained in:
Ian Manske
2024-07-16 03:49:00 +00:00
committed by GitHub
parent 3d1145e759
commit 0918050ac8
11 changed files with 269 additions and 4 deletions

View File

@ -31,6 +31,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
All,
Any,
Append,
Chunks,
Columns,
Compact,
Default,

View File

@ -0,0 +1,153 @@
use nu_engine::command_prelude::*;
use nu_protocol::ListStream;
#[derive(Clone)]
pub struct Chunks;
impl Command for Chunks {
fn name(&self) -> &str {
"chunks"
}
fn signature(&self) -> Signature {
Signature::build("chunks")
.input_output_types(vec![
(Type::table(), Type::list(Type::table())),
(Type::list(Type::Any), Type::list(Type::list(Type::Any))),
])
.required("chunk_size", SyntaxShape::Int, "The size of each chunk.")
.category(Category::Filters)
}
fn usage(&self) -> &str {
"Divide a list or table into chunks of `chunk_size`."
}
fn extra_usage(&self) -> &str {
"This command will error if `chunk_size` is negative or zero."
}
fn search_terms(&self) -> Vec<&str> {
vec!["batch", "group"]
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
example: "[1 2 3 4] | chunks 2",
description: "Chunk a list into pairs",
result: Some(Value::test_list(vec![
Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
Value::test_list(vec![Value::test_int(3), Value::test_int(4)]),
])),
},
Example {
example: "[[foo bar]; [0 1] [2 3] [4 5] [6 7] [8 9]] | chunks 3",
description: "Chunk the rows of a table into triplets",
result: Some(Value::test_list(vec![
Value::test_list(vec![
Value::test_record(record! {
"foo" => Value::test_int(0),
"bar" => Value::test_int(1),
}),
Value::test_record(record! {
"foo" => Value::test_int(2),
"bar" => Value::test_int(3),
}),
Value::test_record(record! {
"foo" => Value::test_int(4),
"bar" => Value::test_int(5),
}),
]),
Value::test_list(vec![
Value::test_record(record! {
"foo" => Value::test_int(6),
"bar" => Value::test_int(7),
}),
Value::test_record(record! {
"foo" => Value::test_int(8),
"bar" => Value::test_int(9),
}),
]),
])),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let chunk_size: Value = call.req(engine_state, stack, 0)?;
let size =
usize::try_from(chunk_size.as_int()?).map_err(|_| ShellError::NeedsPositiveValue {
span: chunk_size.span(),
})?;
if size == 0 {
return Err(ShellError::IncorrectValue {
msg: "`chunk_size` cannot be zero".into(),
val_span: chunk_size.span(),
call_span: head,
});
}
match input {
PipelineData::Value(Value::List { vals, .. }, metadata) => {
let chunks = ChunksIter::new(vals, size, head);
let stream = ListStream::new(chunks, head, engine_state.signals().clone());
Ok(PipelineData::ListStream(stream, metadata))
}
PipelineData::ListStream(stream, metadata) => {
let stream = stream.modify(|iter| ChunksIter::new(iter, size, head));
Ok(PipelineData::ListStream(stream, metadata))
}
input => Err(input.unsupported_input_error("list", head)),
}
}
}
struct ChunksIter<I: Iterator<Item = Value>> {
iter: I,
size: usize,
span: Span,
}
impl<I: Iterator<Item = Value>> ChunksIter<I> {
fn new(iter: impl IntoIterator<IntoIter = I>, size: usize, span: Span) -> Self {
Self {
iter: iter.into_iter(),
size,
span,
}
}
}
impl<I: Iterator<Item = Value>> Iterator for ChunksIter<I> {
type Item = Value;
fn next(&mut self) -> Option<Self::Item> {
let first = self.iter.next()?;
let mut chunk = Vec::with_capacity(self.size); // delay allocation to optimize for empty iter
chunk.push(first);
chunk.extend((&mut self.iter).take(self.size - 1));
Some(Value::list(chunk, self.span))
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(Chunks {})
}
}

View File

@ -1,5 +1,5 @@
use nu_engine::command_prelude::*;
use nu_protocol::ValueIterator;
use nu_protocol::{report_warning_new, ParseWarning, ValueIterator};
#[derive(Clone)]
pub struct Group;
@ -54,6 +54,17 @@ impl Command for Group {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
report_warning_new(
engine_state,
&ParseWarning::DeprecatedWarning {
old_command: "group".into(),
new_suggestion: "the new `chunks` command".into(),
span: head,
url: "`help chunks`".into(),
},
);
let group_size: Spanned<usize> = call.req(engine_state, stack, 0)?;
let metadata = input.metadata();

View File

@ -1,6 +1,7 @@
mod all;
mod any;
mod append;
mod chunks;
mod columns;
mod compact;
mod default;
@ -58,6 +59,7 @@ mod zip;
pub use all::All;
pub use any::Any;
pub use append::Append;
pub use chunks::Chunks;
pub use columns::Columns;
pub use compact::Compact;
pub use default::Default;