Deprecate group in favor of chunks (#13377)

# Description
The name of the `group` command is a little unclear/ambiguous.
Everything I look at it, I think of `group-by`. I think `chunks` more
clearly conveys what the `group` command does. Namely, it divides the
input list into chunks of a certain size. For example,
[`slice::chunks`](https://doc.rust-lang.org/std/primitive.slice.html#method.chunks)
has the same name. So, this PR adds a new `chunks` command to replace
the now deprecated `group` command.

The `chunks` command is a refactored version of `group`. As such, there
is a small performance improvement:
```nushell
# $data is a very large list
> bench { $data | chunks 2 } --rounds 30 | get mean
474ms 921µs 190ns

# deprecation warning was disabled here for fairness
> bench { $data | group 2 } --rounds 30 | get mean
592ms 702µs 440ns



> bench { $data | chunks 200 } --rounds 30 | get mean
374ms 188µs 318ns

> bench { $data | group 200 } --rounds 30 | get mean
481ms 264µs 869ns 



> bench { $data | chunks 1 } --rounds 30 | get mean
642ms 574µs 42ns

> bench { $data | group 1 } --rounds 30 | get mean
981ms 602µs 513ns
```

# User-Facing Changes
- `group` command has been deprecated in favor of new `chunks` command.
- `chunks` errors when given a chunk size of `0` whereas `group` returns
chunks with one element.

# Tests + Formatting
Added tests for `chunks`, since `group` did not have any tests.

# After Submitting
Update book if necessary.
This commit is contained in:
Ian Manske 2024-07-16 03:49:00 +00:00 committed by GitHub
parent 3d1145e759
commit 0918050ac8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 269 additions and 4 deletions

View File

@ -31,6 +31,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
All,
Any,
Append,
Chunks,
Columns,
Compact,
Default,

View File

@ -0,0 +1,153 @@
use nu_engine::command_prelude::*;
use nu_protocol::ListStream;
#[derive(Clone)]
pub struct Chunks;
impl Command for Chunks {
fn name(&self) -> &str {
"chunks"
}
fn signature(&self) -> Signature {
Signature::build("chunks")
.input_output_types(vec![
(Type::table(), Type::list(Type::table())),
(Type::list(Type::Any), Type::list(Type::list(Type::Any))),
])
.required("chunk_size", SyntaxShape::Int, "The size of each chunk.")
.category(Category::Filters)
}
fn usage(&self) -> &str {
"Divide a list or table into chunks of `chunk_size`."
}
fn extra_usage(&self) -> &str {
"This command will error if `chunk_size` is negative or zero."
}
fn search_terms(&self) -> Vec<&str> {
vec!["batch", "group"]
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
example: "[1 2 3 4] | chunks 2",
description: "Chunk a list into pairs",
result: Some(Value::test_list(vec![
Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
Value::test_list(vec![Value::test_int(3), Value::test_int(4)]),
])),
},
Example {
example: "[[foo bar]; [0 1] [2 3] [4 5] [6 7] [8 9]] | chunks 3",
description: "Chunk the rows of a table into triplets",
result: Some(Value::test_list(vec![
Value::test_list(vec![
Value::test_record(record! {
"foo" => Value::test_int(0),
"bar" => Value::test_int(1),
}),
Value::test_record(record! {
"foo" => Value::test_int(2),
"bar" => Value::test_int(3),
}),
Value::test_record(record! {
"foo" => Value::test_int(4),
"bar" => Value::test_int(5),
}),
]),
Value::test_list(vec![
Value::test_record(record! {
"foo" => Value::test_int(6),
"bar" => Value::test_int(7),
}),
Value::test_record(record! {
"foo" => Value::test_int(8),
"bar" => Value::test_int(9),
}),
]),
])),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let chunk_size: Value = call.req(engine_state, stack, 0)?;
let size =
usize::try_from(chunk_size.as_int()?).map_err(|_| ShellError::NeedsPositiveValue {
span: chunk_size.span(),
})?;
if size == 0 {
return Err(ShellError::IncorrectValue {
msg: "`chunk_size` cannot be zero".into(),
val_span: chunk_size.span(),
call_span: head,
});
}
match input {
PipelineData::Value(Value::List { vals, .. }, metadata) => {
let chunks = ChunksIter::new(vals, size, head);
let stream = ListStream::new(chunks, head, engine_state.signals().clone());
Ok(PipelineData::ListStream(stream, metadata))
}
PipelineData::ListStream(stream, metadata) => {
let stream = stream.modify(|iter| ChunksIter::new(iter, size, head));
Ok(PipelineData::ListStream(stream, metadata))
}
input => Err(input.unsupported_input_error("list", head)),
}
}
}
struct ChunksIter<I: Iterator<Item = Value>> {
iter: I,
size: usize,
span: Span,
}
impl<I: Iterator<Item = Value>> ChunksIter<I> {
fn new(iter: impl IntoIterator<IntoIter = I>, size: usize, span: Span) -> Self {
Self {
iter: iter.into_iter(),
size,
span,
}
}
}
impl<I: Iterator<Item = Value>> Iterator for ChunksIter<I> {
type Item = Value;
fn next(&mut self) -> Option<Self::Item> {
let first = self.iter.next()?;
let mut chunk = Vec::with_capacity(self.size); // delay allocation to optimize for empty iter
chunk.push(first);
chunk.extend((&mut self.iter).take(self.size - 1));
Some(Value::list(chunk, self.span))
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(Chunks {})
}
}

View File

@ -1,5 +1,5 @@
use nu_engine::command_prelude::*;
use nu_protocol::ValueIterator;
use nu_protocol::{report_warning_new, ParseWarning, ValueIterator};
#[derive(Clone)]
pub struct Group;
@ -54,6 +54,17 @@ impl Command for Group {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
report_warning_new(
engine_state,
&ParseWarning::DeprecatedWarning {
old_command: "group".into(),
new_suggestion: "the new `chunks` command".into(),
span: head,
url: "`help chunks`".into(),
},
);
let group_size: Spanned<usize> = call.req(engine_state, stack, 0)?;
let metadata = input.metadata();

View File

@ -1,6 +1,7 @@
mod all;
mod any;
mod append;
mod chunks;
mod columns;
mod compact;
mod default;
@ -58,6 +59,7 @@ mod zip;
pub use all::All;
pub use any::Any;
pub use append::Append;
pub use chunks::Chunks;
pub use columns::Columns;
pub use compact::Compact;
pub use default::Default;

View File

@ -0,0 +1,43 @@
use nu_test_support::nu;
#[test]
fn chunk_size_negative() {
let actual = nu!("[0 1 2] | chunks -1");
assert!(actual.err.contains("positive"));
}
#[test]
fn chunk_size_zero() {
let actual = nu!("[0 1 2] | chunks 0");
assert!(actual.err.contains("zero"));
}
#[test]
fn chunk_size_not_int() {
let actual = nu!("[0 1 2] | chunks (if true { 1sec })");
assert!(actual.err.contains("can't convert"));
}
#[test]
fn empty() {
let actual = nu!("[] | chunks 2 | is-empty");
assert_eq!(actual.out, "true");
}
#[test]
fn list_stream() {
let actual = nu!("([0 1 2] | every 1 | chunks 2) == ([0 1 2] | chunks 2)");
assert_eq!(actual.out, "true");
}
#[test]
fn table_stream() {
let actual = nu!("([[foo bar]; [0 1] [2 3] [4 5]] | every 1 | chunks 2) == ([[foo bar]; [0 1] [2 3] [4 5]] | chunks 2)");
assert_eq!(actual.out, "true");
}
#[test]
fn no_empty_chunks() {
let actual = nu!("([0 1 2 3 4 5] | chunks 3 | length) == 2");
assert_eq!(actual.out, "true");
}

View File

@ -7,6 +7,7 @@ mod break_;
mod bytes;
mod cal;
mod cd;
mod chunks;
mod compact;
mod complete;
mod config_env_default;

View File

@ -47,6 +47,26 @@ pub fn report_error_new(
report_error(&working_set, error);
}
pub fn report_warning(
working_set: &StateWorkingSet,
error: &(dyn miette::Diagnostic + Send + Sync + 'static),
) {
eprintln!("Warning: {:?}", CliError(error, working_set));
// reset vt processing, aka ansi because illbehaved externals can break it
#[cfg(windows)]
{
let _ = nu_utils::enable_vt_processing();
}
}
pub fn report_warning_new(
engine_state: &EngineState,
error: &(dyn miette::Diagnostic + Send + Sync + 'static),
) {
let working_set = StateWorkingSet::new(engine_state);
report_error(&working_set, error);
}
impl std::fmt::Debug for CliError<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let config = self.1.get_config();

View File

@ -5,7 +5,9 @@ mod parse_error;
mod parse_warning;
mod shell_error;
pub use cli_error::{format_error, report_error, report_error_new};
pub use cli_error::{
format_error, report_error, report_error_new, report_warning, report_warning_new,
};
pub use compile_error::CompileError;
pub use labeled_error::{ErrorLabel, LabeledError};
pub use parse_error::{DidYouMean, ParseError};

View File

@ -6,11 +6,11 @@ use thiserror::Error;
#[derive(Clone, Debug, Error, Diagnostic, Serialize, Deserialize)]
pub enum ParseWarning {
#[error("Deprecated: {old_command}")]
#[diagnostic(help("for more info: {url}"))]
#[diagnostic(help("for more info see {url}"))]
DeprecatedWarning {
old_command: String,
new_suggestion: String,
#[label("`{old_command}` is deprecated and will be removed in a future release. Please {new_suggestion} instead")]
#[label("`{old_command}` is deprecated and will be removed in a future release. Please {new_suggestion} instead.")]
span: Span,
url: String,
},

View File

@ -635,6 +635,34 @@ impl PipelineData {
Ok(None)
}
}
pub fn unsupported_input_error(
self,
expected_type: impl Into<String>,
span: Span,
) -> ShellError {
match self {
PipelineData::Empty => ShellError::PipelineEmpty { dst_span: span },
PipelineData::Value(value, ..) => ShellError::OnlySupportsThisInputType {
exp_input_type: expected_type.into(),
wrong_type: value.get_type().get_non_specified_string(),
dst_span: span,
src_span: value.span(),
},
PipelineData::ListStream(stream, ..) => ShellError::OnlySupportsThisInputType {
exp_input_type: expected_type.into(),
wrong_type: "list (stream)".into(),
dst_span: span,
src_span: stream.span(),
},
PipelineData::ByteStream(stream, ..) => ShellError::OnlySupportsThisInputType {
exp_input_type: expected_type.into(),
wrong_type: stream.type_().describe().into(),
dst_span: span,
src_span: stream.span(),
},
}
}
}
enum PipelineIteratorInner {

View File

@ -36,6 +36,10 @@ pub enum Type {
}
impl Type {
pub fn list(inner: Type) -> Self {
Self::List(Box::new(inner))
}
pub fn record() -> Self {
Self::Record([].into())
}