mirror of
https://github.com/nushell/nushell.git
synced 2025-05-02 00:54:25 +02:00
Implement chunk_by operation (#14410)
# Description This pull requests implements a new ~~partition-by~~ `chunk-by` command. The operation takes a closure and partitions the input list into sublists based on the return value of the closure. - fixes #14149 Examples, tests and and documentation were added accordingly.  
This commit is contained in:
parent
7f61cbbfd6
commit
dc9e8161d9
@ -64,6 +64,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
|
|||||||
Length,
|
Length,
|
||||||
Lines,
|
Lines,
|
||||||
ParEach,
|
ParEach,
|
||||||
|
ChunkBy,
|
||||||
Prepend,
|
Prepend,
|
||||||
Range,
|
Range,
|
||||||
Reduce,
|
Reduce,
|
||||||
|
256
crates/nu-command/src/filters/chunk_by.rs
Normal file
256
crates/nu-command/src/filters/chunk_by.rs
Normal file
@ -0,0 +1,256 @@
|
|||||||
|
use super::utils::chain_error_with_input;
|
||||||
|
use nu_engine::{command_prelude::*, ClosureEval};
|
||||||
|
use nu_protocol::engine::Closure;
|
||||||
|
use nu_protocol::Signals;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct ChunkBy;
|
||||||
|
|
||||||
|
impl Command for ChunkBy {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"chunk-by"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("chunk-by")
|
||||||
|
.input_output_types(vec![
|
||||||
|
(
|
||||||
|
Type::List(Box::new(Type::Any)),
|
||||||
|
Type::list(Type::list(Type::Any)),
|
||||||
|
),
|
||||||
|
(Type::Range, Type::list(Type::list(Type::Any))),
|
||||||
|
])
|
||||||
|
.required(
|
||||||
|
"closure",
|
||||||
|
SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
|
||||||
|
"The closure to run.",
|
||||||
|
)
|
||||||
|
.category(Category::Filters)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
r#"Divides a sequence into sub-sequences based on a closure."#
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extra_description(&self) -> &str {
|
||||||
|
r#"chunk-by applies the given closure to each value of the input list, and groups
|
||||||
|
consecutive elements that share the same closure result value into lists."#
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(
|
||||||
|
&self,
|
||||||
|
engine_state: &EngineState,
|
||||||
|
stack: &mut Stack,
|
||||||
|
call: &Call,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
chunk_by(engine_state, stack, call, input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![
|
||||||
|
Example {
|
||||||
|
description: "Chunk data into runs of larger than zero or not.",
|
||||||
|
example: "[1, 3, -2, -2, 0, 1, 2] | chunk-by {|it| $it >= 0 }",
|
||||||
|
result: Some(Value::test_list(vec![
|
||||||
|
Value::test_list(vec![Value::test_int(1), Value::test_int(3)]),
|
||||||
|
Value::test_list(vec![Value::test_int(-2), Value::test_int(-2)]),
|
||||||
|
Value::test_list(vec![
|
||||||
|
Value::test_int(0),
|
||||||
|
Value::test_int(1),
|
||||||
|
Value::test_int(2),
|
||||||
|
]),
|
||||||
|
])),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Identify repetitions in a string",
|
||||||
|
example: r#"[a b b c c c] | chunk-by { |it| $it }"#,
|
||||||
|
result: Some(Value::test_list(vec![
|
||||||
|
Value::test_list(vec![Value::test_string("a")]),
|
||||||
|
Value::test_list(vec![Value::test_string("b"), Value::test_string("b")]),
|
||||||
|
Value::test_list(vec![
|
||||||
|
Value::test_string("c"),
|
||||||
|
Value::test_string("c"),
|
||||||
|
Value::test_string("c"),
|
||||||
|
]),
|
||||||
|
])),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Chunk values of range by predicate",
|
||||||
|
example: r#"(0..8) | chunk-by { |it| $it // 3 }"#,
|
||||||
|
result: Some(Value::test_list(vec![
|
||||||
|
Value::test_list(vec![
|
||||||
|
Value::test_int(0),
|
||||||
|
Value::test_int(1),
|
||||||
|
Value::test_int(2),
|
||||||
|
]),
|
||||||
|
Value::test_list(vec![
|
||||||
|
Value::test_int(3),
|
||||||
|
Value::test_int(4),
|
||||||
|
Value::test_int(5),
|
||||||
|
]),
|
||||||
|
Value::test_list(vec![
|
||||||
|
Value::test_int(6),
|
||||||
|
Value::test_int(7),
|
||||||
|
Value::test_int(8),
|
||||||
|
]),
|
||||||
|
])),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Chunk<I, T, F, K> {
|
||||||
|
iterator: I,
|
||||||
|
last_value: Option<(T, K)>,
|
||||||
|
closure: F,
|
||||||
|
done: bool,
|
||||||
|
signals: Signals,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I, T, F, K> Chunk<I, T, F, K>
|
||||||
|
where
|
||||||
|
I: Iterator<Item = T>,
|
||||||
|
F: FnMut(&T) -> K,
|
||||||
|
K: PartialEq,
|
||||||
|
{
|
||||||
|
fn inner_iterator_next(&mut self) -> Option<I::Item> {
|
||||||
|
if self.signals.interrupted() {
|
||||||
|
self.done = true;
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
self.iterator.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I, T, F, K> Iterator for Chunk<I, T, F, K>
|
||||||
|
where
|
||||||
|
I: Iterator<Item = T>,
|
||||||
|
F: FnMut(&T) -> K,
|
||||||
|
K: PartialEq,
|
||||||
|
{
|
||||||
|
type Item = Vec<T>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.done {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (head, head_key) = match self.last_value.take() {
|
||||||
|
None => {
|
||||||
|
let head = self.inner_iterator_next()?;
|
||||||
|
|
||||||
|
let key = (self.closure)(&head);
|
||||||
|
|
||||||
|
(head, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
Some((value, key)) => (value, key),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut result = vec![head];
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match self.inner_iterator_next() {
|
||||||
|
None => {
|
||||||
|
self.done = true;
|
||||||
|
return Some(result);
|
||||||
|
}
|
||||||
|
Some(value) => {
|
||||||
|
let value_key = (self.closure)(&value);
|
||||||
|
|
||||||
|
if value_key == head_key {
|
||||||
|
result.push(value);
|
||||||
|
} else {
|
||||||
|
self.last_value = Some((value, value_key));
|
||||||
|
return Some(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator with the semantics of the chunk_by operation.
|
||||||
|
fn chunk_iter_by<I, T, F, K>(iterator: I, signals: Signals, closure: F) -> Chunk<I, T, F, K>
|
||||||
|
where
|
||||||
|
I: Iterator<Item = T>,
|
||||||
|
F: FnMut(&T) -> K,
|
||||||
|
K: PartialEq,
|
||||||
|
{
|
||||||
|
Chunk {
|
||||||
|
closure,
|
||||||
|
iterator,
|
||||||
|
last_value: None,
|
||||||
|
done: false,
|
||||||
|
signals,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn chunk_by(
|
||||||
|
engine_state: &EngineState,
|
||||||
|
stack: &mut Stack,
|
||||||
|
call: &Call,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
let head = call.head;
|
||||||
|
let closure: Closure = call.req(engine_state, stack, 0)?;
|
||||||
|
|
||||||
|
let metadata = input.metadata();
|
||||||
|
|
||||||
|
match input {
|
||||||
|
PipelineData::Empty => Ok(PipelineData::Empty),
|
||||||
|
PipelineData::Value(Value::Range { .. }, ..)
|
||||||
|
| PipelineData::Value(Value::List { .. }, ..)
|
||||||
|
| PipelineData::ListStream(..) => {
|
||||||
|
let closure = ClosureEval::new(engine_state, stack, closure);
|
||||||
|
|
||||||
|
let result = chunk_value_stream(
|
||||||
|
input.into_iter(),
|
||||||
|
closure,
|
||||||
|
head,
|
||||||
|
engine_state.signals().clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(result.into_pipeline_data(head, engine_state.signals().clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
PipelineData::ByteStream(..) | PipelineData::Value(..) => {
|
||||||
|
Err(input.unsupported_input_error("list", head))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.map(|data| data.set_metadata(metadata))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn chunk_value_stream<I>(
|
||||||
|
iterator: I,
|
||||||
|
mut closure: ClosureEval,
|
||||||
|
head: Span,
|
||||||
|
signals: Signals,
|
||||||
|
) -> impl Iterator<Item = Value> + 'static + Send
|
||||||
|
where
|
||||||
|
I: Iterator<Item = Value> + 'static + Send,
|
||||||
|
{
|
||||||
|
chunk_iter_by(iterator, signals, move |value| {
|
||||||
|
match closure.run_with_value(value.clone()) {
|
||||||
|
Ok(data) => data.into_value(head).unwrap_or_else(|error| {
|
||||||
|
Value::error(chain_error_with_input(error, value.is_error(), head), head)
|
||||||
|
}),
|
||||||
|
|
||||||
|
Err(error) => Value::error(chain_error_with_input(error, value.is_error(), head), head),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(move |it| Value::list(it, head))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_examples() {
|
||||||
|
use crate::test_examples;
|
||||||
|
|
||||||
|
test_examples(ChunkBy {})
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
mod all;
|
mod all;
|
||||||
mod any;
|
mod any;
|
||||||
mod append;
|
mod append;
|
||||||
|
mod chunk_by;
|
||||||
mod chunks;
|
mod chunks;
|
||||||
mod columns;
|
mod columns;
|
||||||
mod compact;
|
mod compact;
|
||||||
@ -58,6 +59,7 @@ mod zip;
|
|||||||
pub use all::All;
|
pub use all::All;
|
||||||
pub use any::Any;
|
pub use any::Any;
|
||||||
pub use append::Append;
|
pub use append::Append;
|
||||||
|
pub use chunk_by::ChunkBy;
|
||||||
pub use chunks::Chunks;
|
pub use chunks::Chunks;
|
||||||
pub use columns::Columns;
|
pub use columns::Columns;
|
||||||
pub use compact::Compact;
|
pub use compact::Compact;
|
||||||
|
58
crates/nu-command/tests/commands/chunk_by.rs
Normal file
58
crates/nu-command/tests/commands/chunk_by.rs
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
use nu_test_support::{nu, pipeline};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn chunk_by_on_empty_input_returns_empty_list() {
|
||||||
|
let actual = nu!("[] | chunk-by {|it| $it} | to nuon");
|
||||||
|
assert!(actual.err.is_empty());
|
||||||
|
assert_eq!(actual.out, "[]");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn chunk_by_strings_works() {
|
||||||
|
let sample = r#"
|
||||||
|
[a a a b b b c c c a a a]
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let actual = nu!(pipeline(&format!(
|
||||||
|
r#"
|
||||||
|
{sample}
|
||||||
|
| chunk-by {{|it| $it}}
|
||||||
|
| to nuon
|
||||||
|
"#
|
||||||
|
)));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "[[a, a, a], [b, b, b], [c, c, c], [a, a, a]]");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn chunk_by_field_works() {
|
||||||
|
let sample = r#"[
|
||||||
|
{
|
||||||
|
name: bob,
|
||||||
|
age: 20,
|
||||||
|
cool: false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: jane,
|
||||||
|
age: 30,
|
||||||
|
cool: false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: marie,
|
||||||
|
age: 19,
|
||||||
|
cool: true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: carl,
|
||||||
|
age: 36,
|
||||||
|
cool: true
|
||||||
|
} ]"#;
|
||||||
|
|
||||||
|
let actual = nu!(pipeline(&format!(
|
||||||
|
r#"{sample}
|
||||||
|
| chunk-by {{|it| $it.cool}}
|
||||||
|
| length"#
|
||||||
|
)));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "2");
|
||||||
|
}
|
@ -8,6 +8,7 @@ mod break_;
|
|||||||
mod bytes;
|
mod bytes;
|
||||||
mod cal;
|
mod cal;
|
||||||
mod cd;
|
mod cd;
|
||||||
|
mod chunk_by;
|
||||||
mod chunks;
|
mod chunks;
|
||||||
mod compact;
|
mod compact;
|
||||||
mod complete;
|
mod complete;
|
||||||
|
Loading…
Reference in New Issue
Block a user