diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 9903eed774..fcca72b2f2 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -64,6 +64,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
             Length,
             Lines,
             ParEach,
+            ChunkBy,
             Prepend,
             Range,
             Reduce,
diff --git a/crates/nu-command/src/filters/chunk_by.rs b/crates/nu-command/src/filters/chunk_by.rs
new file mode 100644
index 0000000000..18a5110918
--- /dev/null
+++ b/crates/nu-command/src/filters/chunk_by.rs
@@ -0,0 +1,277 @@
+use super::utils::chain_error_with_input;
+use nu_engine::{command_prelude::*, ClosureEval};
+use nu_protocol::engine::Closure;
+use nu_protocol::Signals;
+
+/// The `chunk-by` filter command: splits a list or range into runs of consecutive
+/// elements for which a closure yields equal results.
+#[derive(Clone)]
+pub struct ChunkBy;
+
+impl Command for ChunkBy {
+    fn name(&self) -> &str {
+        "chunk-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("chunk-by")
+            .input_output_types(vec![
+                (
+                    Type::List(Box::new(Type::Any)),
+                    Type::list(Type::list(Type::Any)),
+                ),
+                (Type::Range, Type::list(Type::list(Type::Any))),
+            ])
+            .required(
+                "closure",
+                SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
+                "The closure to run.",
+            )
+            .category(Category::Filters)
+    }
+
+    fn description(&self) -> &str {
+        r#"Divides a sequence into sub-sequences based on a closure."#
+    }
+
+    fn extra_description(&self) -> &str {
+        r#"chunk-by applies the given closure to each value of the input list, and groups
+consecutive elements that share the same closure result value into lists."#
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        chunk_by(engine_state, stack, call, input)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Chunk data into runs of larger than zero or not.",
+                example: "[1, 3, -2, -2, 0, 1, 2] | chunk-by {|it| $it >= 0 }",
+                result: Some(Value::test_list(vec![
+                    Value::test_list(vec![Value::test_int(1), Value::test_int(3)]),
+                    Value::test_list(vec![Value::test_int(-2), Value::test_int(-2)]),
+                    Value::test_list(vec![
+                        Value::test_int(0),
+                        Value::test_int(1),
+                        Value::test_int(2),
+                    ]),
+                ])),
+            },
+            Example {
+                description: "Identify repetitions in a string",
+                example: r#"[a b b c c c] | chunk-by { |it| $it }"#,
+                result: Some(Value::test_list(vec![
+                    Value::test_list(vec![Value::test_string("a")]),
+                    Value::test_list(vec![Value::test_string("b"), Value::test_string("b")]),
+                    Value::test_list(vec![
+                        Value::test_string("c"),
+                        Value::test_string("c"),
+                        Value::test_string("c"),
+                    ]),
+                ])),
+            },
+            Example {
+                description: "Chunk values of range by predicate",
+                example: r#"(0..8) | chunk-by { |it| $it // 3 }"#,
+                result: Some(Value::test_list(vec![
+                    Value::test_list(vec![
+                        Value::test_int(0),
+                        Value::test_int(1),
+                        Value::test_int(2),
+                    ]),
+                    Value::test_list(vec![
+                        Value::test_int(3),
+                        Value::test_int(4),
+                        Value::test_int(5),
+                    ]),
+                    Value::test_list(vec![
+                        Value::test_int(6),
+                        Value::test_int(7),
+                        Value::test_int(8),
+                    ]),
+                ])),
+            },
+        ]
+    }
+}
+
+/// Iterator adaptor that yields `Vec`s of consecutive items sharing the same key.
+///
+/// `last_value` holds the first item (and its key) of the next chunk, read while
+/// finishing the previous one; `done` is set once the wrapped iterator returns
+/// `None` mid-chunk or `signals.interrupted()` reports true.
+struct Chunk<I, T, F, K> {
+    iterator: I,
+    last_value: Option<(T, K)>,
+    closure: F,
+    done: bool,
+    signals: Signals,
+}
+
+impl<I, T, F, K> Chunk<I, T, F, K>
+where
+    I: Iterator<Item = T>,
+    F: FnMut(&T) -> K,
+    K: PartialEq,
+{
+    /// Pulls the next item from the wrapped iterator; if `signals.interrupted()`
+    /// is true, marks the adaptor as done and returns `None` instead.
+    fn inner_iterator_next(&mut self) -> Option<I::Item> {
+        if self.signals.interrupted() {
+            self.done = true;
+            return None;
+        }
+        self.iterator.next()
+    }
+}
+
+impl<I, T, F, K> Iterator for Chunk<I, T, F, K>
+where
+    I: Iterator<Item = T>,
+    F: FnMut(&T) -> K,
+    K: PartialEq,
+{
+    type Item = Vec<T>;
+
+    /// Returns the next run of consecutive items whose keys compare equal.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.done {
+            return None;
+        }
+
+        // Start from the item carried over from the previous call, if any.
+        let (head, head_key) = match self.last_value.take() {
+            None => {
+                let head = self.inner_iterator_next()?;
+
+                let key = (self.closure)(&head);
+
+                (head, key)
+            }
+
+            Some((value, key)) => (value, key),
+        };
+
+        let mut result = vec![head];
+
+        loop {
+            match self.inner_iterator_next() {
+                None => {
+                    // Input ended (or was interrupted): emit the chunk built so far.
+                    self.done = true;
+                    return Some(result);
+                }
+                Some(value) => {
+                    let value_key = (self.closure)(&value);
+
+                    if value_key == head_key {
+                        result.push(value);
+                    } else {
+                        // Key changed: stash this item as the head of the next chunk.
+                        self.last_value = Some((value, value_key));
+                        return Some(result);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// An iterator with the semantics of the chunk_by operation.
+fn chunk_iter_by<I, T, F, K>(iterator: I, signals: Signals, closure: F) -> Chunk<I, T, F, K>
+where
+    I: Iterator<Item = T>,
+    F: FnMut(&T) -> K,
+    K: PartialEq,
+{
+    Chunk {
+        closure,
+        iterator,
+        last_value: None,
+        done: false,
+        signals,
+    }
+}
+
+/// Runs `chunk-by` on `input` using the closure given as the first positional argument.
+///
+/// Lists, ranges and list streams are chunked; empty input stays empty; any other
+/// input yields an unsupported-input error. The input's metadata is set on the result.
+pub fn chunk_by(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let head = call.head;
+    let closure: Closure = call.req(engine_state, stack, 0)?;
+
+    let metadata = input.metadata();
+
+    match input {
+        PipelineData::Empty => Ok(PipelineData::Empty),
+        PipelineData::Value(Value::Range { .. }, ..)
+        | PipelineData::Value(Value::List { .. }, ..)
+        | PipelineData::ListStream(..) => {
+            let closure = ClosureEval::new(engine_state, stack, closure);
+
+            let result = chunk_value_stream(
+                input.into_iter(),
+                closure,
+                head,
+                engine_state.signals().clone(),
+            );
+
+            Ok(result.into_pipeline_data(head, engine_state.signals().clone()))
+        }
+
+        PipelineData::ByteStream(..) | PipelineData::Value(..) => {
+            Err(input.unsupported_input_error("list", head))
+        }
+    }
+    .map(|data| data.set_metadata(metadata))
+}
+
+/// Chunks a stream of `Value`s, using the result of `closure` on each value as its key.
+///
+/// If running the closure or collecting its output fails, the error is wrapped in
+/// an error `Value` and that value is used as the key instead.
+fn chunk_value_stream<I>(
+    iterator: I,
+    mut closure: ClosureEval,
+    head: Span,
+    signals: Signals,
+) -> impl Iterator<Item = Value> + 'static + Send
+where
+    I: Iterator<Item = Value> + 'static + Send,
+{
+    chunk_iter_by(iterator, signals, move |value| {
+        match closure.run_with_value(value.clone()) {
+            Ok(data) => data.into_value(head).unwrap_or_else(|error| {
+                Value::error(chain_error_with_input(error, value.is_error(), head), head)
+            }),
+
+            Err(error) => Value::error(chain_error_with_input(error, value.is_error(), head), head),
+        }
+    })
+    .map(move |it| Value::list(it, head))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(ChunkBy {})
+    }
+}
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 8b56bf7a87..f5e5118390 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -1,6 +1,7 @@
 mod all;
 mod any;
 mod append;
+mod chunk_by;
 mod chunks;
 mod columns;
 mod compact;
@@ -58,6 +59,7 @@ mod zip;
 pub use all::All;
 pub use any::Any;
 pub use append::Append;
+pub use chunk_by::ChunkBy;
 pub use chunks::Chunks;
 pub use columns::Columns;
 pub use compact::Compact;
diff --git a/crates/nu-command/tests/commands/chunk_by.rs b/crates/nu-command/tests/commands/chunk_by.rs
new file mode 100644
index 0000000000..1a5a1ed043
--- /dev/null
+++ b/crates/nu-command/tests/commands/chunk_by.rs
@@ -0,0 +1,58 @@
+use nu_test_support::{nu, pipeline};
+
+#[test]
+fn chunk_by_on_empty_input_returns_empty_list() {
+    let actual = nu!("[] | chunk-by {|it| $it} | to nuon");
+    assert!(actual.err.is_empty());
+    assert_eq!(actual.out, "[]");
+}
+
+#[test]
+fn chunk_by_strings_works() {
+    let sample = r#"
+                [a a a b b b c c c a a a]
+            "#;
+
+    let actual = nu!(pipeline(&format!(
+        r#"
+                {sample}
+                | chunk-by {{|it| $it}}
+                | to nuon
+            "#
+    )));
+
+    assert_eq!(actual.out, "[[a, a, a], [b, b, b], [c, c, c], [a, a, a]]");
+}
+
+#[test]
+fn chunk_by_field_works() {
+    let sample = r#"[
+    {
+        name: bob,
+        age: 20,
+        cool: false
+    },
+    {
+        name: jane,
+        age: 30,
+        cool: false
+    },
+    {
+        name: marie,
+        age: 19,
+        cool: true
+    },
+    {
+        name: carl,
+        age: 36,
+        cool: true
+    } ]"#;
+
+    let actual = nu!(pipeline(&format!(
+        r#"{sample}
+            | chunk-by {{|it| $it.cool}}
+            | length"#
+    )));
+
+    assert_eq!(actual.out, "2");
+}
diff --git a/crates/nu-command/tests/commands/mod.rs b/crates/nu-command/tests/commands/mod.rs
index 678b8d8896..a8ffdeb917 100644
--- a/crates/nu-command/tests/commands/mod.rs
+++ b/crates/nu-command/tests/commands/mod.rs
@@ -8,6 +8,7 @@ mod break_;
 mod bytes;
 mod cal;
 mod cd;
+mod chunk_by;
 mod chunks;
 mod compact;
 mod complete;