feat: Add unfold command (#10489)

# Description  > [!NOTE] > This PR description originally used examples where the `generator` closure returned a list. It has since been updated to use records instead. The `unfold` command allows users to dynamically generate streams of data. The stream is generated by repeatedly invoking a `generator` closure. The `generator` closure accepts a single argument and returns a record containing two optional keys: 'out' and 'next'. Each invocation, the 'out' value, if present, is added to the stream. If a 'next' key is present, it is used as the next argument to the closure, otherwise generation stops. The name "unfold" is borrowed from other functional-programming languages. Whereas `fold` (or `reduce`) takes a stream of values and outputs a single value, `unfold` takes a single value and outputs a stream of values. ### Examples A common example of using `unfold` is to generate a fibbonacci sequence. See [here](6ffdac103c/src/sources.rs (L65)) for an example of this in rust's `itertools`. ```nushell > unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10 ───┬──── 0 │ 0 1 │ 1 2 │ 1 3 │ 2 4 │ 3 5 │ 5 6 │ 8 7 │ 13 8 │ 21 9 │ 34 ───┴──── ``` This command is particularly useful when consuming paginated APIs, like Github's. Previously, nushell users might use a loop and buffer responses into a list, before returning all responses at once. However, this behavior is not desirable if the result result is very large. Using `unfold` avoids buffering and allows subsequent pipeline stages to use the data concurrently, as it's being fetched. #### Before ```nushell mut pages = [] for page in 1.. { let resp = http get ( { scheme: https, host: "api.github.com", path: "/repos/nushell/nushell/issues", params: { page: $page, per_page: $PAGE_SIZE } } | url join) $pages = ($pages | append $resp) if ($resp | length) < $PAGE_SIZE { break } } $pages ``` #### After ```nu unfold 1 {|page| let resp = http get ( { scheme: https, host: "api.github.com", path: "/repos/nushell/nushell/issues", params: { page: $page, per_page: $PAGE_SIZE } } | url join) if ($resp | length) < $PAGE_SIZE { {out: $resp} } else { {out: $resp, next: ($page + 1)} } } ``` # User-Facing Changes  - An `unfold` generator is added to the default context. # Tests + Formatting  # After Submitting  Given the complexity of the `generator` closure's return value, it would be good to document the semantics of `unfold` and provide some in-depth examples showcasing what it can accomplish.
2023-09-30 10:08:06 -04:00
parent 7eaa6d01ab
commit fa2e6e5d53
6 changed files with 341 additions and 3 deletions
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@ -382,6 +382,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
            Seq,
            SeqDate,
            SeqChar,
+            Unfold,
        };

        // Hash
--- a/crates/nu-command/src/example_test.rs
+++ b/crates/nu-command/src/example_test.rs
@ -9,9 +9,9 @@ pub fn test_examples(cmd: impl Command + 'static) {
 #[cfg(test)]
 mod test_examples {
    use super::super::{
-        Ansi, Date, Enumerate, Filter, Flatten, From, Get, Into, IntoDatetime, IntoString, Math,
-        MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn, SplitRow,
-        Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
+        Ansi, Date, Enumerate, Filter, First, Flatten, From, Get, Into, IntoDatetime, IntoString,
+        Math, MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn,
+        SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
    };
    use crate::{Each, To};
    use nu_cmd_lang::example_support::{
@ -71,6 +71,7 @@ mod test_examples {
            working_set.add_decl(Box::new(Echo));
            working_set.add_decl(Box::new(Enumerate));
            working_set.add_decl(Box::new(Filter));
+            working_set.add_decl(Box::new(First));
            working_set.add_decl(Box::new(Flatten));
            working_set.add_decl(Box::new(From));
            working_set.add_decl(Box::new(Get));
--- a/crates/nu-command/src/generators/mod.rs
+++ b/crates/nu-command/src/generators/mod.rs
@ -2,8 +2,10 @@ mod cal;
 mod seq;
 mod seq_char;
 mod seq_date;
+mod unfold;

 pub use cal::Cal;
 pub use seq::Seq;
 pub use seq_char::SeqChar;
 pub use seq_date::SeqDate;
+pub use unfold::Unfold;
--- a/crates/nu-command/src/generators/unfold.rs
+++ b/crates/nu-command/src/generators/unfold.rs
@ -0,0 +1,231 @@
+use itertools::unfold;
+
+use nu_engine::{eval_block_with_early_return, CallExt};
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Closure, Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, ShellError,
+    Signature, Span, Spanned, SyntaxShape, Type, Value,
+};
+
+#[derive(Clone)]
+pub struct Unfold;
+
+impl Command for Unfold {
+    fn name(&self) -> &str {
+        "unfold"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("unfold")
+            .input_output_types(vec![
+                (Type::Nothing, Type::List(Box::new(Type::Any))),
+                (
+                    Type::List(Box::new(Type::Any)),
+                    Type::List(Box::new(Type::Any)),
+                ),
+            ])
+            .required("initial", SyntaxShape::Any, "initial value")
+            .required(
+                "closure",
+                SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
+                "generator function",
+            )
+            .allow_variants_without_examples(true)
+            .category(Category::Generators)
+    }
+
+    fn usage(&self) -> &str {
+        "Generate a list of values by successively invoking a closure."
+    }
+
+    fn extra_usage(&self) -> &str {
+        r#"The generator closure accepts a single argument and returns a record
+containing two optional keys: 'out' and 'next'. Each invocation, the 'out'
+value, if present, is added to the stream. If a 'next' key is present, it is
+used as the next argument to the closure, otherwise generation stops.
+"#
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["generate", "stream"]
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                example: "unfold 0 {|i| if $i <= 10 { {out: $i, next: ($i + 2)} }}",
+                description: "Generate a sequence of numbers",
+                result: Some(Value::list(
+                    vec![
+                        Value::test_int(0),
+                        Value::test_int(2),
+                        Value::test_int(4),
+                        Value::test_int(6),
+                        Value::test_int(8),
+                        Value::test_int(10),
+                    ],
+                    Span::test_data(),
+                )),
+            },
+            Example {
+                example: "unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10",
+                description: "Generate a stream of fibonacci numbers",
+                result: Some(Value::list(
+                    vec![
+                        Value::test_int(0),
+                        Value::test_int(1),
+                        Value::test_int(1),
+                        Value::test_int(2),
+                        Value::test_int(3),
+                        Value::test_int(5),
+                        Value::test_int(8),
+                        Value::test_int(13),
+                        Value::test_int(21),
+                        Value::test_int(34),
+                    ],
+                    Span::test_data(),
+                )),
+            },
+        ]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        _input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let initial: Value = call.req(engine_state, stack, 0)?;
+        let capture_block: Spanned<Closure> = call.req(engine_state, stack, 1)?;
+        let block_span = capture_block.span;
+        let block = engine_state.get_block(capture_block.item.block_id).clone();
+        let ctrlc = engine_state.ctrlc.clone();
+        let engine_state = engine_state.clone();
+        let mut stack = stack.captures_to_stack(&capture_block.item.captures);
+        let orig_env_vars = stack.env_vars.clone();
+        let orig_env_hidden = stack.env_hidden.clone();
+        let redirect_stdout = call.redirect_stdout;
+        let redirect_stderr = call.redirect_stderr;
+
+        // A type of Option<S> is used to represent state. Invocation
+        // will stop on None. Using Option<S> allows functions to output
+        // one final value before stopping.
+        let iter = unfold(Some(initial), move |state| {
+            let arg = match state {
+                Some(state) => state.clone(),
+                None => return None,
+            };
+
+            // with_env() is used here to ensure that each iteration uses
+            // a different set of environment variables.
+            // Hence, a 'cd' in the first loop won't affect the next loop.
+            stack.with_env(&orig_env_vars, &orig_env_hidden);
+
+            if let Some(var) = block.signature.get_positional(0) {
+                if let Some(var_id) = &var.var_id {
+                    stack.add_var(*var_id, arg.clone());
+                }
+            }
+
+            let (output, next_input) = match eval_block_with_early_return(
+                &engine_state,
+                &mut stack,
+                &block,
+                arg.into_pipeline_data(),
+                redirect_stdout,
+                redirect_stderr,
+            ) {
+                // no data -> output nothing and stop.
+                Ok(PipelineData::Empty) => (None, None),
+
+                Ok(PipelineData::Value(value, ..)) => {
+                    let span = value.span();
+                    match value {
+                        // {out: ..., next: ...} -> output and continue
+                        Value::Record { val, .. } => {
+                            let iter = val.into_iter();
+                            let mut out = None;
+                            let mut next = None;
+                            let mut err = None;
+
+                            for (k, v) in iter {
+                                if k.to_lowercase() == "out" {
+                                    out = Some(v);
+                                } else if k.to_lowercase() == "next" {
+                                    next = Some(v);
+                                } else {
+                                    let error = ShellError::GenericError(
+                                        "Invalid block return".to_string(),
+                                        format!("Unexpected record key '{}'", k),
+                                        Some(span),
+                                        None,
+                                        Vec::new(),
+                                    );
+                                    err = Some(Value::error(error, block_span));
+                                    break;
+                                }
+                            }
+
+                            if err.is_some() {
+                                (err, None)
+                            } else {
+                                (out, next)
+                            }
+                        }
+
+                        // some other value -> error and stop
+                        _ => {
+                            let error = ShellError::GenericError(
+                                "Invalid block return".to_string(),
+                                format!("Expected record, found {}", value.get_type()),
+                                Some(span),
+                                None,
+                                Vec::new(),
+                            );
+
+                            (Some(Value::error(error, block_span)), None)
+                        }
+                    }
+                }
+
+                Ok(other) => {
+                    let val = other.into_value(block_span);
+                    let error = ShellError::GenericError(
+                        "Invalid block return".to_string(),
+                        format!("Expected record, found {}", val.get_type()),
+                        Some(val.span()),
+                        None,
+                        Vec::new(),
+                    );
+
+                    (Some(Value::error(error, block_span)), None)
+                }
+
+                // error -> error and stop
+                Err(error) => (Some(Value::error(error, block_span)), None),
+            };
+
+            // We use `state` to control when to stop, not `output`. By wrapping
+            // it in a `Some`, we allow the generator to output `None` as a valid output
+            // value.
+            *state = next_input;
+            Some(output)
+        });
+
+        Ok(iter.flatten().into_pipeline_data(ctrlc))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(Unfold {})
+    }
+}
--- a/crates/nu-command/tests/commands/mod.rs
+++ b/crates/nu-command/tests/commands/mod.rs
@ -103,6 +103,7 @@ mod touch;
 mod transpose;
 mod try_;
 mod ucp;
+mod unfold;
 mod uniq;
 mod uniq_by;
 mod update;
--- a/crates/nu-command/tests/commands/unfold.rs
+++ b/crates/nu-command/tests/commands/unfold.rs
@ -0,0 +1,102 @@
+use nu_test_support::{nu, pipeline};
+
+#[test]
+fn unfold_no_next_break() {
+    let actual =
+        nu!("unfold 1 {|x| if $x == 3 { {out: $x}} else { {out: $x, next: ($x + 1)} }} | to nuon");
+
+    assert_eq!(actual.out, "[1, 2, 3]");
+}
+
+#[test]
+fn unfold_null_break() {
+    let actual = nu!("unfold 1 {|x| if $x <= 3 { {out: $x, next: ($x + 1)} }} | to nuon");
+
+    assert_eq!(actual.out, "[1, 2, 3]");
+}
+
+#[test]
+fn unfold_allows_empty_output() {
+    let actual = nu!(pipeline(
+        r#"
+        unfold 0 {|x|
+          if $x == 1 {
+            {next: ($x + 1)}
+          } else if $x < 3 {
+            {out: $x, next: ($x + 1)}
+          }
+        } | to nuon
+      "#
+    ));
+
+    assert_eq!(actual.out, "[0, 2]");
+}
+
+#[test]
+fn unfold_allows_no_output() {
+    let actual = nu!(pipeline(
+        r#"
+        unfold 0 {|x|
+          if $x < 3 {
+            {next: ($x + 1)}
+          }
+        } | to nuon
+      "#
+    ));
+
+    assert_eq!(actual.out, "[]");
+}
+
+#[test]
+fn unfold_allows_null_state() {
+    let actual = nu!(pipeline(
+        r#"
+        unfold 0 {|x|
+          if $x == null {
+            {out: "done"}
+          } else if $x < 1 {
+            {out: "going", next: ($x + 1)}
+          } else {
+            {out: "stopping", next: null}
+          }
+        } | to nuon
+      "#
+    ));
+
+    assert_eq!(actual.out, "[going, stopping, done]");
+}
+
+#[test]
+fn unfold_allows_null_output() {
+    let actual = nu!(pipeline(
+        r#"
+        unfold 0 {|x|
+          if $x == 3 {
+            {out: "done"}
+          } else {
+            {out: null, next: ($x + 1)}
+          }
+        } | to nuon
+      "#
+    ));
+
+    assert_eq!(actual.out, "[null, null, null, done]");
+}
+
+#[test]
+fn unfold_disallows_extra_keys() {
+    let actual = nu!("unfold 0 {|x| {foo: bar, out: $x}}");
+    assert!(actual.err.contains("Invalid block return"));
+}
+
+#[test]
+fn unfold_disallows_list() {
+    let actual = nu!("unfold 0 {|x| [$x, ($x + 1)]}");
+    assert!(actual.err.contains("Invalid block return"));
+}
+
+#[test]
+fn unfold_disallows_primitive() {
+    let actual = nu!("unfold 0 {|x| 1}");
+    assert!(actual.err.contains("Invalid block return"));
+}