forked from extern/nushell
feat: Add unfold command (#10489)
<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx
you can also mention related issues, PRs or discussions!
-->
# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.
Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
> [!NOTE]
> This PR description originally used examples where the `generator`
closure returned a list. It has since been updated to use records
instead.
The `unfold` command allows users to dynamically generate streams of
data. The stream is generated by repeatedly invoking a `generator`
closure. The `generator` closure accepts a single argument and returns a
record containing two optional keys: 'out' and 'next'. Each invocation,
the 'out' value, if present, is added to the stream. If a 'next' key is
present, it is used as the next argument to the closure, otherwise
generation stops.
The name "unfold" is borrowed from other functional-programming
languages. Whereas `fold` (or `reduce`) takes a stream of values and
outputs a single value, `unfold` takes a single value and outputs a
stream of values.
### Examples
A common example of using `unfold` is to generate a fibbonacci sequence.
See
[here](6ffdac103c/src/sources.rs (L65)
)
for an example of this in rust's `itertools`.
```nushell
> unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10
───┬────
0 │ 0
1 │ 1
2 │ 1
3 │ 2
4 │ 3
5 │ 5
6 │ 8
7 │ 13
8 │ 21
9 │ 34
───┴────
```
This command is particularly useful when consuming paginated APIs, like
Github's. Previously, nushell users might use a loop and buffer
responses into a list, before returning all responses at once. However,
this behavior is not desirable if the result result is very large. Using
`unfold` avoids buffering and allows subsequent pipeline stages to use
the data concurrently, as it's being fetched.
#### Before
```nushell
mut pages = []
for page in 1.. {
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
$pages = ($pages | append $resp)
if ($resp | length) < $PAGE_SIZE {
break
}
}
$pages
```
#### After
```nu
unfold 1 {|page|
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
if ($resp | length) < $PAGE_SIZE {
{out: $resp}
} else {
{out: $resp, next: ($page + 1)}
}
}
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
- An `unfold` generator is added to the default context.
# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands:
- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library
> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
Given the complexity of the `generator` closure's return value, it would
be good to document the semantics of `unfold` and provide some in-depth
examples showcasing what it can accomplish.
This commit is contained in:
parent
7eaa6d01ab
commit
fa2e6e5d53
@ -382,6 +382,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
|
||||
Seq,
|
||||
SeqDate,
|
||||
SeqChar,
|
||||
Unfold,
|
||||
};
|
||||
|
||||
// Hash
|
||||
|
@ -9,9 +9,9 @@ pub fn test_examples(cmd: impl Command + 'static) {
|
||||
#[cfg(test)]
|
||||
mod test_examples {
|
||||
use super::super::{
|
||||
Ansi, Date, Enumerate, Filter, Flatten, From, Get, Into, IntoDatetime, IntoString, Math,
|
||||
MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn, SplitRow,
|
||||
Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
|
||||
Ansi, Date, Enumerate, Filter, First, Flatten, From, Get, Into, IntoDatetime, IntoString,
|
||||
Math, MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn,
|
||||
SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
|
||||
};
|
||||
use crate::{Each, To};
|
||||
use nu_cmd_lang::example_support::{
|
||||
@ -71,6 +71,7 @@ mod test_examples {
|
||||
working_set.add_decl(Box::new(Echo));
|
||||
working_set.add_decl(Box::new(Enumerate));
|
||||
working_set.add_decl(Box::new(Filter));
|
||||
working_set.add_decl(Box::new(First));
|
||||
working_set.add_decl(Box::new(Flatten));
|
||||
working_set.add_decl(Box::new(From));
|
||||
working_set.add_decl(Box::new(Get));
|
||||
|
@ -2,8 +2,10 @@ mod cal;
|
||||
mod seq;
|
||||
mod seq_char;
|
||||
mod seq_date;
|
||||
mod unfold;
|
||||
|
||||
pub use cal::Cal;
|
||||
pub use seq::Seq;
|
||||
pub use seq_char::SeqChar;
|
||||
pub use seq_date::SeqDate;
|
||||
pub use unfold::Unfold;
|
||||
|
231
crates/nu-command/src/generators/unfold.rs
Normal file
231
crates/nu-command/src/generators/unfold.rs
Normal file
@ -0,0 +1,231 @@
|
||||
use itertools::unfold;
|
||||
|
||||
use nu_engine::{eval_block_with_early_return, CallExt};
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Closure, Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, ShellError,
|
||||
Signature, Span, Spanned, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Unfold;
|
||||
|
||||
impl Command for Unfold {
|
||||
fn name(&self) -> &str {
|
||||
"unfold"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("unfold")
|
||||
.input_output_types(vec![
|
||||
(Type::Nothing, Type::List(Box::new(Type::Any))),
|
||||
(
|
||||
Type::List(Box::new(Type::Any)),
|
||||
Type::List(Box::new(Type::Any)),
|
||||
),
|
||||
])
|
||||
.required("initial", SyntaxShape::Any, "initial value")
|
||||
.required(
|
||||
"closure",
|
||||
SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
|
||||
"generator function",
|
||||
)
|
||||
.allow_variants_without_examples(true)
|
||||
.category(Category::Generators)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Generate a list of values by successively invoking a closure."
|
||||
}
|
||||
|
||||
fn extra_usage(&self) -> &str {
|
||||
r#"The generator closure accepts a single argument and returns a record
|
||||
containing two optional keys: 'out' and 'next'. Each invocation, the 'out'
|
||||
value, if present, is added to the stream. If a 'next' key is present, it is
|
||||
used as the next argument to the closure, otherwise generation stops.
|
||||
"#
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["generate", "stream"]
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
example: "unfold 0 {|i| if $i <= 10 { {out: $i, next: ($i + 2)} }}",
|
||||
description: "Generate a sequence of numbers",
|
||||
result: Some(Value::list(
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
Value::test_int(6),
|
||||
Value::test_int(8),
|
||||
Value::test_int(10),
|
||||
],
|
||||
Span::test_data(),
|
||||
)),
|
||||
},
|
||||
Example {
|
||||
example: "unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10",
|
||||
description: "Generate a stream of fibonacci numbers",
|
||||
result: Some(Value::list(
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(5),
|
||||
Value::test_int(8),
|
||||
Value::test_int(13),
|
||||
Value::test_int(21),
|
||||
Value::test_int(34),
|
||||
],
|
||||
Span::test_data(),
|
||||
)),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let initial: Value = call.req(engine_state, stack, 0)?;
|
||||
let capture_block: Spanned<Closure> = call.req(engine_state, stack, 1)?;
|
||||
let block_span = capture_block.span;
|
||||
let block = engine_state.get_block(capture_block.item.block_id).clone();
|
||||
let ctrlc = engine_state.ctrlc.clone();
|
||||
let engine_state = engine_state.clone();
|
||||
let mut stack = stack.captures_to_stack(&capture_block.item.captures);
|
||||
let orig_env_vars = stack.env_vars.clone();
|
||||
let orig_env_hidden = stack.env_hidden.clone();
|
||||
let redirect_stdout = call.redirect_stdout;
|
||||
let redirect_stderr = call.redirect_stderr;
|
||||
|
||||
// A type of Option<S> is used to represent state. Invocation
|
||||
// will stop on None. Using Option<S> allows functions to output
|
||||
// one final value before stopping.
|
||||
let iter = unfold(Some(initial), move |state| {
|
||||
let arg = match state {
|
||||
Some(state) => state.clone(),
|
||||
None => return None,
|
||||
};
|
||||
|
||||
// with_env() is used here to ensure that each iteration uses
|
||||
// a different set of environment variables.
|
||||
// Hence, a 'cd' in the first loop won't affect the next loop.
|
||||
stack.with_env(&orig_env_vars, &orig_env_hidden);
|
||||
|
||||
if let Some(var) = block.signature.get_positional(0) {
|
||||
if let Some(var_id) = &var.var_id {
|
||||
stack.add_var(*var_id, arg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let (output, next_input) = match eval_block_with_early_return(
|
||||
&engine_state,
|
||||
&mut stack,
|
||||
&block,
|
||||
arg.into_pipeline_data(),
|
||||
redirect_stdout,
|
||||
redirect_stderr,
|
||||
) {
|
||||
// no data -> output nothing and stop.
|
||||
Ok(PipelineData::Empty) => (None, None),
|
||||
|
||||
Ok(PipelineData::Value(value, ..)) => {
|
||||
let span = value.span();
|
||||
match value {
|
||||
// {out: ..., next: ...} -> output and continue
|
||||
Value::Record { val, .. } => {
|
||||
let iter = val.into_iter();
|
||||
let mut out = None;
|
||||
let mut next = None;
|
||||
let mut err = None;
|
||||
|
||||
for (k, v) in iter {
|
||||
if k.to_lowercase() == "out" {
|
||||
out = Some(v);
|
||||
} else if k.to_lowercase() == "next" {
|
||||
next = Some(v);
|
||||
} else {
|
||||
let error = ShellError::GenericError(
|
||||
"Invalid block return".to_string(),
|
||||
format!("Unexpected record key '{}'", k),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
);
|
||||
err = Some(Value::error(error, block_span));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if err.is_some() {
|
||||
(err, None)
|
||||
} else {
|
||||
(out, next)
|
||||
}
|
||||
}
|
||||
|
||||
// some other value -> error and stop
|
||||
_ => {
|
||||
let error = ShellError::GenericError(
|
||||
"Invalid block return".to_string(),
|
||||
format!("Expected record, found {}", value.get_type()),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
);
|
||||
|
||||
(Some(Value::error(error, block_span)), None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(other) => {
|
||||
let val = other.into_value(block_span);
|
||||
let error = ShellError::GenericError(
|
||||
"Invalid block return".to_string(),
|
||||
format!("Expected record, found {}", val.get_type()),
|
||||
Some(val.span()),
|
||||
None,
|
||||
Vec::new(),
|
||||
);
|
||||
|
||||
(Some(Value::error(error, block_span)), None)
|
||||
}
|
||||
|
||||
// error -> error and stop
|
||||
Err(error) => (Some(Value::error(error, block_span)), None),
|
||||
};
|
||||
|
||||
// We use `state` to control when to stop, not `output`. By wrapping
|
||||
// it in a `Some`, we allow the generator to output `None` as a valid output
|
||||
// value.
|
||||
*state = next_input;
|
||||
Some(output)
|
||||
});
|
||||
|
||||
Ok(iter.flatten().into_pipeline_data(ctrlc))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
use crate::test_examples;
|
||||
|
||||
test_examples(Unfold {})
|
||||
}
|
||||
}
|
@ -103,6 +103,7 @@ mod touch;
|
||||
mod transpose;
|
||||
mod try_;
|
||||
mod ucp;
|
||||
mod unfold;
|
||||
mod uniq;
|
||||
mod uniq_by;
|
||||
mod update;
|
||||
|
102
crates/nu-command/tests/commands/unfold.rs
Normal file
102
crates/nu-command/tests/commands/unfold.rs
Normal file
@ -0,0 +1,102 @@
|
||||
use nu_test_support::{nu, pipeline};
|
||||
|
||||
#[test]
|
||||
fn unfold_no_next_break() {
|
||||
let actual =
|
||||
nu!("unfold 1 {|x| if $x == 3 { {out: $x}} else { {out: $x, next: ($x + 1)} }} | to nuon");
|
||||
|
||||
assert_eq!(actual.out, "[1, 2, 3]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_null_break() {
|
||||
let actual = nu!("unfold 1 {|x| if $x <= 3 { {out: $x, next: ($x + 1)} }} | to nuon");
|
||||
|
||||
assert_eq!(actual.out, "[1, 2, 3]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_allows_empty_output() {
|
||||
let actual = nu!(pipeline(
|
||||
r#"
|
||||
unfold 0 {|x|
|
||||
if $x == 1 {
|
||||
{next: ($x + 1)}
|
||||
} else if $x < 3 {
|
||||
{out: $x, next: ($x + 1)}
|
||||
}
|
||||
} | to nuon
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "[0, 2]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_allows_no_output() {
|
||||
let actual = nu!(pipeline(
|
||||
r#"
|
||||
unfold 0 {|x|
|
||||
if $x < 3 {
|
||||
{next: ($x + 1)}
|
||||
}
|
||||
} | to nuon
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_allows_null_state() {
|
||||
let actual = nu!(pipeline(
|
||||
r#"
|
||||
unfold 0 {|x|
|
||||
if $x == null {
|
||||
{out: "done"}
|
||||
} else if $x < 1 {
|
||||
{out: "going", next: ($x + 1)}
|
||||
} else {
|
||||
{out: "stopping", next: null}
|
||||
}
|
||||
} | to nuon
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "[going, stopping, done]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_allows_null_output() {
|
||||
let actual = nu!(pipeline(
|
||||
r#"
|
||||
unfold 0 {|x|
|
||||
if $x == 3 {
|
||||
{out: "done"}
|
||||
} else {
|
||||
{out: null, next: ($x + 1)}
|
||||
}
|
||||
} | to nuon
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "[null, null, null, done]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_disallows_extra_keys() {
|
||||
let actual = nu!("unfold 0 {|x| {foo: bar, out: $x}}");
|
||||
assert!(actual.err.contains("Invalid block return"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_disallows_list() {
|
||||
let actual = nu!("unfold 0 {|x| [$x, ($x + 1)]}");
|
||||
assert!(actual.err.contains("Invalid block return"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unfold_disallows_primitive() {
|
||||
let actual = nu!("unfold 0 {|x| 1}");
|
||||
assert!(actual.err.contains("Invalid block return"));
|
||||
}
|
Loading…
Reference in New Issue
Block a user