2021-12-19 08:46:13 +01:00
|
|
|
#[cfg(test)]
|
2022-11-09 22:55:05 +01:00
|
|
|
use nu_protocol::engine::Command;
|
2021-10-09 15:10:10 +02:00
|
|
|
|
2021-12-19 08:46:13 +01:00
|
|
|
#[cfg(test)]
|
2022-11-09 22:55:05 +01:00
|
|
|
/// Test-only entry point: hands `cmd` to the `test_examples` function
/// defined in the sibling `test_examples` module.
pub fn test_examples(cmd: impl Command + 'static) {
    test_examples::test_examples(cmd)
}
|
2021-10-29 08:26:29 +02:00
|
|
|
|
2021-12-19 08:46:13 +01:00
|
|
|
#[cfg(test)]
|
2022-11-09 22:55:05 +01:00
|
|
|
mod test_examples {
|
|
|
|
use super::super::{
|
feat: Add unfold command (#10489)
<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx
you can also mention related issues, PRs or discussions!
-->
# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.
Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
> [!NOTE]
> This PR description originally used examples where the `generator`
closure returned a list. It has since been updated to use records
instead.
The `unfold` command allows users to dynamically generate streams of
data. The stream is generated by repeatedly invoking a `generator`
closure. The `generator` closure accepts a single argument and returns a
record containing two optional keys: 'out' and 'next'. Each invocation,
the 'out' value, if present, is added to the stream. If a 'next' key is
present, it is used as the next argument to the closure, otherwise
generation stops.
The name "unfold" is borrowed from other functional-programming
languages. Whereas `fold` (or `reduce`) takes a stream of values and
outputs a single value, `unfold` takes a single value and outputs a
stream of values.
### Examples
A common example of using `unfold` is to generate a Fibonacci sequence.
See
[here](https://github.com/rust-itertools/itertools/blob/6ffdac103cf72dfd3b62a4de6dc25440b942e473/src/sources.rs#L65)
for an example of this in Rust's `itertools`.
```nushell
> unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10
───┬────
0 │ 0
1 │ 1
2 │ 1
3 │ 2
4 │ 3
5 │ 5
6 │ 8
7 │ 13
8 │ 21
9 │ 34
───┴────
```
This command is particularly useful when consuming paginated APIs, like
GitHub's. Previously, nushell users might use a loop and buffer
responses into a list, before returning all responses at once. However,
this behavior is not desirable if the result is very large. Using
`unfold` avoids buffering and allows subsequent pipeline stages to use
the data concurrently, as it's being fetched.
#### Before
```nushell
mut pages = []
for page in 1.. {
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
$pages = ($pages | append $resp)
if ($resp | length) < $PAGE_SIZE {
break
}
}
$pages
```
#### After
```nu
unfold 1 {|page|
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
if ($resp | length) < $PAGE_SIZE {
{out: $resp}
} else {
{out: $resp, next: ($page + 1)}
}
}
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
- An `unfold` generator is added to the default context.
# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands:
- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library
> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
Given the complexity of the `generator` closure's return value, it would
be good to document the semantics of `unfold` and provide some in-depth
examples showcasing what it can accomplish.
2023-09-30 16:08:06 +02:00
|
|
|
Ansi, Date, Enumerate, Filter, First, Flatten, From, Get, Into, IntoDatetime, IntoString,
|
|
|
|
Math, MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn,
|
|
|
|
SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
|
2022-11-09 22:55:05 +01:00
|
|
|
};
|
2023-02-24 16:54:42 +01:00
|
|
|
use crate::{Each, To};
|
2023-02-27 22:58:56 +01:00
|
|
|
use nu_cmd_lang::example_support::{
|
|
|
|
check_all_signature_input_output_types_entries_have_examples,
|
|
|
|
check_example_evaluates_to_expected_output,
|
|
|
|
check_example_input_and_output_types_match_command_signature,
|
|
|
|
};
|
2023-02-24 16:54:42 +01:00
|
|
|
use nu_cmd_lang::{Break, Echo, If, Let, Mut};
|
2022-11-09 22:55:05 +01:00
|
|
|
use nu_protocol::{
|
2023-02-27 22:58:56 +01:00
|
|
|
engine::{Command, EngineState, StateWorkingSet},
|
|
|
|
Type,
|
2022-11-09 22:55:05 +01:00
|
|
|
};
|
revert: move to ahash (#9464)
This PR reverts https://github.com/nushell/nushell/pull/9391
We try not to revert PRs like this, though after discussion with the
Nushell team, we decided to revert this one.
The main reason is that Nushell, as a codebase, isn't ready for these
kinds of optimisations. It's in the part of the development cycle where
our main focus should be on improving the algorithms inside of Nushell
itself. Once we have matured our algorithms, then we can look for
opportunities to switch out technologies we're using for alternate
forms.
Much of Nushell still has lots of opportunities for tuning the codebase,
paying down technical debt, and making the codebase generally cleaner
and more robust. This should be the focus. Performance improvements
should flow out of that work.
Said another way, optimisation that isn't part of tuning the codebase is
premature at this stage. We need to focus on doing the hard work of
making the engine, parser, etc better.
# User-Facing Changes
Reverts the HashMap -> ahash change.
cc @FilipAndersson245
2023-06-18 05:27:57 +02:00
|
|
|
use std::collections::HashSet;
|
2021-10-09 15:10:10 +02:00
|
|
|
|
2022-11-09 22:55:05 +01:00
|
|
|
pub fn test_examples(cmd: impl Command + 'static) {
|
|
|
|
let examples = cmd.examples();
|
|
|
|
let signature = cmd.signature();
|
|
|
|
let mut engine_state = make_engine_state(cmd.clone_box());
|
2021-12-24 01:16:50 +01:00
|
|
|
|
2022-11-09 22:55:05 +01:00
|
|
|
let cwd = std::env::current_dir().expect("Could not get current working directory.");
|
2021-10-09 15:10:10 +02:00
|
|
|
|
2022-11-09 22:55:05 +01:00
|
|
|
let mut witnessed_type_transformations = HashSet::<(Type, Type)>::new();
|
2021-10-09 15:10:10 +02:00
|
|
|
|
2022-11-09 22:55:05 +01:00
|
|
|
for example in examples {
|
|
|
|
if example.result.is_none() {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
witnessed_type_transformations.extend(
|
|
|
|
check_example_input_and_output_types_match_command_signature(
|
|
|
|
&example,
|
|
|
|
&cwd,
|
|
|
|
&mut make_engine_state(cmd.clone_box()),
|
|
|
|
&signature.input_output_types,
|
|
|
|
signature.operates_on_cell_paths(),
|
|
|
|
),
|
|
|
|
);
|
2023-01-29 23:31:35 +01:00
|
|
|
check_example_evaluates_to_expected_output(&example, cwd.as_path(), &mut engine_state);
|
2022-11-09 22:55:05 +01:00
|
|
|
}
|
2022-07-14 16:09:27 +02:00
|
|
|
|
2022-11-09 22:55:05 +01:00
|
|
|
check_all_signature_input_output_types_entries_have_examples(
|
|
|
|
signature,
|
|
|
|
witnessed_type_transformations,
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn make_engine_state(cmd: Box<dyn Command>) -> Box<EngineState> {
|
|
|
|
let mut engine_state = Box::new(EngineState::new());
|
|
|
|
|
|
|
|
let delta = {
|
|
|
|
// Base functions that are needed for testing
|
|
|
|
// Try to keep this working set small to keep tests running as fast as possible
|
2022-12-07 19:31:57 +01:00
|
|
|
let mut working_set = StateWorkingSet::new(&engine_state);
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Ansi));
|
|
|
|
working_set.add_decl(Box::new(Break));
|
|
|
|
working_set.add_decl(Box::new(Date));
|
2023-01-04 06:08:05 +01:00
|
|
|
working_set.add_decl(Box::new(Each));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Echo));
|
|
|
|
working_set.add_decl(Box::new(Enumerate));
|
2023-08-19 16:06:59 +02:00
|
|
|
working_set.add_decl(Box::new(Filter));
|
feat: Add unfold command (#10489)
<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx
you can also mention related issues, PRs or discussions!
-->
# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.
Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
> [!NOTE]
> This PR description originally used examples where the `generator`
closure returned a list. It has since been updated to use records
instead.
The `unfold` command allows users to dynamically generate streams of
data. The stream is generated by repeatedly invoking a `generator`
closure. The `generator` closure accepts a single argument and returns a
record containing two optional keys: 'out' and 'next'. Each invocation,
the 'out' value, if present, is added to the stream. If a 'next' key is
present, it is used as the next argument to the closure, otherwise
generation stops.
The name "unfold" is borrowed from other functional-programming
languages. Whereas `fold` (or `reduce`) takes a stream of values and
outputs a single value, `unfold` takes a single value and outputs a
stream of values.
### Examples
A common example of using `unfold` is to generate a fibbonacci sequence.
See
[here](https://github.com/rust-itertools/itertools/blob/6ffdac103cf72dfd3b62a4de6dc25440b942e473/src/sources.rs#L65)
for an example of this in rust's `itertools`.
```nushell
> unfold [0, 1] {|fib| {out: $fib.0, next: [$fib.1, ($fib.0 + $fib.1)]} } | first 10
───┬────
0 │ 0
1 │ 1
2 │ 1
3 │ 2
4 │ 3
5 │ 5
6 │ 8
7 │ 13
8 │ 21
9 │ 34
───┴────
```
This command is particularly useful when consuming paginated APIs, like
Github's. Previously, nushell users might use a loop and buffer
responses into a list, before returning all responses at once. However,
this behavior is not desirable if the result result is very large. Using
`unfold` avoids buffering and allows subsequent pipeline stages to use
the data concurrently, as it's being fetched.
#### Before
```nushell
mut pages = []
for page in 1.. {
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
$pages = ($pages | append $resp)
if ($resp | length) < $PAGE_SIZE {
break
}
}
$pages
```
#### After
```nu
unfold 1 {|page|
let resp = http get (
{
scheme: https,
host: "api.github.com",
path: "/repos/nushell/nushell/issues",
params: {
page: $page,
per_page: $PAGE_SIZE
}
} | url join)
if ($resp | length) < $PAGE_SIZE {
{out: $resp}
} else {
{out: $resp, next: ($page + 1)}
}
}
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
- An `unfold` generator is added to the default context.
# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands:
- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library
> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
Given the complexity of the `generator` closure's return value, it would
be good to document the semantics of `unfold` and provide some in-depth
examples showcasing what it can accomplish.
2023-09-30 16:08:06 +02:00
|
|
|
working_set.add_decl(Box::new(First));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Flatten));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(From));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Get));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(If));
|
|
|
|
working_set.add_decl(Box::new(Into));
|
2022-12-23 19:49:19 +01:00
|
|
|
working_set.add_decl(Box::new(IntoString));
|
2023-08-19 16:06:59 +02:00
|
|
|
working_set.add_decl(Box::new(IntoDatetime));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Let));
|
|
|
|
working_set.add_decl(Box::new(Math));
|
|
|
|
working_set.add_decl(Box::new(MathRound));
|
|
|
|
working_set.add_decl(Box::new(Mut));
|
|
|
|
working_set.add_decl(Box::new(Path));
|
2023-05-18 01:34:44 +02:00
|
|
|
working_set.add_decl(Box::new(PathParse));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(ParEach));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(Random));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Sort));
|
|
|
|
working_set.add_decl(Box::new(SortBy));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(Split));
|
|
|
|
working_set.add_decl(Box::new(SplitColumn));
|
|
|
|
working_set.add_decl(Box::new(SplitRow));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Str));
|
|
|
|
working_set.add_decl(Box::new(StrJoin));
|
|
|
|
working_set.add_decl(Box::new(StrLength));
|
|
|
|
working_set.add_decl(Box::new(StrReplace));
|
|
|
|
working_set.add_decl(Box::new(To));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(Url));
|
2023-02-02 23:59:58 +01:00
|
|
|
working_set.add_decl(Box::new(Update));
|
2022-12-23 19:49:19 +01:00
|
|
|
working_set.add_decl(Box::new(Values));
|
2022-11-09 22:55:05 +01:00
|
|
|
working_set.add_decl(Box::new(Wrap));
|
|
|
|
// Adding the command that is being tested to the working set
|
|
|
|
working_set.add_decl(cmd);
|
|
|
|
|
|
|
|
working_set.render()
|
|
|
|
};
|
|
|
|
|
|
|
|
engine_state
|
|
|
|
.merge_delta(delta)
|
|
|
|
.expect("Error merging delta");
|
|
|
|
engine_state
|
|
|
|
}
|
2021-10-09 15:10:10 +02:00
|
|
|
}
|