From e7d2717424210cd6a0262ce7307964e241c8586d Mon Sep 17 00:00:00 2001 From: Bahex Date: Tue, 3 Jun 2025 18:21:12 +0300 Subject: [PATCH] feat(std-rfc): add `iter` module and `recurse` command (#15840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description The `recurse` command is similar to `jq`'s `recurse`/`..` command. Along with values, it also returns their cell-paths relative to the "root" (initial input). By default it uses breadth-first traversal, collecting child items of all available sibling items before starting to process those child items. This means output is ordered in increasing depth. With the `--depth-first` flag it uses a stack-based recursive descent, which results in output order identical to `jq`'s `recurse`. It can be used in the following ways: - `... | recurse`: Recursively traverses the input value, returns each value it finds as a stream. - `... | recurse foo.bar`: Only descend through the given cell-path. - `... | recurse {|parent| ... }`: Produce child values with a closure. ```nushell { "foo": { "egg": "X" "spam": "Y" } "bar": { "quox": ["A" "B"] } } | recurse | update item { to nuon } # => ╭───┬──────────────┬───────────────────────────────────────────────╮ # => │ # │ path │ item │ # => ├───┼──────────────┼───────────────────────────────────────────────┤ # => │ 0 │ $. 
│ {foo: {egg: X, spam: Y}, bar: {quox: [A, B]}} │ # => │ 1 │ $.foo │ {egg: X, spam: Y} │ # => │ 2 │ $.bar │ {quox: [A, B]} │ # => │ 3 │ $.foo.egg │ "X" │ # => │ 4 │ $.foo.spam │ "Y" │ # => │ 5 │ $.bar.quox │ [A, B] │ # => │ 6 │ $.bar.quox.0 │ "A" │ # => │ 7 │ $.bar.quox.1 │ "B" │ # => ╰───┴──────────────┴───────────────────────────────────────────────╯ {"name": "/", "children": [ {"name": "/bin", "children": [ {"name": "/bin/ls", "children": []}, {"name": "/bin/sh", "children": []}]}, {"name": "/home", "children": [ {"name": "/home/stephen", "children": [ {"name": "/home/stephen/jq", "children": []}]}]}]} | recurse children | get item.name # => ╭───┬──────────────────╮ # => │ 0 │ / │ # => │ 1 │ /bin │ # => │ 2 │ /home │ # => │ 3 │ /bin/ls │ # => │ 4 │ /bin/sh │ # => │ 5 │ /home/stephen │ # => │ 6 │ /home/stephen/jq │ # => ╰───┴──────────────────╯ {"name": "/", "children": [ {"name": "/bin", "children": [ {"name": "/bin/ls", "children": []}, {"name": "/bin/sh", "children": []}]}, {"name": "/home", "children": [ {"name": "/home/stephen", "children": [ {"name": "/home/stephen/jq", "children": []}]}]}]} | recurse children --depth-first | get item.name # => ╭───┬──────────────────╮ # => │ 0 │ / │ # => │ 1 │ /bin │ # => │ 2 │ /bin/ls │ # => │ 3 │ /bin/sh │ # => │ 4 │ /home │ # => │ 5 │ /home/stephen │ # => │ 6 │ /home/stephen/jq │ # => ╰───┴──────────────────╯ 2 | recurse { ({path: square item: ($in * $in)}) } | take while { $in.item < 100 } # => ╭───┬─────────────────┬──────╮ # => │ # │ path │ item │ # => ├───┼─────────────────┼──────┤ # => │ 0 │ $. │ 2 │ # => │ 1 │ $.square │ 4 │ # => │ 2 │ $.square.square │ 16 │ # => ╰───┴─────────────────┴──────╯ ``` # User-Facing Changes No changes other than the new command. # Tests + Formatting Added tests for examples. (As we can't run them directly as tests yet.) 
- :green_circle: `toolkit test stdlib` # After Submitting - Update relevant parts of https://www.nushell.sh/cookbook/jq_v_nushell.html - `$env.config | recurse | where ($it.item | describe -d).type not-in [list, record, table]` can partially cover the use case of `config flatten`, should we do something? --------- Co-authored-by: Bahex <17417311+Bahex@users.noreply.github.com> --- crates/nu-std/src/lib.rs | 5 + crates/nu-std/std-rfc/iter/mod.nu | 174 +++++++++++++++++++++++ crates/nu-std/std-rfc/mod.nu | 1 + crates/nu-std/tests/test_std-rfc_iter.nu | 81 +++++++++++ 4 files changed, 261 insertions(+) create mode 100644 crates/nu-std/std-rfc/iter/mod.nu create mode 100644 crates/nu-std/tests/test_std-rfc_iter.nu diff --git a/crates/nu-std/src/lib.rs b/crates/nu-std/src/lib.rs index 894c468498..fb7c79f9c4 100644 --- a/crates/nu-std/src/lib.rs +++ b/crates/nu-std/src/lib.rs @@ -126,6 +126,11 @@ pub fn load_standard_library( "std-rfc/tables", include_str!("../std-rfc/tables/mod.nu"), ), + ( + "mod.nu", + "std-rfc/iter", + include_str!("../std-rfc/iter/mod.nu"), + ), ]; for (filename, std_rfc_subdir_name, content) in std_rfc_submodules.drain(..) 
{
diff --git a/crates/nu-std/std-rfc/iter/mod.nu b/crates/nu-std/std-rfc/iter/mod.nu
new file mode 100644
index 0000000000..5625af80a6
--- /dev/null
+++ b/crates/nu-std/std-rfc/iter/mod.nu
@@ -0,0 +1,200 @@
+# Join a list of cell-paths (or path members) into a single cell-path.
+#
+# Each element is expanded with `split cell-path`; an element for which that
+# fails is passed through unchanged.
+def cell-path-join []: list -> cell-path {
+    each {|e| try { split cell-path } catch { $e } }
+    | flatten
+    | into cell-path
+}
+
+# Prefix the `path` column of every input row with `parent`.
+def add-parent [parent: cell-path]: table -> table {
+    update path { [$parent, $in] | cell-path-join }
+}
+
+# List the direct children of the input as `path`/`item` rows.
+#
+# A record yields one row per field and a list one row per index; any other
+# value yields an empty list.
+def get-children []: [any -> table] {
+    let val = $in
+    match ($val | describe --detailed).type {
+        "record" => { $val | transpose path item }
+        "list" => { $val | enumerate | rename path item }
+        _ => { return [] }
+    }
+}
+
+# List the children found at `path` of the input as `path`/`item` rows.
+#
+# A list at `path` yields one row per element, with `path` prepended to each
+# index; any other value yields a single row. If `path` cannot be read from
+# the input, an empty list is returned.
+def get-children-at [path: cell-path]: [any -> table] {
+    let x = try { get $path } catch { return [] }
+
+    if ($x | describe --detailed).type == "list" {
+        $x | get-children | add-parent $path
+    } else {
+        [{
+            path: $path
+            item: $x
+        }]
+    }
+}
+
+# Recursively descend a nested value, returning each value along with its path.
+#
+# Recursively descends its input, producing all values as a stream, along with
+# the cell-paths to access those values.
+#
+# If a cell-path is provided as argument, rather than traversing all children,
+# only the given cell-path is followed. The cell-path is evaluated at each level,
+# relative to the parent element.
+#
+# If a closure is provided, it will be used to get children from parent values.
+# The closure can have a variety of return types, each one in the list being
+# coerced to the next type:
+# - list<any>
+# - table<item: any>
+# - table<item: any, path: any>
+# `path` is used to construct the full path of an item, being concatenated to
+# the parent item's path. If a child item does not have a `path` field, its
+# path defaults to `""`
+@example "Access each possible path in a value" {
+    {
+        "foo": {
+            "egg": "X"
+            "spam": "Y"
+        }
+        "bar": {
+            "quox": ["A" "B"]
+        }
+    }
+    | recurse
+    | update item { to nuon }
+} --result [
+    [path, item];
+    [ ($.), r#'{foo: {egg: X, spam: Y}, bar: {quox: [A, B]}}'# ],
+    [ ($.foo), r#'{egg: X, spam: Y}'# ],
+    [ ($.bar), r#'{quox: [A, B]}'# ],
+    [ ($.foo.egg), r#'X'# ],
+    [ ($.foo.spam), r#'Y'# ],
+    [ ($.bar.quox), r#'[A, B]'# ],
+    [ ($.bar.quox.0), r#'A'# ],
+    [ ($.bar.quox.1), r#'B'# ]
+]
+@example "Recurse example from `jq`'s manpage" {
+    {"name": "/", "children": [
+        {"name": "/bin", "children": [
+            {"name": "/bin/ls", "children": []},
+            {"name": "/bin/sh", "children": []}]},
+        {"name": "/home", "children": [
+            {"name": "/home/stephen", "children": [
+                {"name": "/home/stephen/jq", "children": []}]}]}]}
+    | recurse children
+    | get item.name
+} --result [/, /bin, /home, /bin/ls, /bin/sh, /home/stephen, /home/stephen/jq]
+@example "Recurse example from `jq`'s manpage, using depth-first traversal like `jq`" {
+    {"name": "/", "children": [
+        {"name": "/bin", "children": [
+            {"name": "/bin/ls", "children": []},
+            {"name": "/bin/sh", "children": []}]},
+        {"name": "/home", "children": [
+            {"name": "/home/stephen", "children": [
+                {"name": "/home/stephen/jq", "children": []}]}]}]}
+    | recurse children --depth-first
+    | get item.name
+} --result [/, /bin, /bin/ls, /bin/sh, /home, /home/stephen, /home/stephen/jq]
+@example '"Recurse" using a closure' {
+    2
+    | recurse { ({path: square item: ($in * $in)}) }
+    | take while { $in.item < 100 }
+} --result [
+    [path, item];
+    [$., 2],
+    [$.square, 4],
+    [$.square.square, 16]
+]
+@search-terms jq ".." nested
+export def recurse [
+    get_children?: oneof<cell-path, closure> # Specify how to get children from parent value.
+    --depth-first # Descend depth-first rather than breadth-first
+]: [any -> list] {
+    # Choose how the children of a single value are produced.
+    let descend = match ($get_children | describe --detailed).type {
+        "nothing" => {
+            {|| get-children }
+        }
+        "cell-path" | "string" | "int" => {
+            {|| get-children-at $get_children }
+        }
+        "closure" => {
+            {|parent|
+                # `append []` makes sure a single returned value becomes a list.
+                let output = try {
+                    $parent | do $get_children $parent
+                } catch {
+                    return []
+                }
+                | append []
+                let has_item = try { $output | get item; true } catch { false }
+
+                # Results without an `item` column are wrapped into one, and
+                # rows lacking a `path` get the default below.
+                # NOTE(review): the default reads as an empty string here; confirm
+                # it is intentional and not a placeholder lost in extraction.
+                $output
+                | if not $has_item { wrap item } else { }
+                | default "" path
+            }
+        }
+        $type => {
+            error make {
+                msg: "Type mismatch."
+                label: {
+                    text: $"Cannot get child values using a ($type)"
+                    span: (metadata $get_children).span
+                }
+                help: "Try using a cell-path or a closure."
+            }
+        }
+    }
+
+    # Step closure for `generate`; its state is the work still to be done.
+    let fn = if $depth_first {
+        # State is a stack of pending rows: emit the top row and push its
+        # children in front of the remaining rows.
+        {|stack|
+            match $stack {
+                [] => { {} }
+                [$head, ..$tail] => {
+                    let children = $head.item | do $descend $head.item | add-parent $head.path
+                    {
+                        out: $head,
+                        next: ($tail | prepend $children),
+                    }
+                }
+            }
+        }
+    } else {
+        # State is one whole depth level: emit it as a list and continue with
+        # the children of all its rows, if there are any.
+        {|out|
+            let children = $out
+            | each {|e| $e.item | do $descend $e.item | add-parent $e.path }
+            | flatten
+
+            if ($children | is-not-empty) {
+                {out: $out, next: $children}
+            } else {
+                {out: $out}
+            }
+        }
+    }
+
+    # Breadth-first steps emit lists of rows, so they are flattened into rows.
+    generate $fn [{path: ($.), item: ($in) }]
+    | if not $depth_first { flatten } else { }
+}
diff --git a/crates/nu-std/std-rfc/mod.nu b/crates/nu-std/std-rfc/mod.nu
index 6d2e26c427..efb375378b 100644
--- a/crates/nu-std/std-rfc/mod.nu
+++ b/crates/nu-std/std-rfc/mod.nu
@@ -3,6 +3,7 @@ export use tables *
 export use path *
 export module clip
 export module str
+export module iter
 
 # kv module depends on sqlite feature, which may not be available in some builds
 const kv_module = if ("sqlite" in (version).features) { "std-rfc/kv" } else { null }
diff --git a/crates/nu-std/tests/test_std-rfc_iter.nu b/crates/nu-std/tests/test_std-rfc_iter.nu
new file mode 100644
index 0000000000..b215cc1ab6
--- /dev/null
+++ b/crates/nu-std/tests/test_std-rfc_iter.nu
@@ 
-0,0 +1,68 @@
+use std/assert
+use std/testing *
+use std-rfc/iter *
+
+# Directory-like tree from the `recurse` example in `jq`'s manpage.
+def jq-manpage-tree [] {
+    {name: "/", children: [
+        {name: "/bin", children: [
+            {name: "/bin/ls", children: []},
+            {name: "/bin/sh", children: []}]},
+        {name: "/home", children: [
+            {name: "/home/stephen", children: [
+                {name: "/home/stephen/jq", children: []}]}]}]}
+}
+
+# With no argument, every nested value is returned together with its path.
+@test
+def recurse-example-basic [] {
+    let input = {
+        foo: {egg: "X", spam: "Y"}
+        bar: {quox: ["A", "B"]}
+    }
+    let actual = $input | recurse
+
+    assert equal $actual [
+        {path: ($.), item: {foo: {egg: "X", spam: "Y"}, bar: {quox: ["A", "B"]}}}
+        {path: ($.foo), item: {egg: "X", spam: "Y"}}
+        {path: ($.bar), item: {quox: ["A", "B"]}}
+        {path: ($.foo.egg), item: "X"}
+        {path: ($.foo.spam), item: "Y"}
+        {path: ($.bar.quox), item: ["A", "B"]}
+        {path: ($.bar.quox.0), item: "A"}
+        {path: ($.bar.quox.1), item: "B"}
+    ]
+}
+
+# A cell-path argument is followed at every level, breadth-first by default.
+@test
+def recurse-example-jq [] {
+    let names = jq-manpage-tree | recurse children | get item.name
+
+    assert equal $names [/, /bin, /home, /bin/ls, /bin/sh, /home/stephen, /home/stephen/jq]
+}
+
+# `--depth-first` visits each subtree completely before moving to its sibling.
+@test
+def recurse-example-jq-depth-first [] {
+    let names = jq-manpage-tree | recurse children --depth-first | get item.name
+
+    assert equal $names [/, /bin, /bin/ls, /bin/sh, /home, /home/stephen, /home/stephen/jq]
+}
+
+# A closure argument computes the children; the stream is cut off with `take while`.
+@test
+def recurse-example-closure [] {
+    let squares = 2
+    | recurse { ({path: square item: ($in * $in)}) }
+    | take while {|row| $row.item < 100 }
+
+    let expected = [
+        [path, item];
+        [$., 2],
+        [$.square, 4],
+        [$.square.square, 16]
+    ]
+
+    assert equal $squares $expected
+}