From 03d455a68885fc95188cbd74f7daf2a57a47b783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Lazenga?= Date: Tue, 22 Apr 2025 00:19:08 +0100 Subject: [PATCH 01/22] Fix #13546: Outer joins incorrectly removing unmatched rows (#15472) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #13546 # Description Previously, outer joins would remove rows without join columns, since the "did not match" logic only executed when the row had the join column. To solve this, missing join columns are now treated the same as "exists but did not match" cases. The logic now executes both when the join column doesn't exist and when it exists but doesn't match, ensuring rows without join columns are preserved. If the join column is not defined at all, the previous behavior remains unchanged. Example: ``` For the tables: let left_side = [{a: a1 ref: 1} {a: a2 ref: 2} {a: a3}] let right_side = [[b ref]; [b1 1] [b2 2] [b3 3]] Running "$left_side | join -l $right_side ref" now outputs: ╭───┬────┬─────┬────╮ │ # │ a │ ref │ b │ ├───┼────┼─────┼────┤ │ 0 │ a1 │ 1 │ b1 │ │ 1 │ a2 │ 2 │ b2 │ │ 2 │ a3 │ │ │ ╰───┴────┴─────┴────╯ ``` # User-Facing Changes The ```join``` command will behave more similarly to SQL-style joins. In this case, rows that lack the join column are preserved. # Tests + Formatting Added 2 test cases. fmt + clippy OK. # After Submitting I don't believe anything is necessary. --- crates/nu-command/src/filters/join.rs | 75 ++++++++++++++---------- crates/nu-command/tests/commands/join.rs | 46 +++++++++++++++ 2 files changed, 89 insertions(+), 32 deletions(-) diff --git a/crates/nu-command/src/filters/join.rs b/crates/nu-command/src/filters/join.rs index 7f3bbae238..1dea038347 100644 --- a/crates/nu-command/src/filters/join.rs +++ b/crates/nu-command/src/filters/join.rs @@ -255,6 +255,16 @@ fn join_rows( config: &Config, span: Span, ) { + if !this + .iter() + .any(|this_record| match this_record.as_record() { + Ok(record) => record.contains(this_join_key), + Err(_) => false, + }) + { + // `this` table does not contain the join column; do nothing + return; + } for this_row in this { if let Value::Record { val: this_record, .. @@ -281,39 +291,40 @@ fn join_rows( result.push(Value::record(record, span)) } } - } else if !matches!(join_type, JoinType::Inner) { - // `other` table did not contain any rows matching - // `this` row on the join column; emit a single joined - // row with null values for columns not present, - let other_record = other_keys - .iter() - .map(|&key| { - let val = if Some(key.as_ref()) == shared_join_key { - this_record - .get(key) - .cloned() - .unwrap_or_else(|| Value::nothing(span)) - } else { - Value::nothing(span) - }; - - (key.clone(), val) - }) - .collect(); - - let record = match join_type { - JoinType::Inner | JoinType::Right => { - merge_records(&other_record, this_record, shared_join_key) - } - JoinType::Left => { - merge_records(this_record, &other_record, shared_join_key) - } - _ => panic!("not implemented"), - }; - - result.push(Value::record(record, span)) + continue; } - } // else { a row is missing a value for the join column } + } + if !matches!(join_type, JoinType::Inner) { + // Either `this` row is missing a value for the join column or + // `other` table did not contain any rows matching + // `this` row on the join column; emit a single joined + // row with null values for columns not present + let other_record = other_keys + .iter() + .map(|&key| { + let val = if Some(key.as_ref()) == shared_join_key { + this_record + .get(key) + .cloned() + .unwrap_or_else(|| Value::nothing(span)) + } else { + Value::nothing(span) + }; + + (key.clone(), val) + }) + .collect(); + + let record = match join_type { + JoinType::Inner | JoinType::Right => { + merge_records(&other_record, this_record, shared_join_key) + } + JoinType::Left => merge_records(this_record, &other_record, shared_join_key), + _ => panic!("not implemented"), + }; + + result.push(Value::record(record, span)) + } }; } } diff --git a/crates/nu-command/tests/commands/join.rs b/crates/nu-command/tests/commands/join.rs index a3c2bad11e..4483debce7 100644 --- a/crates/nu-command/tests/commands/join.rs +++ b/crates/nu-command/tests/commands/join.rs @@ -195,6 +195,52 @@ fn do_cases_where_result_differs_between_join_types(join_type: &str) { ), ], ), + ( + // a row in the left table does not have the join column + ( + "[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3}]", + "[{ref: 1 b: 1} {ref: 2 b: 2} {ref: 3 b: 3}]", + "ref", + ), + [ + ("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"), + ( + "--left", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null]]", + ), + ( + "--right", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, 3, 3]]", + ), + ( + "--outer", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null], [null, 3, 3]]", + ), + ], + ), + ( + // a row in the right table does not have the join column + ( + "[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3 ref: 3}]", + "[{ref: 1 b: 1} {ref: 2 b: 2} {b: 3}]", + "ref", + ), + [ + ("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"), + ( + "--left", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null]]", + ), + ( + "--right", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, null, 3]]", + ), + ( + "--outer", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null], [null, null, 3]]", + ), + ], + ), ] { for (join_type_, expected) in join_types { if join_type_ == join_type { From a9657e17ad70932209499c74759ffea72bb1728d Mon Sep 17 00:00:00 2001 From: Douglas <32344964+NotTheDr01ds@users.noreply.github.com> Date: Mon, 21 Apr 2025 19:22:46 -0400 Subject: [PATCH 02/22] Add env-conversions helpers to std (#15569) When combined with [the Cookbook update](https://github.com/nushell/nushell.github.io/pull/1878), this resolves #15452 # Description When we removed the startup `ENV_CONVERSION` for path, as noted in the issue above, we removed the ability for users to access this closure for other purposes. This PR adds the PATH closures back as a `std` commands that outputs a record of closures (similar to `ENV_CONVERSIONS`). # User-Facing Changes Doc will be updated and users can once again easily access `direnv` # Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` # After Submitting Doc PR to be merged when released in 0.104 --- crates/nu-std/std/config/mod.nu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/crates/nu-std/std/config/mod.nu b/crates/nu-std/std/config/mod.nu index ce4549d3be..e96da375fc 100644 --- a/crates/nu-std/std/config/mod.nu +++ b/crates/nu-std/std/config/mod.nu @@ -133,3 +133,13 @@ export def light-theme [] { shape_raw_string: light_purple } } + +# Returns helper closures that can be used for ENV_CONVERSIONS and other purposes +export def env-conversions [] { + { + "path": { + from_string: {|s| $s | split row (char esep) | path expand --no-symlink } + to_string: {|v| $v | path expand --no-symlink | str join (char esep) } + } + } +} \ No newline at end of file From 6193679dfc0c0bc6e908c38e5c5dd288ef8f3f0c Mon Sep 17 00:00:00 2001 From: Tyarel <98483313+Tyarel8@users.noreply.github.com> Date: Tue, 22 Apr 2025 16:30:38 +0200 Subject: [PATCH 03/22] Fix `kv set` with a closure argument (#15588) Fixes #15528 # Description Fixed `kv set` passing the pipeline input to the closure instead of the value stored in that key. # User-Facing Changes Now `kv set` will pass the value in that key to the closure. # Tests + Formatting # After Submitting --- crates/nu-std/std-rfc/kv/mod.nu | 2 +- crates/nu-std/tests/test_std-rfc_kv.nu | 50 +++++++++++++++++++++----- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/crates/nu-std/std-rfc/kv/mod.nu b/crates/nu-std/std-rfc/kv/mod.nu index 95d90282e1..31c990fd6a 100644 --- a/crates/nu-std/std-rfc/kv/mod.nu +++ b/crates/nu-std/std-rfc/kv/mod.nu @@ -38,7 +38,7 @@ export def "kv set" [ # If passed a closure, execute it let arg_type = ($value_or_closure | describe) let value = match $arg_type { - closure => { $input | do $value_or_closure } + closure => { kv get $key --universal=$universal | do $value_or_closure } _ => ($value_or_closure | default $input) } diff --git a/crates/nu-std/tests/test_std-rfc_kv.nu b/crates/nu-std/tests/test_std-rfc_kv.nu index 84467fceee..f30a709dee 100644 --- a/crates/nu-std/tests/test_std-rfc_kv.nu +++ b/crates/nu-std/tests/test_std-rfc_kv.nu @@ -83,7 +83,7 @@ def local-transpose_to_record [] { } @test -def local-using_closure [] { +def local-using_cellpaths [] { if ('sqlite' not-in (version).features) { return } let key = (random uuid) @@ -91,8 +91,8 @@ def local-using_closure [] { let size_key = (random uuid) ls - | kv set $name_key { get name } - | kv set $size_key { get size } + | kv set $name_key $in.name + | kv set $size_key $in.size let expected = "list" let actual = (kv get $name_key | describe) @@ -106,6 +106,22 @@ def local-using_closure [] { kv drop $size_key } +@test +def local-using_closure [] { + if ('sqlite' not-in (version).features) { return } + + let key = (random uuid) + + kv set $key 5 + kv set $key { $in + 1 } + + let expected = 6 + let actual = (kv get $key) + assert equal $actual $expected + + kv drop $key +} + @test def local-return-entire-list [] { if ('sqlite' not-in (version).features) { return } @@ -137,7 +153,7 @@ def local-return_value_only [] { let key = (random uuid) let expected = 'VALUE' - let actual = ('value' | kv set -r v $key {str upcase}) + let actual = ('value' | kv set -r v $key ($in | str upcase)) assert equal $actual $expected @@ -233,7 +249,7 @@ def universal-transpose_to_record [] { } @test -def universal-using_closure [] { +def universal-using_cellpaths [] { if ('sqlite' not-in (version).features) { return } let key = (random uuid) @@ -243,8 +259,8 @@ def universal-using_closure [] { let size_key = (random uuid) ls - | kv set -u $name_key { get name } - | kv set -u $size_key { get size } + | kv set -u $name_key $in.name + | kv set -u $size_key $in.size let expected = "list" let actual = (kv get -u $name_key | describe) @@ -259,6 +275,24 @@ def universal-using_closure [] { rm $env.NU_UNIVERSAL_KV_PATH } +@test +def universal-using_closure [] { + if ('sqlite' not-in (version).features) { return } + + let key = (random uuid) + $env.NU_UNIVERSAL_KV_PATH = (mktemp -t --suffix .sqlite3) + + kv set -u $key 5 + kv set -u $key { $in + 1 } + + let expected = 6 + let actual = (kv get -u $key) + assert equal $actual $expected + + kv drop -u $key + rm $env.NU_UNIVERSAL_KV_PATH +} + @test def universal-return-entire-list [] { if ('sqlite' not-in (version).features) { return } @@ -295,7 +329,7 @@ def universal-return_value_only [] { let key = (random uuid) let expected = 'VALUE' - let actual = ('value' | kv set --universal -r v $key {str upcase}) + let actual = ('value' | kv set --universal -r v $key ($in | str upcase)) assert equal $actual $expected From 1db4be12d1e765ec51b58e8d338a0ff353c0d849 Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Tue, 22 Apr 2025 13:17:11 -0400 Subject: [PATCH 04/22] fix(polars): remove requirement that pivot columns must be same type in `polars pivot` (#15608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Contrary to the underlying implementation in polars rust/python, `polars pivot` throws an error if the user tries to pivot on multiple columns of different types. This PR seeks to remove this type-check. See comparison below. ```nushell # Current implementation: throws error when pivoting on multiple values of different types. > [[name subject date test_1 test_2 grade_1 grade_2]; [Cady maths 2025-04-01 98 100 A A] [Cady physics 2025-04-01 99 100 A A] [Karen maths 2025-04-02 61 60 D D] [Karen physics 2025-04-02 58 60 D D]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1 grade_1] Error: × Merge error ╭─[entry #291:1:271] 1 │ [[name subject date test_1 test_2 grade_1 grade_2]; [Cady maths 2025-04-01 98 100 A A] [Cady physics 2025-04-01 99 100 A A] [Karen maths 2025-04-02 61 60 D D] [Karen physics 2025-04-02 58 60 D D]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1 grade_1] · ───────┬────── · ╰── found different column types in list ╰──── help: datatypes i64 and str are incompatible # Proposed implementation > [[name subject date test_1 test_2 grade_1 grade_2]; [Cady maths 2025-04-01 98 100 A A] [Cady physics 2025-04-01 99 100 A A] [Karen maths 2025-04-02 61 60 D D] [Karen physics 2025-04-02 58 60 D D]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1 grade_1] ╭───┬───────┬──────────────┬────────────────┬───────────────┬─────────────────╮ │ # │ name │ test_1_maths │ test_1_physics │ grade_1_maths │ grade_1_physics │ ├───┼───────┼──────────────┼────────────────┼───────────────┼─────────────────┤ │ 0 │ Cady │ 98 │ 99 │ A │ A │ │ 1 │ Karen │ 61 │ 58 │ D │ D │ ╰───┴───────┴──────────────┴────────────────┴───────────────┴─────────────────╯ ``` Additionally, this PR ports over the `separator` parameter in `pivot`, which allows the user to specify how to delimit multiple `values` column names: ```nushell > [[name subject date test_1 test_2 grade_1 grade_2]; [Cady maths 2025-04-01 98 100 A A] [Cady physics 2025-04-01 99 100 A A] [Karen maths 2025-04-02 61 60 D D] [Karen physics 2025-04-02 58 60 D D]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1 grade_1] --separator / ╭───┬───────┬──────────────┬────────────────┬───────────────┬─────────────────╮ │ # │ name │ test_1/maths │ test_1/physics │ grade_1/maths │ grade_1/physics │ ├───┼───────┼──────────────┼────────────────┼───────────────┼─────────────────┤ │ 0 │ Cady │ 98 │ 99 │ A │ A │ │ 1 │ Karen │ 61 │ 58 │ D │ D │ ╰───┴───────┴──────────────┴────────────────┴───────────────┴─────────────────╯ ``` # User-Facing Changes Soft breaking change: where a user may have previously expected an error (pivoting on multiple columns with different types), no error is thrown. # Tests + Formatting Examples were added to `polars pivot`. # After Submitting --- .../src/dataframe/command/data/pivot.rs | 78 ++++++++++++++++--- 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs b/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs index 946868e6ad..9253854ab1 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs @@ -4,6 +4,7 @@ use nu_protocol::{ Value, }; +use chrono::DateTime; use polars_ops::pivot::{pivot, PivotAgg}; use crate::{ @@ -54,6 +55,12 @@ impl PluginCommand for PivotDF { "Aggregation to apply when pivoting. The following are supported: first, sum, min, max, mean, median, count, last", Some('a'), ) + .named( + "separator", + SyntaxShape::String, + "Delimiter in generated column names in case of multiple `values` columns (default '_')", + Some('p'), + ) .switch( "sort", "Sort columns", @@ -74,8 +81,8 @@ impl PluginCommand for PivotDF { fn examples(&self) -> Vec { vec![ Example { - example: "[[name subject test_1 test_2]; [Cady maths 98 100] [Cady physics 99 100] [Karen maths 61 60] [Karen physics 58 60]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1]", description: "Perform a pivot in order to show individuals test score by subject", + example: "[[name subject date test_1 test_2]; [Cady maths 2025-04-01 98 100] [Cady physics 2025-04-01 99 100] [Karen maths 2025-04-02 61 60] [Karen physics 2025-04-02 58 60]] | polars into-df | polars pivot --on [subject] --index [name date] --values [test_1]", result: Some( NuDataFrame::try_from_columns( vec![ @@ -83,6 +90,27 @@ impl PluginCommand for PivotDF { "name".to_string(), vec![Value::string("Cady", Span::test_data()), Value::string("Karen", Span::test_data())], ), + Column::new( + "date".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2025-04-01 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2025-04-02 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + ), Column::new( "maths".to_string(), vec![Value::int(98, Span::test_data()), Value::int(61, Span::test_data())], @@ -97,6 +125,39 @@ impl PluginCommand for PivotDF { .expect("simple df for test should not fail") .into_value(Span::unknown()) ) + }, + Example { + description: "Perform a pivot with multiple `values` columns with a separator", + example: "[[name subject date test_1 test_2 grade_1 grade_2]; [Cady maths 2025-04-01 98 100 A A] [Cady physics 2025-04-01 99 100 A A] [Karen maths 2025-04-02 61 60 D D] [Karen physics 2025-04-02 58 60 D D]] | polars into-df | polars pivot --on [subject] --index [name] --values [test_1 grade_1] --separator /", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "name".to_string(), + vec![Value::string("Cady", Span::test_data()), Value::string("Karen", Span::test_data())], + ), + Column::new( + "test_1/maths".to_string(), + vec![Value::int(98, Span::test_data()), Value::int(61, Span::test_data())], + ), + Column::new( + "test_1/physics".to_string(), + vec![Value::int(99, Span::test_data()), Value::int(58, Span::test_data())], + ), + Column::new( + "grade_1/maths".to_string(), + vec![Value::string("A", Span::test_data()), Value::string("D", Span::test_data())], + ), + Column::new( + "grade_1/physics".to_string(), + vec![Value::string("A", Span::test_data()), Value::string("D", Span::test_data())], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::unknown()) + ) } ] } @@ -135,19 +196,17 @@ fn command_eager( let index_col: Vec = call.get_flag("index")?.expect("required value"); let val_col: Vec = call.get_flag("values")?.expect("required value"); - let (on_col_string, id_col_span) = convert_columns_string(on_col, call.head)?; - let (index_col_string, index_col_span) = convert_columns_string(index_col, call.head)?; - let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; - - check_column_datatypes(df.as_ref(), &on_col_string, id_col_span)?; - check_column_datatypes(df.as_ref(), &index_col_string, index_col_span)?; - check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; + let (on_col_string, ..) = convert_columns_string(on_col, call.head)?; + let (index_col_string, ..) = convert_columns_string(index_col, call.head)?; + let (val_col_string, ..) = convert_columns_string(val_col, call.head)?; let aggregate: Option = call .get_flag::("aggregate")? .map(pivot_agg_for_str) .transpose()?; + let separator: Option = call.get_flag::("separator")?; + let sort = call.has_flag("sort")?; let polars_df = df.to_polars(); @@ -159,7 +218,7 @@ fn command_eager( Some(&val_col_string), sort, aggregate, - None, + separator.as_deref(), ) .map_err(|e| ShellError::GenericError { error: format!("Pivot error: {e}"), @@ -173,6 +232,7 @@ fn command_eager( res.to_pipeline_data(plugin, engine, call.head) } +#[allow(dead_code)] fn check_column_datatypes>( df: &polars::prelude::DataFrame, cols: &[T], From e1ffaf254847f7ff26c9ca06ac43bbe341eb6f0a Mon Sep 17 00:00:00 2001 From: suimong Date: Wed, 23 Apr 2025 02:00:20 +0800 Subject: [PATCH 05/22] Improve `std/log` performance (#15614) closes #15610 . # Description This PR attempts to improve the performance of `std/log *` by making the following changes: 1. use explicit piping instead of `reduce` for constructing the log message 2. constify `log-level`, `log-ansi`, `log-types` etc. 3. use `.` instead of `get` to access `$env` fields # User-Facing Changes Nothing. # Tests + Formatting # After Submitting --------- Co-authored-by: Ben Yang Co-authored-by: suimong --- crates/nu-std/std/log/mod.nu | 227 +++++++++++++++++------------------ 1 file changed, 113 insertions(+), 114 deletions(-) diff --git a/crates/nu-std/std/log/mod.nu b/crates/nu-std/std/log/mod.nu index 6686301dd8..19354a615e 100644 --- a/crates/nu-std/std/log/mod.nu +++ b/crates/nu-std/std/log/mod.nu @@ -1,80 +1,79 @@ -export def log-ansi [] { - { - "CRITICAL": (ansi red_bold), - "ERROR": (ansi red), - "WARNING": (ansi yellow), - "INFO": (ansi default), - "DEBUG": (ansi default_dimmed) - } +const LOG_ANSI = { + "CRITICAL": (ansi red_bold), + "ERROR": (ansi red), + "WARNING": (ansi yellow), + "INFO": (ansi default), + "DEBUG": (ansi default_dimmed) } -export def log-level [] { - { - "CRITICAL": 50, - "ERROR": 40, - "WARNING": 30, - "INFO": 20, - "DEBUG": 10 - } +export def log-ansi [] {$LOG_ANSI} + +const LOG_LEVEL = { + "CRITICAL": 50, + "ERROR": 40, + "WARNING": 30, + "INFO": 20, + "DEBUG": 10 } -export def log-prefix [] { - { - "CRITICAL": "CRT", - "ERROR": "ERR", - "WARNING": "WRN", - "INFO": "INF", - "DEBUG": "DBG" - } + +export def log-level [] {$LOG_LEVEL} + +const LOG_PREFIX = { + "CRITICAL": "CRT", + "ERROR": "ERR", + "WARNING": "WRN", + "INFO": "INF", + "DEBUG": "DBG" } -export def log-short-prefix [] { - { - "CRITICAL": "C", - "ERROR": "E", - "WARNING": "W", - "INFO": "I", - "DEBUG": "D" - } + +export def log-prefix [] {$LOG_PREFIX} + +const LOG_SHORT_PREFIX = { + "CRITICAL": "C", + "ERROR": "E", + "WARNING": "W", + "INFO": "I", + "DEBUG": "D" } + +export def log-short-prefix [] {$LOG_SHORT_PREFIX} + export-env { $env.NU_LOG_FORMAT = $env.NU_LOG_FORMAT? | default "%ANSI_START%%DATE%|%LEVEL%|%MSG%%ANSI_STOP%" $env.NU_LOG_DATE_FORMAT = $env.NU_LOG_DATE_FORMAT? | default "%Y-%m-%dT%H:%M:%S%.3f" } -def log-types [] { - ( - { - "CRITICAL": { - "ansi": (log-ansi).CRITICAL, - "level": (log-level).CRITICAL, - "prefix": (log-prefix).CRITICAL, - "short_prefix": (log-short-prefix).CRITICAL - }, - "ERROR": { - "ansi": (log-ansi).ERROR, - "level": (log-level).ERROR, - "prefix": (log-prefix).ERROR, - "short_prefix": (log-short-prefix).ERROR - }, - "WARNING": { - "ansi": (log-ansi).WARNING, - "level": (log-level).WARNING, - "prefix": (log-prefix).WARNING, - "short_prefix": (log-short-prefix).WARNING - }, - "INFO": { - "ansi": (log-ansi).INFO, - "level": (log-level).INFO, - "prefix": (log-prefix).INFO, - "short_prefix": (log-short-prefix).INFO - }, - "DEBUG": { - "ansi": (log-ansi).DEBUG, - "level": (log-level).DEBUG, - "prefix": (log-prefix).DEBUG, - "short_prefix": (log-short-prefix).DEBUG - } - } - ) +const LOG_TYPES = { + "CRITICAL": { + "ansi": $LOG_ANSI.CRITICAL, + "level": $LOG_LEVEL.CRITICAL, + "prefix": $LOG_PREFIX.CRITICAL, + "short_prefix": $LOG_SHORT_PREFIX.CRITICAL + }, + "ERROR": { + "ansi": $LOG_ANSI.ERROR, + "level": $LOG_LEVEL.ERROR, + "prefix": $LOG_PREFIX.ERROR, + "short_prefix": $LOG_SHORT_PREFIX.ERROR + }, + "WARNING": { + "ansi": $LOG_ANSI.WARNING, + "level": $LOG_LEVEL.WARNING, + "prefix": $LOG_PREFIX.WARNING, + "short_prefix": $LOG_SHORT_PREFIX.WARNING + }, + "INFO": { + "ansi": $LOG_ANSI.INFO, + "level": $LOG_LEVEL.INFO, + "prefix": $LOG_PREFIX.INFO, + "short_prefix": $LOG_SHORT_PREFIX.INFO + }, + "DEBUG": { + "ansi": $LOG_ANSI.DEBUG, + "level": $LOG_LEVEL.DEBUG, + "prefix": $LOG_PREFIX.DEBUG, + "short_prefix": $LOG_SHORT_PREFIX.DEBUG + } } def parse-string-level [ @@ -82,16 +81,16 @@ def parse-string-level [ ] { let level = ($level | str upcase) - if $level in [(log-prefix).CRITICAL (log-short-prefix).CRITICAL "CRIT" "CRITICAL"] { - (log-level).CRITICAL - } else if $level in [(log-prefix).ERROR (log-short-prefix).ERROR "ERROR"] { - (log-level).ERROR - } else if $level in [(log-prefix).WARNING (log-short-prefix).WARNING "WARN" "WARNING"] { - (log-level).WARNING - } else if $level in [(log-prefix).DEBUG (log-short-prefix).DEBUG "DEBUG"] { - (log-level).DEBUG + if $level in [$LOG_PREFIX.CRITICAL $LOG_SHORT_PREFIX.CRITICAL "CRIT" "CRITICAL"] { + $LOG_LEVEL.CRITICAL + } else if $level in [$LOG_PREFIX.ERROR $LOG_SHORT_PREFIX.ERROR "ERROR"] { + $LOG_LEVEL.ERROR + } else if $level in [$LOG_PREFIX.WARNING $LOG_SHORT_PREFIX.WARNING "WARN" "WARNING"] { + $LOG_LEVEL.WARNING + } else if $level in [$LOG_PREFIX.DEBUG $LOG_SHORT_PREFIX.DEBUG "DEBUG"] { + $LOG_LEVEL.DEBUG } else { - (log-level).INFO + $LOG_LEVEL.INFO } } @@ -99,41 +98,41 @@ def parse-int-level [ level: int, --short (-s) ] { - if $level >= (log-level).CRITICAL { + if $level >= $LOG_LEVEL.CRITICAL { if $short { - (log-short-prefix).CRITICAL + $LOG_SHORT_PREFIX.CRITICAL } else { - (log-prefix).CRITICAL + $LOG_PREFIX.CRITICAL } - } else if $level >= (log-level).ERROR { + } else if $level >= $LOG_LEVEL.ERROR { if $short { - (log-short-prefix).ERROR + $LOG_SHORT_PREFIX.ERROR } else { - (log-prefix).ERROR + $LOG_PREFIX.ERROR } - } else if $level >= (log-level).WARNING { + } else if $level >= $LOG_LEVEL.WARNING { if $short { - (log-short-prefix).WARNING + $LOG_SHORT_PREFIX.WARNING } else { - (log-prefix).WARNING + $LOG_PREFIX.WARNING } - } else if $level >= (log-level).INFO { + } else if $level >= $LOG_LEVEL.INFO { if $short { - (log-short-prefix).INFO + $LOG_SHORT_PREFIX.INFO } else { - (log-prefix).INFO + $LOG_PREFIX.INFO } } else { if $short { - (log-short-prefix).DEBUG + $LOG_SHORT_PREFIX.DEBUG } else { - (log-prefix).DEBUG + $LOG_PREFIX.DEBUG } } } def current-log-level [] { - let env_level = ($env.NU_LOG_LEVEL? | default (log-level).INFO) + let env_level = ($env.NU_LOG_LEVEL? | default $LOG_LEVEL.INFO) try { $env_level | into int @@ -188,7 +187,7 @@ export def critical [ --format (-f): string # A format (for further reference: help std log) ] { let format = $format | default "" - handle-log $message (log-types | get CRITICAL) $format $short + handle-log $message ($LOG_TYPES.CRITICAL) $format $short } # Log an error message @@ -198,7 +197,7 @@ export def error [ --format (-f): string # A format (for further reference: help std log) ] { let format = $format | default "" - handle-log $message (log-types | get ERROR) $format $short + handle-log $message ($LOG_TYPES.ERROR) $format $short } # Log a warning message @@ -208,7 +207,7 @@ export def warning [ --format (-f): string # A format (for further reference: help std log) ] { let format = $format | default "" - handle-log $message (log-types | get WARNING) $format $short + handle-log $message ($LOG_TYPES.WARNING) $format $short } # Log an info message @@ -218,7 +217,7 @@ export def info [ --format (-f): string # A format (for further reference: help std log) ] { let format = $format | default "" - handle-log $message (log-types | get INFO) $format $short + handle-log $message ($LOG_TYPES.INFO) $format $short } # Log a debug message @@ -228,7 +227,7 @@ export def debug [ --format (-f): string # A format (for further reference: help std log) ] { let format = $format | default "" - handle-log $message (log-types | get DEBUG) $format $short + handle-log $message ($LOG_TYPES.DEBUG) $format $short } def log-level-deduction-error [ @@ -242,7 +241,7 @@ def log-level-deduction-error [ text: ([ "Invalid log level." $" Available log levels in log-level:" - (log-level | to text | lines | each {|it| $" ($it)" } | to text) + ($LOG_LEVEL | to text | lines | each {|it| $" ($it)" } | to text) ] | str join "\n") span: $span } @@ -262,11 +261,11 @@ export def custom [ } let valid_levels_for_defaulting = [ - (log-level).CRITICAL - (log-level).ERROR - (log-level).WARNING - (log-level).INFO - (log-level).DEBUG + $LOG_LEVEL.CRITICAL + $LOG_LEVEL.ERROR + $LOG_LEVEL.WARNING + $LOG_LEVEL.INFO + $LOG_LEVEL.DEBUG ] let prefix = if ($level_prefix | is-empty) { @@ -280,7 +279,7 @@ export def custom [ $level_prefix } - let use_color = ($env | get config? | get use_ansi_coloring? | $in != false) + let use_color = ($env.config?.use_ansi_coloring? | $in != false) let ansi = if not $use_color { "" } else if ($ansi | is-empty) { @@ -289,7 +288,7 @@ export def custom [ } ( - log-types + $LOG_TYPES | values | each {|record| if ($record.level == $log_level) { @@ -301,19 +300,19 @@ export def custom [ $ansi } - print --stderr ([ - ["%MSG%" $message] - ["%DATE%" (now)] - ["%LEVEL%" $prefix] - ["%ANSI_START%" $ansi] - ["%ANSI_STOP%" (ansi reset)] - ] | reduce --fold $format { - |it, acc| $acc | str replace --all $it.0 $it.1 - }) + print --stderr ( + $format + | str replace --all "%MSG%" $message + | str replace --all "%DATE%" (now) + | str replace --all "%LEVEL%" $prefix + | str replace --all "%ANSI_START%" $ansi + | str replace --all "%ANSI_STOP%" (ansi reset) + + ) } def "nu-complete log-level" [] { - log-level | transpose description value + $LOG_LEVEL | transpose description value } # Change logging level From 717081bd2ff80892110099168f55f0c1ac5f7cf3 Mon Sep 17 00:00:00 2001 From: Matthias Meschede Date: Wed, 23 Apr 2025 12:22:40 +0200 Subject: [PATCH 06/22] fix mistake in description of polars pivot command (#15621) Very small change to fix a typo/mistake in the polars pivot command description. --- crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs b/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs index 9253854ab1..b6f29adf7e 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/pivot.rs @@ -26,7 +26,7 @@ impl PluginCommand for PivotDF { } fn description(&self) -> &str { - "Pivot a DataFrame from wide to long format." + "Pivot a DataFrame from long to wide format." } fn signature(&self) -> Signature { From cb57f0a539895e80c2779a536c4326dad3d6c0cc Mon Sep 17 00:00:00 2001 From: Sebastian Nallar <95891104+sebasnallar@users.noreply.github.com> Date: Wed, 23 Apr 2025 12:47:48 -0300 Subject: [PATCH 07/22] Add --follow-symlinks flag to glob command (fixes #15559) (#15626) Fixes #15559 # Description The glob command wasn't working correctly with symlinks in the /sys filesystem. This commit adds a new flag that allows users to explicitly control whether symlinks should be followed, with special handling for the /sys directory. The issue was that the glob command didn't follow symbolic links when traversing the /sys filesystem, resulting in an empty list even though paths should be found. This implementation adds a new `--follow-symlinks` flag that explicitly enables following symlinks. By default, it now follows symlinks in most paths but has special handling for /sys paths where the flag is required. Example: ` # Before: This would return an empty list on Linux systems glob /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor # Now: This works as expected with the new flag glob /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor --follow-symlinks ` # User-Facing Changes 1. Added the --follow-symlinks (-l) flag to the glob command that allows users to explicitly control whether symbolic links should be followed 2. Added a new example to the glob command help text demonstrating the use of this flag # Tests + Formatting 1. Added a test for the new --follow-symlinks flag --- crates/nu-command/src/filesystem/glob.rs | 20 +++++++++++++-- crates/nu-command/tests/commands/glob.rs | 32 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/crates/nu-command/src/filesystem/glob.rs b/crates/nu-command/src/filesystem/glob.rs index 4a293e82fe..740d5ec9e9 100644 --- a/crates/nu-command/src/filesystem/glob.rs +++ b/crates/nu-command/src/filesystem/glob.rs @@ -35,6 +35,11 @@ impl Command for Glob { "Whether to filter out symlinks from the returned paths", Some('S'), ) + .switch( + "follow-symlinks", + "Whether to follow symbolic links to their targets", + Some('l'), + ) .named( "exclude", SyntaxShape::List(Box::new(SyntaxShape::String)), @@ -111,6 +116,11 @@ impl Command for Glob { example: r#"glob **/* --exclude [**/target/** **/.git/** */]"#, result: None, }, + Example { + description: "Search for files following symbolic links to their targets", + example: r#"glob "**/*.txt" --follow-symlinks"#, + result: None, + }, ] } @@ -132,6 +142,7 @@ impl Command for Glob { let no_dirs = call.has_flag(engine_state, stack, "no-dir")?; let no_files = call.has_flag(engine_state, stack, "no-file")?; let no_symlinks = call.has_flag(engine_state, stack, "no-symlink")?; + let follow_symlinks = call.has_flag(engine_state, stack, "follow-symlinks")?; let paths_to_exclude: Option = call.get_flag(engine_state, stack, "exclude")?; let (not_patterns, not_pattern_span): (Vec, Span) = match paths_to_exclude { @@ -213,6 +224,11 @@ impl Command for Glob { } }; + let link_behavior = match follow_symlinks { + true => wax::LinkBehavior::ReadTarget, + false => wax::LinkBehavior::ReadFile, + }; + let result = if !not_patterns.is_empty() { let np: Vec<&str> = not_patterns.iter().map(|s| s as &str).collect(); let glob_results = glob @@ -220,7 +236,7 @@ impl Command for Glob { path, WalkBehavior { depth: folder_depth, - ..Default::default() + link: link_behavior, }, ) .into_owned() @@ -247,7 +263,7 @@ impl Command for Glob { path, WalkBehavior { depth: folder_depth, - ..Default::default() + link: link_behavior, }, ) .into_owned() diff --git a/crates/nu-command/tests/commands/glob.rs b/crates/nu-command/tests/commands/glob.rs index 235896f63d..bded39d09f 100644 --- a/crates/nu-command/tests/commands/glob.rs +++ b/crates/nu-command/tests/commands/glob.rs @@ -173,3 +173,35 @@ fn glob_files_in_parent( assert_eq!(actual.out, expected, "\n test: {}", tag); }); } + +#[test] +fn glob_follow_symlinks() { + Playground::setup("glob_follow_symlinks", |dirs, sandbox| { + // Create a directory with some files + sandbox.mkdir("target_dir"); + sandbox + .within("target_dir") + .with_files(&[EmptyFile("target_file.txt")]); + + let target_dir = dirs.test().join("target_dir"); + let symlink_path = dirs.test().join("symlink_dir"); + #[cfg(unix)] + std::os::unix::fs::symlink(target_dir, &symlink_path).expect("Failed to create symlink"); + #[cfg(windows)] + std::os::windows::fs::symlink_dir(target_dir, &symlink_path) + .expect("Failed to create symlink"); + + // on some systems/filesystems, symlinks are followed by default + // on others (like Linux /sys), they aren't + // Test that with the --follow-symlinks flag, files are found for sure + let with_flag = nu!( + cwd: dirs.test(), + "glob 'symlink_dir/*.txt' --follow-symlinks | length", + ); + + assert_eq!( + with_flag.out, "1", + "Should find file with --follow-symlinks flag" + ); + }) +} From 78903724f5842b6bbddb5fd52866da714e5cebbb Mon Sep 17 00:00:00 2001 From: Auca Coyan Date: Wed, 23 Apr 2025 17:55:41 +0000 Subject: [PATCH 08/22] Add labeler bot (#15627) - fixes #15607 # Description Hi! I added a labeler bot workflow and reference to the tags. This workflow runs whenever is a change in a PR (`pull_request_target`) [source](https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#pull_request_target) # User-Facing Changes Nothing here, just the CI # Tests + Formatting Not needed --------- Co-authored-by: Stefan Holderbach --- .github/labeler.yml | 40 ++++++++++++++++++++++++++++++++++++ .github/workflows/labels.yml | 19 +++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 .github/labeler.yml create mode 100644 .github/workflows/labels.yml diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000000..d6c4cd0391 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,40 @@ +# A bot for automatically labelling pull requests +# See https://github.com/actions/labeler + +dataframe: + - changed-files: + - any-glob-to-any-file: + - crates/nu_plugin_polars/** + +std-library: + - changed-files: + - any-glob-to-any-file: + - crates/nu-std/** + +ci: + - changed-files: + - any-glob-to-any-file: + - .github/workflows/** + + +LSP: + - changed-files: + - any-glob-to-any-file: + - crates/nu-lsp/** + +parser: + - changed-files: + - any-glob-to-any-file: + - crates/nu-parser/** + +pr:plugins: + - changed-files: + - any-glob-to-any-file: + # plugins API + - crates/nu-plugin/** + - crates/nu-plugin-core/** + - crates/nu-plugin-engine/** + - crates/nu-plugin-protocol/** + - crates/nu-plugin-test-support/** + # specific plugins (like polars) + - crates/nu_plugin_* diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml new file mode 100644 index 0000000000..96b84c6a47 --- /dev/null +++ b/.github/workflows/labels.yml @@ -0,0 +1,19 @@ +# Automatically labels PRs based on the configuration file +# you are probably looking for 👉 `.github/labeler.yml` +name: Label PRs + +on: + - pull_request_target + +jobs: + triage: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + if: github.repository_owner == 'nushell' + steps: + - uses: actions/labeler@v5 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + sync-labels: true \ No newline at end of file From 7add38fe323996540ccd339ad2e4eb9dd33f8ec8 Mon Sep 17 00:00:00 2001 From: Wind Date: Thu, 24 Apr 2025 19:47:04 +0800 Subject: [PATCH 09/22] IR: allow subexpression with redirection. (#15617) # Description Try to fixes https://github.com/nushell/nushell/issues/15326 in another way. The main point of this change is to avoid duplicate `write` and `close` a redirected file. So during compile, if compiler know current element is a sub-expression(defined by private `is_subexpression` function), it will no longer invoke `finish_redirection`. In this way, we can avoid duplicate `finish_redirection`. # User-Facing Changes `(^echo aa) o> /tmp/aaa` will no longer raise an error. Here is the IR after the pr: ``` # 3 registers, 12 instructions, 11 bytes of data # 1 file used for redirection 0: load-literal %1, string("aaa") 1: open-file file(0), %1, append = false 2: load-literal %1, glob-pattern("echo", no_expand = false) 3: load-literal %2, glob-pattern("true", no_expand = false) 4: push-positional %1 5: push-positional %2 6: redirect-out file(0) 7: redirect-err caller 8: call decl 135 "run-external", %0 9: write-file file(0), %0 10: close-file file(0) 11: return %0 ``` # Tests + Formatting Added 3 tests. # After Submitting Maybe need to update doc https://github.com/nushell/nushell.github.io/pull/1876 --------- Co-authored-by: Stefan Holderbach --- .../nu-command/tests/commands/redirection.rs | 15 ++++++++++++++ crates/nu-engine/src/compile/mod.rs | 20 ++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/crates/nu-command/tests/commands/redirection.rs b/crates/nu-command/tests/commands/redirection.rs index 02f85a90f8..c263f27d83 100644 --- a/crates/nu-command/tests/commands/redirection.rs +++ b/crates/nu-command/tests/commands/redirection.rs @@ -473,3 +473,18 @@ fn pipe_redirection_in_let_and_mut( ); assert_eq!(actual.out, output); } + +#[rstest::rstest] +#[case("o>", "bar")] +#[case("e>", "")] +#[case("o+e>", "bar\nbaz")] // in subexpression, the stderr is go to the terminal +fn subexpression_redirection(#[case] redir: &str, #[case] stdout_file_body: &str) { + Playground::setup("file redirection with subexpression", |dirs, _| { + let actual = nu!( + cwd: dirs.test(), + format!("$env.BAR = 'bar'; $env.BAZ = 'baz'; (nu --testbin echo_env_mixed out-err BAR BAZ) {redir} result.txt") + ); + assert!(actual.status.success()); + assert!(file_contents(dirs.test().join("result.txt")).contains(stdout_file_body)); + }) +} diff --git a/crates/nu-engine/src/compile/mod.rs b/crates/nu-engine/src/compile/mod.rs index 1bdc2c6b41..f68232f9cb 100644 --- a/crates/nu-engine/src/compile/mod.rs +++ b/crates/nu-engine/src/compile/mod.rs @@ -1,5 +1,5 @@ use nu_protocol::{ - ast::{Block, Pipeline, PipelineRedirection, RedirectionSource, RedirectionTarget}, + ast::{Block, Expr, Pipeline, PipelineRedirection, RedirectionSource, RedirectionTarget}, engine::StateWorkingSet, ir::{Instruction, IrBlock, RedirectMode}, CompileError, IntoSpanned, RegId, Span, @@ -194,11 +194,25 @@ fn compile_pipeline( out_reg, )?; - // Clean up the redirection - finish_redirection(builder, redirect_modes, out_reg)?; + // only clean up the redirection if current element is not + // a subexpression. The subexpression itself already clean it. + if !is_subexpression(&element.expr.expr) { + // Clean up the redirection + finish_redirection(builder, redirect_modes, out_reg)?; + } // The next pipeline element takes input from this output in_reg = Some(out_reg); } Ok(()) } + +fn is_subexpression(expr: &Expr) -> bool { + match expr { + Expr::FullCellPath(inner) => { + matches!(&inner.head.expr, &Expr::Subexpression(..)) + } + Expr::Subexpression(..) => true, + _ => false, + } +} From 6be291b00a71427dc79e8a54928f69b6a08a7502 Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Thu, 24 Apr 2025 14:00:16 +0200 Subject: [PATCH 10/22] Fix labelling of plugins through correct glob (#15634) https://github.com/nushell/nushell/pull/15627#issuecomment-2827259125 --- .github/labeler.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index d6c4cd0391..cb500a34c8 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -37,4 +37,4 @@ pr:plugins: - crates/nu-plugin-protocol/** - crates/nu-plugin-test-support/** # specific plugins (like polars) - - crates/nu_plugin_* + - crates/nu_plugin_*/** From 82eb1c5584dcda90a9e495d45a1e01f07de20f08 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 24 Apr 2025 08:25:36 -0500 Subject: [PATCH 11/22] add more details to `decribe -d` (#15591) # Description I was playing around with the `debug` command and wanted to add this information to it but since most of it already existed in `describe` I wanted to try and add it here. It adds a few more details that are hopefully helpful. It mainly tries to add the value type, rust datatype, and value. I'm not sure all of this is wanted or needed but I thought it was an interesting introspection idea. ### Before ![image](https://github.com/user-attachments/assets/f1cfc5dd-6c02-4aa1-acb2-8e9931f66dd8) ### After ![image](https://github.com/user-attachments/assets/cfb3c8bd-70dd-4aa1-b03a-375acf6c0e09) # User-Facing Changes # Tests + Formatting # After Submitting --- .../nu-cmd-lang/src/core_commands/describe.rs | 254 ++++++++++++++---- 1 file changed, 207 insertions(+), 47 deletions(-) diff --git a/crates/nu-cmd-lang/src/core_commands/describe.rs b/crates/nu-cmd-lang/src/core_commands/describe.rs index b3f7b50611..6fa11d8061 100644 --- a/crates/nu-cmd-lang/src/core_commands/describe.rs +++ b/crates/nu-cmd-lang/src/core_commands/describe.rs @@ -1,6 +1,9 @@ use nu_engine::command_prelude::*; -use nu_protocol::{engine::StateWorkingSet, ByteStreamSource, PipelineMetadata}; - +use nu_protocol::{ + engine::{Closure, StateWorkingSet}, + BlockId, ByteStreamSource, Category, PipelineMetadata, Signature, +}; +use std::any::type_name; #[derive(Clone)] pub struct Describe; @@ -73,39 +76,116 @@ impl Command for Describe { "{shell:'true', uwu:true, features: {bugs:false, multiplatform:true, speed: 10}, fib: [1 1 2 3 5 8], on_save: {|x| $'Saving ($x)'}, first_commit: 2019-05-10, my_duration: (4min + 20sec)} | describe -d", result: Some(Value::test_record(record!( "type" => Value::test_string("record"), + "detailed_type" => Value::test_string("record, fib: list, on_save: closure, first_commit: datetime, my_duration: duration>"), "columns" => Value::test_record(record!( - "shell" => Value::test_string("string"), - "uwu" => Value::test_string("bool"), + "shell" => Value::test_record(record!( + "type" => Value::test_string("string"), + "detailed_type" => Value::test_string("string"), + "rust_type" => Value::test_string("&alloc::string::String"), + "value" => Value::test_string("true"), + )), + "uwu" => Value::test_record(record!( + "type" => Value::test_string("bool"), + "detailed_type" => Value::test_string("bool"), + "rust_type" => Value::test_string("bool"), + "value" => Value::test_bool(true), + )), "features" => Value::test_record(record!( "type" => Value::test_string("record"), + "detailed_type" => Value::test_string("record"), "columns" => Value::test_record(record!( - "bugs" => Value::test_string("bool"), - "multiplatform" => Value::test_string("bool"), - "speed" => Value::test_string("int"), + "bugs" => Value::test_record(record!( + "type" => Value::test_string("bool"), + "detailed_type" => Value::test_string("bool"), + "rust_type" => Value::test_string("bool"), + "value" => Value::test_bool(false), + )), + "multiplatform" => Value::test_record(record!( + "type" => Value::test_string("bool"), + "detailed_type" => Value::test_string("bool"), + "rust_type" => Value::test_string("bool"), + "value" => Value::test_bool(true), + )), + "speed" => Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(10), + )), )), + "rust_type" => Value::test_string("&nu_utils::shared_cow::SharedCow"), )), "fib" => Value::test_record(record!( "type" => Value::test_string("list"), + "detailed_type" => Value::test_string("list"), "length" => Value::test_int(6), - "values" => Value::test_list(vec![ - Value::test_string("int"), - Value::test_string("int"), - Value::test_string("int"), - Value::test_string("int"), - Value::test_string("int"), - Value::test_string("int"), - ]), + "rust_type" => Value::test_string("&mut alloc::vec::Vec"), + "value" => Value::test_list(vec![ + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(1), + )), + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(1), + )), + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(2), + )), + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(3), + )), + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(5), + )), + Value::test_record(record!( + "type" => Value::test_string("int"), + "detailed_type" => Value::test_string("int"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_int(8), + ))] + ), )), "on_save" => Value::test_record(record!( "type" => Value::test_string("closure"), + "detailed_type" => Value::test_string("closure"), + "rust_type" => Value::test_string("&alloc::boxed::Box"), + "value" => Value::test_closure(Closure { + block_id: BlockId::new(1), + captures: vec![], + }), "signature" => Value::test_record(record!( "name" => Value::test_string(""), "category" => Value::test_string("default"), )), )), - "first_commit" => Value::test_string("datetime"), - "my_duration" => Value::test_string("duration"), + "first_commit" => Value::test_record(record!( + "type" => Value::test_string("datetime"), + "detailed_type" => Value::test_string("datetime"), + "rust_type" => Value::test_string("chrono::datetime::DateTime"), + "value" => Value::test_date("2019-05-10 00:00:00Z".parse().unwrap_or_default()), + )), + "my_duration" => Value::test_record(record!( + "type" => Value::test_string("duration"), + "detailed_type" => Value::test_string("duration"), + "rust_type" => Value::test_string("i64"), + "value" => Value::test_duration(260_000_000_000), + )) )), + "rust_type" => Value::test_string("&nu_utils::shared_cow::SharedCow"), ))), }, Example { @@ -175,7 +255,9 @@ fn run( Value::record( record! { - "type" => Value::string(type_, head), + "type" => Value::string("bytestream", head), + "detailed_type" => Value::string(type_, head), + "rust_type" => Value::string(type_of(&stream), head), "origin" => Value::string(origin, head), "metadata" => metadata_to_value(metadata, head), }, @@ -192,6 +274,7 @@ fn run( description } PipelineData::ListStream(stream, ..) => { + let type_ = type_of(&stream); if options.detailed { let subtype = if options.no_collect { Value::string("any", head) @@ -201,6 +284,8 @@ fn run( Value::record( record! { "type" => Value::string("stream", head), + "detailed_type" => Value::string("list stream", head), + "rust_type" => Value::string(type_, head), "origin" => Value::string("nushell", head), "subtype" => subtype, "metadata" => metadata_to_value(metadata, head), @@ -229,45 +314,95 @@ fn run( } enum Description { - String(String), Record(Record), } impl Description { fn into_value(self, span: Span) -> Value { match self { - Description::String(ty) => Value::string(ty, span), Description::Record(record) => Value::record(record, span), } } } fn describe_value(value: Value, head: Span, engine_state: Option<&EngineState>) -> Value { - let record = match describe_value_inner(value, head, engine_state) { - Description::String(ty) => record! { "type" => Value::string(ty, head) }, - Description::Record(record) => record, - }; + let Description::Record(record) = describe_value_inner(value, head, engine_state); Value::record(record, head) } +fn type_of(_: &T) -> String { + type_name::().to_string() +} + fn describe_value_inner( - value: Value, + mut value: Value, head: Span, engine_state: Option<&EngineState>, ) -> Description { + let value_type = value.get_type().to_string(); match value { - Value::Bool { .. } - | Value::Int { .. } - | Value::Float { .. } - | Value::Filesize { .. } - | Value::Duration { .. } - | Value::Date { .. } - | Value::Range { .. } - | Value::String { .. } - | Value::Glob { .. } - | Value::Nothing { .. } => Description::String(value.get_type().to_string()), - Value::Record { val, .. } => { - let mut columns = val.into_owned(); + Value::Bool { val, .. } => Description::Record(record! { + "type" => Value::string("bool", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Int { val, .. } => Description::Record(record! { + "type" => Value::string("int", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Float { val, .. } => Description::Record(record! { + "type" => Value::string("float", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Filesize { val, .. } => Description::Record(record! { + "type" => Value::string("filesize", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Duration { val, .. } => Description::Record(record! { + "type" => Value::string("duration", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Date { val, .. } => Description::Record(record! { + "type" => Value::string("datetime", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Range { ref val, .. } => Description::Record(record! { + "type" => Value::string("range", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::String { ref val, .. } => Description::Record(record! { + "type" => Value::string("string", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Glob { ref val, .. } => Description::Record(record! { + "type" => Value::string("glob", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }), + Value::Nothing { .. } => Description::Record(record! { + "type" => Value::string("nothing", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string("", head), + "value" => value, + }), + Value::Record { ref val, .. } => { + let mut columns = val.clone().into_owned(); for (_, val) in &mut columns { *val = describe_value_inner(std::mem::take(val), head, engine_state).into_value(head); @@ -275,25 +410,34 @@ fn describe_value_inner( Description::Record(record! { "type" => Value::string("record", head), - "columns" => Value::record(columns, head), + "detailed_type" => Value::string(value_type, head), + "columns" => Value::record(columns.clone(), head), + "rust_type" => Value::string(type_of(&val), head), }) } - Value::List { mut vals, .. } => { - for val in &mut vals { + Value::List { ref mut vals, .. } => { + for val in &mut *vals { *val = describe_value_inner(std::mem::take(val), head, engine_state).into_value(head); } Description::Record(record! { "type" => Value::string("list", head), + "detailed_type" => Value::string(value_type, head), "length" => Value::int(vals.len() as i64, head), - "values" => Value::list(vals, head), + "rust_type" => Value::string(type_of(&vals), head), + "value" => value, }) } - Value::Closure { val, .. } => { + Value::Closure { ref val, .. } => { let block = engine_state.map(|engine_state| engine_state.get_block(val.block_id)); - let mut record = record! { "type" => Value::string("closure", head) }; + let mut record = record! { + "type" => Value::string("closure", head), + "detailed_type" => Value::string(value_type, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, + }; if let Some(block) = block { record.push( "signature", @@ -308,21 +452,37 @@ fn describe_value_inner( } Description::Record(record) } - Value::Error { error, .. } => Description::Record(record! { + Value::Error { ref error, .. } => Description::Record(record! { "type" => Value::string("error", head), + "detailed_type" => Value::string(value_type, head), "subtype" => Value::string(error.to_string(), head), + "rust_type" => Value::string(type_of(&error), head), + "value" => value, }), - Value::Binary { val, .. } => Description::Record(record! { + Value::Binary { ref val, .. } => Description::Record(record! { "type" => Value::string("binary", head), + "detailed_type" => Value::string(value_type, head), "length" => Value::int(val.len() as i64, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value, }), - Value::CellPath { val, .. } => Description::Record(record! { + Value::CellPath { ref val, .. } => Description::Record(record! { "type" => Value::string("cell-path", head), + "detailed_type" => Value::string(value_type, head), "length" => Value::int(val.members.len() as i64, head), + "rust_type" => Value::string(type_of(&val), head), + "value" => value }), - Value::Custom { val, .. } => Description::Record(record! { + Value::Custom { ref val, .. } => Description::Record(record! { "type" => Value::string("custom", head), + "detailed_type" => Value::string(value_type, head), "subtype" => Value::string(val.type_name(), head), + "rust_type" => Value::string(type_of(&val), head), + "value" => + match val.to_base_value(head) { + Ok(base_value) => base_value, + Err(err) => Value::error(err, head), + } }), } } From db261e3ed95a0aa1e1af7171557ee37617b9d068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Riegel?= <96702577+LoicRiegel@users.noreply.github.com> Date: Thu, 24 Apr 2025 15:32:29 +0200 Subject: [PATCH 12/22] bugfix: `str join` outputs dates consistently (RFC2822 when possible) (#15629) Closes #11265 # Description ``str join`` outputs dates just other commands: RFC2822 by default otherwise RFC3339 for negative dates # User-Facing Changes ```nushell ~> 2024-01-01 # => Mon, 1 Jan 2024 00:00:00 +0000 (a year ago) ~> '3000 years ago' | date from-human # => -0975-04-23T20:57:07.217711700+02:00 (3000 years ago) ~> [ 2024-01-01 ] | str join # => Mon, 1 Jan 2024 00:00:00 +0000 ~> [ ('3000 years ago' | date from-human) ] | str join # => -0975-04-23T20:57:56.221269600+02:00 ``` # Tests + Formatting OK # After Submitting Nothing --- crates/nu-command/src/strings/str_/join.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/nu-command/src/strings/str_/join.rs b/crates/nu-command/src/strings/str_/join.rs index e500da8343..12b2858e1e 100644 --- a/crates/nu-command/src/strings/str_/join.rs +++ b/crates/nu-command/src/strings/str_/join.rs @@ -1,3 +1,4 @@ +use chrono::Datelike; use nu_engine::command_prelude::*; use nu_protocol::{shell_error::io::IoError, Signals}; @@ -109,9 +110,14 @@ fn run( Value::Error { error, .. } => { return Err(*error); } - // Hmm, not sure what we actually want. - // `to_expanded_string` formats dates as human readable which feels funny. - Value::Date { val, .. } => write!(buffer, "{val:?}").map_err(&from_io_error)?, + Value::Date { val, .. } => { + let date_str = if val.year() >= 0 { + val.to_rfc2822() + } else { + val.to_rfc3339() + }; + write!(buffer, "{date_str}").map_err(&from_io_error)? + } value => write!(buffer, "{}", value.to_expanded_string("\n", &config)) .map_err(&from_io_error)?, } From 220858d6412762c0f17616a8cc62c93f8aa290e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Riegel?= <96702577+LoicRiegel@users.noreply.github.com> Date: Thu, 24 Apr 2025 15:33:13 +0200 Subject: [PATCH 13/22] history table using sqlite outputs start_timestamp as datetime instead of string (#15630) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #13581 # Description Before, the table you got from ``history`` had values as strings in the ``startup_timestamp`` column. Now the values are datetimes. # User-Facing Changes ```nushell ~\workspace_tns\nushell> history | last 5 ╭───┬─────────────────┬─────────────────────┬───────────────────────────────────────────┬─────╮ │ # │ start_timestamp │ command │ cwd │ ... │ ├───┼─────────────────┼─────────────────────┼───────────────────────────────────────────┼─────┤ │ 0 │ a minute ago │ history │ C:\Users\RIL1RT\workspace_tns\nushell-bis │ ... │ │ 1 │ 40 seconds ago │ cd nushell │ C:\Users\RIL1RT\workspace_tns\nushell-bis │ ... │ │ 2 │ 31 seconds ago │ target\debug\nu.exe │ C:\Users\RIL1RT\workspace_tns\nushell │ ... │ │ 3 │ 26 seconds ago │ history │ C:\Users\RIL1RT\workspace_tns\nushell │ ... │ │ 4 │ now │ history | last 5 │ C:\Users\RIL1RT\workspace_tns\nushell │ ... │ ╰───┴─────────────────┴─────────────────────┴───────────────────────────────────────────┴─────╯ ``` # Tests + Formatting # After Submitting ❓ --- crates/nu-cli/src/commands/history/history_.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/nu-cli/src/commands/history/history_.rs b/crates/nu-cli/src/commands/history/history_.rs index 00efd547d4..cdb60c5417 100644 --- a/crates/nu-cli/src/commands/history/history_.rs +++ b/crates/nu-cli/src/commands/history/history_.rs @@ -105,10 +105,9 @@ impl Command for History { .ok() }) .map(move |entries| { - entries - .into_iter() - .enumerate() - .map(move |(idx, entry)| create_history_record(idx, entry, long, head)) + entries.into_iter().enumerate().map(move |(idx, entry)| { + create_sqlite_history_record(idx, entry, long, head) + }) }) .ok_or(IoError::new( std::io::ErrorKind::NotFound, @@ -140,7 +139,7 @@ impl Command for History { } } -fn create_history_record(idx: usize, entry: HistoryItem, long: bool, head: Span) -> Value { +fn create_sqlite_history_record(idx: usize, entry: HistoryItem, long: bool, head: Span) -> Value { //1. Format all the values //2. Create a record of either short or long columns and values @@ -151,11 +150,8 @@ fn create_history_record(idx: usize, entry: HistoryItem, long: bool, head: Span) .unwrap_or_default(), head, ); - let start_timestamp_value = Value::string( - entry - .start_timestamp - .map(|time| time.to_string()) - .unwrap_or_default(), + let start_timestamp_value = Value::date( + entry.start_timestamp.unwrap_or_default().fixed_offset(), head, ); let command_value = Value::string(entry.command_line, head); From f41b1460aa90c86bfb88c60352bb6cbf28d382fb Mon Sep 17 00:00:00 2001 From: Marco Cunha Date: Thu, 24 Apr 2025 15:09:48 +0100 Subject: [PATCH 14/22] Fix #14660: to md breaks on tables with empty values (#15631) Fixes #14660 # Description Fixed an issue where tables with empty values were incorrectly replaced with [table X row] when converted to Markdown using the ```to md``` command. Empty values are now replaced with whitespaces to preserve the original table structure. Additionally, fixed a missing newline (\n) between tables when using --per-element in a list. Removed (\n) from 2 examples for consistency. Example: ``` For the list let list = [ {name: bob, age: 21} {name: jim, age: 20} {name: sarah}] Running "$list | to md --pretty" outputs: | name | age | | ----- | --- | | bob | 21 | | jim | 20 | | sarah | | ------------------------------------------------------------------------------------------------ For the list let list = [ {name: bob, age: 21} {name: jim, age: 20} {name: sarah} {name: timothy, age: 50} {name: paul} ] Running "$list | to md --per-element --pretty" outputs: | name | age | | ------- | --- | | bob | 21 | | jim | 20 | | timothy | 50 | | name | | ----- | | sarah | | paul | ``` # User-Facing Changes The ```to md``` behaves as expected when piping a table that contains empty values showing all rows and the empty items replaced with whitespace. # Tests + Formatting Added 2 test cases to cover both issues. fmt + clippy OK. # After Submitting The command documentation needs to be updated with an example for when you want to "separate list into markdown tables" --- crates/nu-command/src/formats/to/md.rs | 59 +++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/crates/nu-command/src/formats/to/md.rs b/crates/nu-command/src/formats/to/md.rs index 434b65a8d9..def4060257 100644 --- a/crates/nu-command/src/formats/to/md.rs +++ b/crates/nu-command/src/formats/to/md.rs @@ -36,13 +36,13 @@ impl Command for ToMd { Example { description: "Outputs an MD string representing the contents of this table", example: "[[foo bar]; [1 2]] | to md", - result: Some(Value::test_string("|foo|bar|\n|-|-|\n|1|2|\n")), + result: Some(Value::test_string("|foo|bar|\n|-|-|\n|1|2|")), }, Example { description: "Optionally, output a formatted markdown string", example: "[[foo bar]; [1 2]] | to md --pretty", result: Some(Value::test_string( - "| foo | bar |\n| --- | --- |\n| 1 | 2 |\n", + "| foo | bar |\n| --- | --- |\n| 1 | 2 |", )), }, Example { @@ -57,6 +57,13 @@ impl Command for ToMd { example: "[0 1 2] | to md --pretty", result: Some(Value::test_string("0\n1\n2")), }, + Example { + description: "Separate list into markdown tables", + example: "[ {foo: 1, bar: 2} {foo: 3, bar: 4} {foo: 5}] | to md --per-element", + result: Some(Value::test_string( + "|foo|bar|\n|-|-|\n|1|2|\n|3|4|\n|foo|\n|-|\n|5|", + )), + }, ] } @@ -94,11 +101,14 @@ fn to_md( grouped_input .into_iter() .map(move |val| match val { - Value::List { .. } => table(val.into_pipeline_data(), pretty, config), + Value::List { .. } => { + format!("{}\n", table(val.into_pipeline_data(), pretty, config)) + } other => fragment(other, pretty, config), }) .collect::>() - .join(""), + .join("") + .trim(), head, ) .into_pipeline_data_with_metadata(Some(metadata))); @@ -152,7 +162,13 @@ fn collect_headers(headers: &[String]) -> (Vec, Vec) { } fn table(input: PipelineData, pretty: bool, config: &Config) -> String { - let vec_of_values = input.into_iter().collect::>(); + let vec_of_values = input + .into_iter() + .flat_map(|val| match val { + Value::List { vals, .. } => vals, + other => vec![other], + }) + .collect::>(); let mut headers = merge_descriptors(&vec_of_values); let mut empty_header_index = 0; @@ -464,6 +480,39 @@ mod tests { ); } + #[test] + fn test_empty_row_value() { + let value = Value::test_list(vec![ + Value::test_record(record! { + "foo" => Value::test_string("1"), + "bar" => Value::test_string("2"), + }), + Value::test_record(record! { + "foo" => Value::test_string("3"), + "bar" => Value::test_string("4"), + }), + Value::test_record(record! { + "foo" => Value::test_string("5"), + "bar" => Value::test_string(""), + }), + ]); + + assert_eq!( + table( + value.clone().into_pipeline_data(), + false, + &Config::default() + ), + one(r#" + |foo|bar| + |-|-| + |1|2| + |3|4| + |5|| + "#) + ); + } + #[test] fn test_content_type_metadata() { let mut engine_state = Box::new(EngineState::new()); From b33f4b7f55c2248d6c94eea20ce1f68692e5ebc2 Mon Sep 17 00:00:00 2001 From: Hayden Frentzel <44076377+hfrentzel@users.noreply.github.com> Date: Thu, 24 Apr 2025 09:10:34 -0500 Subject: [PATCH 15/22] Run scripts of any file extension in PATHEXT on Windows (#15611) # Description On Windows, I would like to be able to call a script directly in nushell and have that script be found in the PATH and run based on filetype associations and PATHEXT. There have been previous discussions related to this feature, see https://github.com/nushell/nushell/issues/6440 and https://github.com/nushell/nushell/issues/15476. The latter issue is only a few weeks old, and after taking a look at it and the resultant PR I found that currently nushell is hardcoded to support only running nushell (.nu) scripts in this way. This PR seeks to make this functionality more generic. Instead of checking that the file extension is explicitly `NU`, it instead checks that it **is not** one of `COM`, `EXE`, `BAT`, `CMD`, or `PS1`. The first four of these are extensions that Windows can figure out how to run on its own. This is implied by the output of `ftype` for any of these extensions, which shows that files are just run without a calling command anyway. ``` >ftype batfile batfile="%1" %* ``` PS1 files are ignored because they are handled as a special in later logic. In implementing this I initially tried to fetch the value of PATHEXT and confirm that the file extension was indeed in PATHEXT. But I determined that because `which()` respects PATHEXT, this would be redundant; any executable that is found by `which` is already going to have an extension in PATHEXT. It is thus only necessary to check that it isn't one of the few extensions that should be called directly, without the use of `cmd.exe`. There are some small formatting changes to `run_external.rs` in the PR as a result of running `cargo fmt` that are not entirely related to the code I modified. I can back out those changes if that is desired. # User-Facing Changes Behavior for `.nu` scripts will not change. Users will still need to ensure they have PATHEXT and filetype associations set correctly for them to work, but this will now also apply to scripts of other types. --- crates/nu-command/src/system/run_external.rs | 72 +++++++++++--------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index 841a2047b2..8331c9f284 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -79,23 +79,30 @@ impl Command for External { let paths = nu_engine::env::path_str(engine_state, stack, call.head)?; - // On Windows, the user could have run the cmd.exe built-in "assoc" command - // Example: "assoc .nu=nuscript" and then run the cmd.exe built-in "ftype" command - // Example: "ftype nuscript=C:\path\to\nu.exe '%1' %*" and then added the nushell - // script extension ".NU" to the PATHEXT environment variable. In this case, we use - // the which command, which will find the executable with or without the extension. - // If it "which" returns true, that means that we've found the nushell script and we - // believe the user wants to use the windows association to run the script. The only + // On Windows, the user could have run the cmd.exe built-in commands "assoc" + // and "ftype" to create a file association for an arbitrary file extension. + // They then could have added that extension to the PATHEXT environment variable. + // For example, a nushell script with extension ".nu" can be set up with + // "assoc .nu=nuscript" and "ftype nuscript=C:\path\to\nu.exe '%1' %*", + // and then by adding ".NU" to PATHEXT. In this case we use the which command, + // which will find the executable with or without the extension. If "which" + // returns true, that means that we've found the script and we believe the + // user wants to use the windows association to run the script. The only // easy way to do this is to run cmd.exe with the script as an argument. - let potential_nuscript_in_windows = if cfg!(windows) { - // let's make sure it's a .nu script + // File extensions of .COM, .EXE, .BAT, and .CMD are ignored because Windows + // can run those files directly. PS1 files are also ignored and that + // extension is handled in a separate block below. + let pathext_script_in_windows = if cfg!(windows) { if let Some(executable) = which(&expanded_name, &paths, cwd.as_ref()) { let ext = executable .extension() .unwrap_or_default() .to_string_lossy() .to_uppercase(); - ext == "NU" + + !["COM", "EXE", "BAT", "CMD", "PS1"] + .iter() + .any(|c| *c == ext) } else { false } @@ -122,29 +129,28 @@ impl Command for External { // Find the absolute path to the executable. On Windows, set the // executable to "cmd.exe" if it's a CMD internal command. If the // command is not found, display a helpful error message. - let executable = if cfg!(windows) - && (is_cmd_internal_command(&name_str) || potential_nuscript_in_windows) - { - PathBuf::from("cmd.exe") - } else if cfg!(windows) && potential_powershell_script { - // If we're on Windows and we're trying to run a PowerShell script, we'll use - // `powershell.exe` to run it. We shouldn't have to check for powershell.exe because - // it's automatically installed on all modern windows systems. - PathBuf::from("powershell.exe") - } else { - // Determine the PATH to be used and then use `which` to find it - though this has no - // effect if it's an absolute path already - let Some(executable) = which(&expanded_name, &paths, cwd.as_ref()) else { - return Err(command_not_found( - &name_str, - call.head, - engine_state, - stack, - &cwd, - )); + let executable = + if cfg!(windows) && (is_cmd_internal_command(&name_str) || pathext_script_in_windows) { + PathBuf::from("cmd.exe") + } else if cfg!(windows) && potential_powershell_script { + // If we're on Windows and we're trying to run a PowerShell script, we'll use + // `powershell.exe` to run it. We shouldn't have to check for powershell.exe because + // it's automatically installed on all modern windows systems. + PathBuf::from("powershell.exe") + } else { + // Determine the PATH to be used and then use `which` to find it - though this has no + // effect if it's an absolute path already + let Some(executable) = which(&expanded_name, &paths, cwd.as_ref()) else { + return Err(command_not_found( + &name_str, + call.head, + engine_state, + stack, + &cwd, + )); + }; + executable }; - executable - }; // Create the command. let mut command = std::process::Command::new(&executable); @@ -160,7 +166,7 @@ impl Command for External { // Configure args. let args = eval_external_arguments(engine_state, stack, call_args.to_vec())?; #[cfg(windows)] - if is_cmd_internal_command(&name_str) || potential_nuscript_in_windows { + if is_cmd_internal_command(&name_str) || pathext_script_in_windows { // The /D flag disables execution of AutoRun commands from registry. // The /C flag followed by a command name instructs CMD to execute // that command and quit. From 208ebeefab613c3859fa0e4447653cc2d9de1ba6 Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:43:28 -0400 Subject: [PATCH 16/22] feat(polars): enable parsing decimals in polars schemas (#15632) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR enables the option to set a column type to `decimal` in the `--schema` parameter of `polars into-df` and `polars into-lazy` commands. This option was already available in `polars open`, which used the underlying polars io commands that already accounted for decimal types when specified in the schema. See below for a comparison of the current and proposed implementation. ```nushell # Current Implementation > [[a b]; [1 1.618]]| polars into-df -s {a: u8, b: 'decimal<4,3>'} Error: × Error creating dataframe: Unsupported type: Decimal(Some(4), Some(3)) # Proposed Implementation > [[a b]; [1 1.618]]| polars into-df -s {a: u8, b: 'decimal<4,3>'} | polars schema ╭───┬──────────────╮ │ a │ u8 │ │ b │ decimal<4,3> │ ╰───┴──────────────╯ ``` # User-Facing Changes No breaking change. Users has the new option to specify decimal in `--schema` in `polars into-df` and `polars into-lazy`. # Tests + Formatting An example in `polars into-df` was modified to showcase the decimal type. # After Submitting --- .../src/dataframe/command/core/to_df.rs | 7 +++-- .../values/nu_dataframe/conversion.rs | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs b/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs index 84fe8ea2ba..7fe33a9cfd 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs @@ -160,7 +160,7 @@ impl PluginCommand for ToDataFrame { }, Example { description: "Convert to a dataframe and provide a schema", - example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list}, c: list}", + example: "[[a b c e]; [1 {d: [1 2 3]} [10 11 12] 1.618]]| polars into-df -s {a: u8, b: {d: list}, c: list, e: 'decimal<4,3>'}", result: Some( NuDataFrame::try_from_series_vec(vec![ Series::new("a".into(), &[1u8]), @@ -172,11 +172,12 @@ impl PluginCommand for ToDataFrame { .expect("Struct series should not fail") }, { - let dtype = DataType::List(Box::new(DataType::String)); + let dtype = DataType::List(Box::new(DataType::UInt8)); let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))]; Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false) .expect("List series should not fail") - } + }, + Series::new("e".into(), &[1.618]), ], Span::test_data()) .expect("simple df for test should not fail") .into_value(Span::test_data()), diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index 8b061b3945..2ba8526794 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -320,6 +320,34 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result { + let series_values: Result, _> = column + .values + .iter() + .map(|v| { + value_to_option(v, |v| match v { + Value::Float { val, .. } => Ok(*val), + Value::Int { val, .. } => Ok(*val as f64), + x => Err(ShellError::GenericError { + error: "Error converting to decimal".into(), + msg: "".into(), + span: None, + help: Some(format!("Unexpected type: {x:?}")), + inner: vec![], + }), + }) + }) + .collect(); + Series::new(name, series_values?) + .cast_with_options(&DataType::Decimal(*precision, *scale), Default::default()) + .map_err(|e| ShellError::GenericError { + error: "Error parsing decimal".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + }) + } DataType::UInt8 => { let series_values: Result, _> = column .values From 05c36d1bc76ab187559ed0e0e327bbd58fe38735 Mon Sep 17 00:00:00 2001 From: Matthias Meschede Date: Thu, 24 Apr 2025 23:44:29 +0200 Subject: [PATCH 17/22] add polars join_where command (#15635) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This adds `polars join_where` which allows joining two dataframes based on a conditions. The command can be used as: ``` ➜ let df_a = [[name cash];[Alice 5] [Bob 10]] | polars into-lazy ➜ let df_b = [[item price];[A 3] [B 7] [C 12]] | polars into-lazy ➜ $df_a | polars join_where $df_b ((polars col cash) > (polars col price)) | polars collect ╭───┬───────┬──────┬──────┬───────╮ │ # │ name │ cash │ item │ price │ ├───┼───────┼──────┼──────┼───────┤ │ 0 │ Bob │ 10 │ B │ 7 │ │ 1 │ Bob │ 10 │ A │ 3 │ │ 2 │ Alice │ 5 │ A │ 3 │ ╰───┴───────┴──────┴──────┴───────╯ ``` # User-Facing Changes - new command `polars join_where` --- crates/nu_plugin_polars/Cargo.toml | 1 + .../src/dataframe/command/data/join_where.rs | 119 ++++++++++++++++++ .../src/dataframe/command/data/mod.rs | 3 + 3 files changed, 123 insertions(+) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/data/join_where.rs diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index 5bdff452aa..479613a728 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -61,6 +61,7 @@ features = [ "cloud", "concat_str", "cross_join", + "iejoin", "csv", "cum_agg", "default", diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/join_where.rs b/crates/nu_plugin_polars/src/dataframe/command/data/join_where.rs new file mode 100644 index 0000000000..970a268174 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/command/data/join_where.rs @@ -0,0 +1,119 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}, + values::CustomValueSupport, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct LazyJoinWhere; + +impl PluginCommand for LazyJoinWhere { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars join_where" + } + + fn description(&self) -> &str { + "Joins a lazy frame with other lazy frame based on conditions." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("other", SyntaxShape::Any, "LazyFrame to join with") + .required("condition", SyntaxShape::Any, "Condition") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Join two lazy dataframes with a condition", + example: r#"let df_a = ([[name cash];[Alice 5] [Bob 10]] | polars into-lazy) + let df_b = ([[item price];[A 3] [B 7] [C 12]] | polars into-lazy) + $df_a | polars join_where $df_b ((polars col cash) > (polars col price)) | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "name".to_string(), + vec![ + Value::test_string("Bob"), + Value::test_string("Bob"), + Value::test_string("Alice"), + ], + ), + Column::new( + "cash".to_string(), + vec![Value::test_int(10), Value::test_int(10), Value::test_int(5)], + ), + Column::new( + "item".to_string(), + vec![ + Value::test_string("B"), + Value::test_string("A"), + Value::test_string("A"), + ], + ), + Column::new( + "price".to_string(), + vec![Value::test_int(7), Value::test_int(3), Value::test_int(3)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let other: Value = call.req(0)?; + let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?; + let other = other.to_polars(); + + let condition: Value = call.req(1)?; + let condition = NuExpression::try_from_value(plugin, &condition)?; + let condition = condition.into_polars(); + + let pipeline_value = input.into_value(call.head)?; + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; + let from_eager = lazy.from_eager; + let lazy = lazy.to_polars(); + + let lazy = lazy + .join_builder() + .with(other) + .force_parallel(true) + .join_where(vec![condition]); + + let lazy = NuLazyFrame::new(from_eager, lazy); + lazy.to_pipeline_data(plugin, engine, call.head) + .map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&LazyJoinWhere) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs index dd6cdc4eff..f4089a03c7 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs @@ -19,6 +19,7 @@ mod first; mod flatten; mod get; mod join; +mod join_where; mod last; mod len; mod lit; @@ -61,6 +62,7 @@ pub use first::FirstDF; use flatten::LazyFlatten; pub use get::GetDF; use join::LazyJoin; +use join_where::LazyJoinWhere; pub use last::LastDF; pub use lit::ExprLit; use query_df::QueryDf; @@ -106,6 +108,7 @@ pub(crate) fn data_commands() -> Vec Date: Thu, 24 Apr 2025 17:45:36 -0400 Subject: [PATCH 18/22] fix(polars): conversion from nanoseconds to time_units in Datetime and Duration parsing (#15637) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description The current implementation improperly inverts the conversion from nanoseconds to the specified time units, resulting in nonsensical Datetime and Duration parsing and integer overflows when the specified time unit is not nanoseconds. This PR seeks to correct this conversion by changing the multiplication to an integer division. Below are examples highlighting the current and proposed implementations. ## Current Implementation Specifying a different time unit incorrectly changes the returned value. ```nushell > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 06/27/2035 11:22:33PM │ <-- changing the time unit should not change the actual value > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬────────────────╮ │ # │ a │ ├───┼────────────────┤ │ 0 │ 86400000000000 │ ╰───┴────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬──────────────────────╮ │ # │ a │ ├───┼──────────────────────┤ │ 0 │ -5833720368547758080 │ <-- i64 overflow ╰───┴──────────────────────╯ ``` ## Proposed Implementation ```nushell > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ ╰───┴───────────────────────╯ > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ ╰───┴───────────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬────────────────╮ │ # │ a │ ├───┼────────────────┤ │ 0 │ 86400000000000 │ ╰───┴────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬──────────╮ │ # │ a │ ├───┼──────────┤ │ 0 │ 86400000 │ ╰───┴──────────╯ ``` # User-Facing Changes No user-facing breaking change. Developer breaking change: to mitigate the silent overflow in nanoseconds conversion functions `nanos_from_timeunit` and `nanos_to_timeunit` (new), the function signatures were changed from `i64` to `Result`. # Tests + Formatting No additional examples were added, but I'd be happy to add a few if needed. The covering tests just didn't fit well into any examples. # After Submitting --- .../values/nu_dataframe/conversion.rs | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index 2ba8526794..7c32ec3e62 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -440,8 +440,8 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result Result { // If there is a timezone specified, make sure // the value is converted to it - Ok(tz - .parse::() + tz.parse::() .map(|tz| val.with_timezone(&tz)) .map_err(|e| ShellError::GenericError { error: "Error parsing timezone".into(), @@ -500,11 +499,13 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result Ok(val + (None, Value::Date { val, .. }) => val .timestamp_nanos_opt() - .map(|nanos| nanos_from_timeunit(nanos, *tu))), + .map(|nanos| nanos_to_timeunit(nanos, *tu)) + .transpose(), _ => Ok(None), } @@ -1160,7 +1161,7 @@ fn series_to_values( .map(|v| match v { Some(a) => { // elapsed time in nano/micro/milliseconds since 1970-01-01 - let nanos = nanos_from_timeunit(a, *time_unit); + let nanos = nanos_from_timeunit(a, *time_unit)?; let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; Ok(Value::date(datetime, span)) } @@ -1278,7 +1279,7 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { - let nanos = nanos_from_timeunit(*a, *time_unit); + let nanos = nanos_from_timeunit(*a, *time_unit)?; datetime_from_epoch_nanos(nanos, &tz.cloned(), span) .map(|datetime| Value::date(datetime, span)) } @@ -1365,12 +1366,35 @@ fn nanos_per_day(days: i32) -> i64 { days as i64 * NANOS_PER_DAY } -fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { - a * match time_unit { +fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> Result { + a.checked_mul(match time_unit { TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds TimeUnit::Nanoseconds => 1, // Already in nanoseconds - } + }) + .ok_or_else(|| ShellError::GenericError { + error: format!("Converting from {time_unit} to nanoseconds caused an overflow"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) +} + +fn nanos_to_timeunit(a: i64, time_unit: TimeUnit) -> Result { + // integer division (rounds to 0) + a.checked_div(match time_unit { + TimeUnit::Microseconds => 1_000i64, // Convert microseconds to nanoseconds + TimeUnit::Milliseconds => 1_000_000i64, // Convert milliseconds to nanoseconds + TimeUnit::Nanoseconds => 1i64, // Already in nanoseconds + }) + .ok_or_else(|| ShellError::GenericError { + error: format!("Converting from nanoseconds to {time_unit} caused an overflow"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) } fn datetime_from_epoch_nanos( From 0ca5c2f1356b2f2ad57060ec39ec8cb0b6b4f789 Mon Sep 17 00:00:00 2001 From: Piepmatz Date: Fri, 25 Apr 2025 13:56:30 +0200 Subject: [PATCH 19/22] Add `cat` and `get-content` to `open`'s search terms (#15643) # Description A friend of mine started using nushell on Windows and wondered why the `cat` command wasn't available. I answered to him, that he can use `help -f` or F1 to find the command but then we both realized that neither `cat` nor `Get-Command` were part of `open`'s search terms. So I added them. # User-Facing Changes None. # Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` # After Submitting --- crates/nu-command/src/filesystem/open.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/nu-command/src/filesystem/open.rs b/crates/nu-command/src/filesystem/open.rs index cd91f43aad..276b0b28c1 100644 --- a/crates/nu-command/src/filesystem/open.rs +++ b/crates/nu-command/src/filesystem/open.rs @@ -34,7 +34,14 @@ impl Command for Open { } fn search_terms(&self) -> Vec<&str> { - vec!["load", "read", "load_file", "read_file"] + vec![ + "load", + "read", + "load_file", + "read_file", + "cat", + "get-content", + ] } fn signature(&self) -> nu_protocol::Signature { From 11cdb94699c3717a2310d84940e806f3d8f87ecb Mon Sep 17 00:00:00 2001 From: Wind Date: Fri, 25 Apr 2025 22:00:20 +0800 Subject: [PATCH 20/22] IR: rasing reasonable error when using subexpression with `and` operator (#15623) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Fixes: #15510 I think it's introduced by #14653, which changes `and/or` to `match` expression. After looking into `compile_match`, it's important to collect the value before matching this. ```rust // Important to collect it first builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?; ``` This pr is going to apply the logic while compiling `and/or` operation. # User-Facing Changes The following will raise a reasonable error: ```nushell > (nu --testbin cococo false) and true Error: nu::shell::operator_unsupported_type × The 'and' operator does not work on values of type 'string'. ╭─[entry #7:1:2] 1 │ (nu --testbin cococo false) and true · ─┬ ─┬─ · │ ╰── does not support 'string' · ╰── string ╰──── ``` # Tests + Formatting Added 1 test. # After Submitting Maybe need to update doc https://github.com/nushell/nushell.github.io/pull/1876 --------- Co-authored-by: Stefan Holderbach --- crates/nu-engine/src/compile/operator.rs | 2 ++ tests/shell/pipeline/commands/external.rs | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/crates/nu-engine/src/compile/operator.rs b/crates/nu-engine/src/compile/operator.rs index 6e127c8d5b..2a913f8836 100644 --- a/crates/nu-engine/src/compile/operator.rs +++ b/crates/nu-engine/src/compile/operator.rs @@ -70,6 +70,8 @@ pub(crate) fn compile_binary_op( Boolean::Xor => unreachable!(), }; + // Before match against lhs_reg, it's important to collect it first to get a concrete value if there is a subexpression. + builder.push(Instruction::Collect { src_dst: lhs_reg }.into_spanned(lhs.span))?; // Short-circuit to return `lhs_reg`. `match` op does not consume `lhs_reg`. let short_circuit_label = builder.label(None); builder.r#match( diff --git a/tests/shell/pipeline/commands/external.rs b/tests/shell/pipeline/commands/external.rs index e5c6915565..c0cf8e1adb 100644 --- a/tests/shell/pipeline/commands/external.rs +++ b/tests/shell/pipeline/commands/external.rs @@ -717,3 +717,11 @@ fn external_error_with_backtrace() { assert_eq!(chained_error_cnt.len(), 0); }); } + +#[test] +fn sub_external_expression_with_and_op_should_raise_proper_error() { + let actual = nu!("(nu --testbin cococo false) and true"); + assert!(actual + .err + .contains("The 'and' operator does not work on values of type 'string'")) +} From 0389815137f6814e3041b7770819e2ad1dd74054 Mon Sep 17 00:00:00 2001 From: Bahex Date: Fri, 25 Apr 2025 18:24:44 +0300 Subject: [PATCH 21/22] docs(explore): Add ":nu" back to the help text (#15644) # Description Looks like `:nu` was forgotten about when the help system was refactored. # User-Facing Changes # Tests + Formatting # After Submitting Co-authored-by: Bahex <17417311+Bahex@users.noreply.github.com> --- crates/nu-explore/src/commands/help.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/nu-explore/src/commands/help.rs b/crates/nu-explore/src/commands/help.rs index 3083b83a9b..a0b6a4d97a 100644 --- a/crates/nu-explore/src/commands/help.rs +++ b/crates/nu-explore/src/commands/help.rs @@ -38,6 +38,7 @@ Drill down into records+tables: Press to select a cell, move around wit Expand (show all nested data): Press "e" Open this help page : Type ":help" then Open an interactive REPL: Type ":try" then + Run a Nushell command: Type ":nu " then . The data currently being explored is piped into it. Scroll up: Press "Page Up", Ctrl+B, or Alt+V Scroll down: Press "Page Down", Ctrl+F, or Ctrl+V Exit Explore: Type ":q" then , or Ctrl+D. Alternately, press or "q" until Explore exits From 2d868323b6bdb3085f9009b61d9ef97cd5851e33 Mon Sep 17 00:00:00 2001 From: Renan Ribeiro <55855728+cosineblast@users.noreply.github.com> Date: Sat, 26 Apr 2025 12:24:35 -0300 Subject: [PATCH 22/22] Inter-Job direct messaging (#15253) # Description This PR implements an experimental inter-job communication model, through direct message passing, aka "mail"ing or "dm"ing: - `job send `: Sends a message the job with the given id, the root job has id 0. Messages are stored in the recipient's "mailbox" - `job recv`: Returns a stored message, blocks if the mailbox is empty - `job flush`: Clear all messages from mailbox Additionally, messages can be sent with a numeric tag, which can then be filtered with `mail recv --tag`. This is useful for spawning jobs and receiving messages specifically from those jobs. This PR is mostly a proof of concept for how inter-job communication could look like, so people can provide feedback and suggestions Closes #15199 May close #15220 since now jobs can access their own id. # User-Facing Changes Adds, `job id`, `job send`, `job recv` and `job flush` commands. # Tests + Formatting [X] TODO: Implement tests [X] Consider rewriting some of the job-related tests to use this, to make them a bit less fragile. # After Submitting --- crates/nu-command/src/default_context.rs | 8 + .../nu-command/src/experimental/job_flush.rs | 58 +++++ crates/nu-command/src/experimental/job_id.rs | 50 ++++ .../nu-command/src/experimental/job_recv.rs | 181 +++++++++++++++ .../nu-command/src/experimental/job_send.rs | 112 +++++++++ .../nu-command/src/experimental/job_spawn.rs | 23 +- .../src/experimental/job_unfreeze.rs | 4 +- crates/nu-command/src/experimental/mod.rs | 16 ++ crates/nu-command/src/system/run_external.rs | 2 +- crates/nu-command/tests/commands/job.rs | 218 ++++++++++++++++-- crates/nu-protocol/src/engine/engine_state.rs | 30 ++- crates/nu-protocol/src/engine/jobs.rs | 173 +++++++++++++- .../nu-protocol/src/errors/shell_error/mod.rs | 23 +- crates/nu-protocol/src/process/child.rs | 2 +- 14 files changed, 853 insertions(+), 47 deletions(-) create mode 100644 crates/nu-command/src/experimental/job_flush.rs create mode 100644 crates/nu-command/src/experimental/job_id.rs create mode 100644 crates/nu-command/src/experimental/job_recv.rs create mode 100644 crates/nu-command/src/experimental/job_send.rs diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index d803225c36..f18e3eb369 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -452,10 +452,18 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState { JobSpawn, JobList, JobKill, + JobId, JobTag, Job, }; + #[cfg(not(target_family = "wasm"))] + bind_command! { + JobSend, + JobRecv, + JobFlush, + } + #[cfg(all(unix, feature = "os"))] bind_command! { JobUnfreeze, diff --git a/crates/nu-command/src/experimental/job_flush.rs b/crates/nu-command/src/experimental/job_flush.rs new file mode 100644 index 0000000000..f717cb7bae --- /dev/null +++ b/crates/nu-command/src/experimental/job_flush.rs @@ -0,0 +1,58 @@ +use nu_engine::command_prelude::*; + +#[derive(Clone)] +pub struct JobFlush; + +impl Command for JobFlush { + fn name(&self) -> &str { + "job flush" + } + + fn description(&self) -> &str { + "Clear this job's mailbox." + } + + fn extra_description(&self) -> &str { + r#" +This command removes all messages in the mailbox of the current job. +If a message is received while this command is executing, it may also be discarded. +"# + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("job flush") + .category(Category::Experimental) + .input_output_types(vec![(Type::Nothing, Type::Nothing)]) + .allow_variants_without_examples(true) + } + + fn search_terms(&self) -> Vec<&str> { + vec![] + } + + fn run( + &self, + engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let mut mailbox = engine_state + .current_job + .mailbox + .lock() + .expect("failed to acquire lock"); + + mailbox.clear(); + + Ok(Value::nothing(call.head).into_pipeline_data()) + } + + fn examples(&self) -> Vec { + vec![Example { + example: "job flush", + description: "Clear the mailbox of the current job.", + result: None, + }] + } +} diff --git a/crates/nu-command/src/experimental/job_id.rs b/crates/nu-command/src/experimental/job_id.rs new file mode 100644 index 0000000000..6f3be8a9eb --- /dev/null +++ b/crates/nu-command/src/experimental/job_id.rs @@ -0,0 +1,50 @@ +use nu_engine::command_prelude::*; + +#[derive(Clone)] +pub struct JobId; + +impl Command for JobId { + fn name(&self) -> &str { + "job id" + } + + fn description(&self) -> &str { + "Get id of current job." + } + + fn extra_description(&self) -> &str { + "This command returns the job id for the current background job. +The special id 0 indicates that this command was not called from a background job thread, and +was instead spawned by main nushell execution thread." + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("job id") + .category(Category::Experimental) + .input_output_types(vec![(Type::Nothing, Type::Int)]) + } + + fn search_terms(&self) -> Vec<&str> { + vec!["self", "this", "my-id", "this-id"] + } + + fn run( + &self, + engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let head = call.head; + + Ok(Value::int(engine_state.current_job.id.get() as i64, head).into_pipeline_data()) + } + + fn examples(&self) -> Vec { + vec![Example { + example: "job id", + description: "Get id of current job", + result: None, + }] + } +} diff --git a/crates/nu-command/src/experimental/job_recv.rs b/crates/nu-command/src/experimental/job_recv.rs new file mode 100644 index 0000000000..a19a298620 --- /dev/null +++ b/crates/nu-command/src/experimental/job_recv.rs @@ -0,0 +1,181 @@ +use std::{ + sync::mpsc::{RecvTimeoutError, TryRecvError}, + time::{Duration, Instant}, +}; + +use nu_engine::command_prelude::*; + +use nu_protocol::{ + engine::{FilterTag, Mailbox}, + Signals, +}; + +#[derive(Clone)] +pub struct JobRecv; + +const CTRL_C_CHECK_INTERVAL: Duration = Duration::from_millis(100); + +impl Command for JobRecv { + fn name(&self) -> &str { + "job recv" + } + + fn description(&self) -> &str { + "Read a message from the mailbox." + } + + fn extra_description(&self) -> &str { + r#"When messages are sent to the current process, they get stored in what is called the "mailbox". +This commands reads and returns a message from the mailbox, in a first-in-first-out fashion. +j +Messages may have numeric flags attached to them. This commands supports filtering out messages that do not satisfy a given tag, by using the `tag` flag. +If no tag is specified, this command will accept any message. + +If no message with the specified tag (if any) is available in the mailbox, this command will block the current thread until one arrives. +By default this command block indefinitely until a matching message arrives, but a timeout duration can be specified. +If a timeout duration of zero is specified, it will succeed only if there already is a message in the mailbox. + +Note: When using par-each, only one thread at a time can utilize this command. +In the case of two or more threads running this command, they will wait until other threads are done using it, +in no particular order, regardless of the specified timeout parameter. +"# + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("job recv") + .category(Category::Experimental) + .named("tag", SyntaxShape::Int, "A tag for the message", None) + .named( + "timeout", + SyntaxShape::Duration, + "The maximum time duration to wait for.", + None, + ) + .input_output_types(vec![(Type::Nothing, Type::Any)]) + .allow_variants_without_examples(true) + } + + fn search_terms(&self) -> Vec<&str> { + vec!["receive"] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let head = call.head; + + let tag_arg: Option> = call.get_flag(engine_state, stack, "tag")?; + + if let Some(tag) = tag_arg { + if tag.item < 0 { + return Err(ShellError::NeedsPositiveValue { span: tag.span }); + } + } + + let tag = tag_arg.map(|it| it.item as FilterTag); + + let duration: Option = call.get_flag(engine_state, stack, "timeout")?; + + let timeout = duration.map(|it| Duration::from_nanos(it as u64)); + + let mut mailbox = engine_state + .current_job + .mailbox + .lock() + .expect("failed to acquire lock"); + + if let Some(timeout) = timeout { + if timeout == Duration::ZERO { + recv_instantly(&mut mailbox, tag, head) + } else { + recv_with_time_limit(&mut mailbox, tag, engine_state.signals(), head, timeout) + } + } else { + recv_without_time_limit(&mut mailbox, tag, engine_state.signals(), head) + } + } + + fn examples(&self) -> Vec { + vec![ + Example { + example: "job recv", + description: "Block the current thread while no message arrives", + result: None, + }, + Example { + example: "job recv --timeout 10sec", + description: "Receive a message, wait for at most 10 seconds.", + result: None, + }, + Example { + example: "job recv --timeout 0sec", + description: "Get a message or fail if no message is available immediately", + result: None, + }, + ] + } +} + +fn recv_without_time_limit( + mailbox: &mut Mailbox, + tag: Option, + signals: &Signals, + span: Span, +) -> Result { + loop { + if signals.interrupted() { + return Err(ShellError::Interrupted { span }); + } + match mailbox.recv_timeout(tag, CTRL_C_CHECK_INTERVAL) { + Ok(value) => return Ok(value), + Err(RecvTimeoutError::Timeout) => {} // try again + Err(RecvTimeoutError::Disconnected) => return Err(ShellError::Interrupted { span }), + } + } +} + +fn recv_instantly( + mailbox: &mut Mailbox, + tag: Option, + span: Span, +) -> Result { + match mailbox.try_recv(tag) { + Ok(value) => Ok(value), + Err(TryRecvError::Empty) => Err(ShellError::RecvTimeout { span }), + Err(TryRecvError::Disconnected) => Err(ShellError::Interrupted { span }), + } +} + +fn recv_with_time_limit( + mailbox: &mut Mailbox, + tag: Option, + signals: &Signals, + span: Span, + timeout: Duration, +) -> Result { + let deadline = Instant::now() + timeout; + + loop { + if signals.interrupted() { + return Err(ShellError::Interrupted { span }); + } + + let time_until_deadline = deadline.saturating_duration_since(Instant::now()); + + let time_to_sleep = time_until_deadline.min(CTRL_C_CHECK_INTERVAL); + + match mailbox.recv_timeout(tag, time_to_sleep) { + Ok(value) => return Ok(value), + Err(RecvTimeoutError::Timeout) => {} // try again + Err(RecvTimeoutError::Disconnected) => return Err(ShellError::Interrupted { span }), + } + + if time_until_deadline.is_zero() { + return Err(ShellError::RecvTimeout { span }); + } + } +} diff --git a/crates/nu-command/src/experimental/job_send.rs b/crates/nu-command/src/experimental/job_send.rs new file mode 100644 index 0000000000..08495d5261 --- /dev/null +++ b/crates/nu-command/src/experimental/job_send.rs @@ -0,0 +1,112 @@ +use nu_engine::command_prelude::*; +use nu_protocol::{engine::FilterTag, JobId}; + +#[derive(Clone)] +pub struct JobSend; + +impl Command for JobSend { + fn name(&self) -> &str { + "job send" + } + + fn description(&self) -> &str { + "Send a message to the mailbox of a job." + } + + fn extra_description(&self) -> &str { + r#" +This command sends a message to a background job, which can then read sent messages +in a first-in-first-out fashion with `job recv`. When it does so, it may additionally specify a numeric filter tag, +in which case it will only read messages sent with the exact same filter tag. +In particular, the id 0 refers to the main/initial nushell thread. + +A message can be any nushell value, and streams are always collected before being sent. + +This command never blocks. +"# + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("job send") + .category(Category::Experimental) + .required( + "id", + SyntaxShape::Int, + "The id of the job to send the message to.", + ) + .named("tag", SyntaxShape::Int, "A tag for the message", None) + .input_output_types(vec![(Type::Any, Type::Nothing)]) + .allow_variants_without_examples(true) + } + + fn search_terms(&self) -> Vec<&str> { + vec![] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + + let id_arg: Spanned = call.req(engine_state, stack, 0)?; + let tag_arg: Option> = call.get_flag(engine_state, stack, "tag")?; + + let id = id_arg.item; + + if id < 0 { + return Err(ShellError::NeedsPositiveValue { span: id_arg.span }); + } + + if let Some(tag) = tag_arg { + if tag.item < 0 { + return Err(ShellError::NeedsPositiveValue { span: tag.span }); + } + } + + let tag = tag_arg.map(|it| it.item as FilterTag); + + if id == 0 { + engine_state + .root_job_sender + .send((tag, input)) + .expect("this should NEVER happen."); + } else { + let jobs = engine_state.jobs.lock().expect("failed to acquire lock"); + + if let Some(job) = jobs.lookup(JobId::new(id as usize)) { + match job { + nu_protocol::engine::Job::Thread(thread_job) => { + // it is ok to send this value while holding the lock, because + // mail channels are always unbounded, so this send never blocks + let _ = thread_job.sender.send((tag, input)); + } + nu_protocol::engine::Job::Frozen(_) => { + return Err(ShellError::JobIsFrozen { + id: id as usize, + span: id_arg.span, + }); + } + } + } else { + return Err(ShellError::JobNotFound { + id: id as usize, + span: id_arg.span, + }); + } + } + + Ok(Value::nothing(head).into_pipeline_data()) + } + + fn examples(&self) -> Vec { + vec![Example { + example: "let id = job spawn { job recv | save sent.txt }; 'hi' | job send $id", + description: "Send a message to a newly spawned job", + result: None, + }] + } +} diff --git a/crates/nu-command/src/experimental/job_spawn.rs b/crates/nu-command/src/experimental/job_spawn.rs index 37203aafb9..09ff929a8a 100644 --- a/crates/nu-command/src/experimental/job_spawn.rs +++ b/crates/nu-command/src/experimental/job_spawn.rs @@ -1,14 +1,14 @@ use std::{ sync::{ atomic::{AtomicBool, AtomicU32}, - Arc, + mpsc, Arc, Mutex, }, thread, }; use nu_engine::{command_prelude::*, ClosureEvalOnce}; use nu_protocol::{ - engine::{Closure, Job, Redirection, ThreadJob}, + engine::{Closure, CurrentJob, Job, Mailbox, Redirection, ThreadJob}, report_shell_error, OutDest, Signals, }; @@ -57,12 +57,11 @@ impl Command for JobSpawn { let closure: Closure = call.req(engine_state, stack, 0)?; let tag: Option = call.get_flag(engine_state, stack, "tag")?; + let job_stack = stack.clone(); let mut job_state = engine_state.clone(); job_state.is_interactive = false; - let job_stack = stack.clone(); - // the new job should have its ctrl-c independent of foreground let job_signals = Signals::new(Arc::new(AtomicBool::new(false))); job_state.set_signals(job_signals.clone()); @@ -75,10 +74,20 @@ impl Command for JobSpawn { let jobs = job_state.jobs.clone(); let mut jobs = jobs.lock().expect("jobs lock is poisoned!"); + let (send, recv) = mpsc::channel(); + let id = { - let thread_job = ThreadJob::new(job_signals, tag); - job_state.current_thread_job = Some(thread_job.clone()); - jobs.add_job(Job::Thread(thread_job)) + let thread_job = ThreadJob::new(job_signals, tag, send); + + let id = jobs.add_job(Job::Thread(thread_job.clone())); + + job_state.current_job = CurrentJob { + id, + background_thread_job: Some(thread_job), + mailbox: Arc::new(Mutex::new(Mailbox::new(recv))), + }; + + id }; let result = thread::Builder::new() diff --git a/crates/nu-command/src/experimental/job_unfreeze.rs b/crates/nu-command/src/experimental/job_unfreeze.rs index 67fb3c96a1..3143b31184 100644 --- a/crates/nu-command/src/experimental/job_unfreeze.rs +++ b/crates/nu-command/src/experimental/job_unfreeze.rs @@ -118,7 +118,7 @@ fn unfreeze_job( }) => { let pid = handle.pid(); - if let Some(thread_job) = &state.current_thread_job { + if let Some(thread_job) = &state.current_thread_job() { if !thread_job.try_add_pid(pid) { kill_by_pid(pid.into()).map_err(|err| { ShellError::Io(IoError::new_internal( @@ -136,7 +136,7 @@ fn unfreeze_job( .then(|| state.pipeline_externals_state.clone()), ); - if let Some(thread_job) = &state.current_thread_job { + if let Some(thread_job) = &state.current_thread_job() { thread_job.remove_pid(pid); } diff --git a/crates/nu-command/src/experimental/mod.rs b/crates/nu-command/src/experimental/mod.rs index f98d123218..9c695d9116 100644 --- a/crates/nu-command/src/experimental/mod.rs +++ b/crates/nu-command/src/experimental/mod.rs @@ -1,5 +1,6 @@ mod is_admin; mod job; +mod job_id; mod job_kill; mod job_list; mod job_spawn; @@ -8,12 +9,27 @@ mod job_tag; #[cfg(all(unix, feature = "os"))] mod job_unfreeze; +#[cfg(not(target_family = "wasm"))] +mod job_flush; +#[cfg(not(target_family = "wasm"))] +mod job_recv; +#[cfg(not(target_family = "wasm"))] +mod job_send; + pub use is_admin::IsAdmin; pub use job::Job; +pub use job_id::JobId; pub use job_kill::JobKill; pub use job_list::JobList; pub use job_spawn::JobSpawn; pub use job_tag::JobTag; +#[cfg(not(target_family = "wasm"))] +pub use job_flush::JobFlush; +#[cfg(not(target_family = "wasm"))] +pub use job_recv::JobRecv; +#[cfg(not(target_family = "wasm"))] +pub use job_send::JobSend; + #[cfg(all(unix, feature = "os"))] pub use job_unfreeze::JobUnfreeze; diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index 8331c9f284..180238f3f2 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -285,7 +285,7 @@ impl Command for External { ) })?; - if let Some(thread_job) = &engine_state.current_thread_job { + if let Some(thread_job) = engine_state.current_thread_job() { if !thread_job.try_add_pid(child.pid()) { kill_by_pid(child.pid().into()).map_err(|err| { ShellError::Io(IoError::new_internal( diff --git a/crates/nu-command/tests/commands/job.rs b/crates/nu-command/tests/commands/job.rs index 10b8346edd..608ca318c8 100644 --- a/crates/nu-command/tests/commands/job.rs +++ b/crates/nu-command/tests/commands/job.rs @@ -1,22 +1,188 @@ -use nu_test_support::{nu, playground::Playground}; +use nu_test_support::nu; #[test] -fn jobs_do_run() { - Playground::setup("job_test_1", |dirs, sandbox| { - sandbox.with_files(&[]); +fn job_send_root_job_works() { + let actual = nu!(r#" + job spawn { 'beep' | job send 0 } + job recv --timeout 10sec"#); - let actual = nu!( - cwd: dirs.root(), - r#" - rm -f a.txt; - job spawn { sleep 200ms; 'a' | save a.txt }; - let before = 'a.txt' | path exists; - sleep 400ms; - let after = 'a.txt' | path exists; - [$before, $after] | to nuon"# - ); - assert_eq!(actual.out, "[false, true]"); - }) + assert_eq!(actual.out, "beep"); +} + +#[test] +fn job_send_background_job_works() { + let actual = nu!(r#" + let job = job spawn { job recv | job send 0 } + 'boop' | job send $job + job recv --timeout 10sec"#); + + assert_eq!(actual.out, "boop"); +} + +#[test] +fn job_send_to_self_works() { + let actual = nu!(r#" + "meep" | job send 0 + job recv"#); + + assert_eq!(actual.out, "meep"); +} + +#[test] +fn job_send_to_self_from_background_works() { + let actual = nu!(r#" + job spawn { + 'beep' | job send (job id) + job recv | job send 0 + } + + job recv --timeout 10sec"#); + + assert_eq!(actual.out, "beep"); +} + +#[test] +fn job_id_of_root_job_is_zero() { + let actual = nu!(r#"job id"#); + + assert_eq!(actual.out, "0"); +} + +#[test] +fn job_id_of_background_jobs_works() { + let actual = nu!(r#" + let job1 = job spawn { job id | job send 0 } + let id1 = job recv --timeout 5sec + + let job2 = job spawn { job id | job send 0 } + let id2 = job recv --timeout 5sec + + let job3 = job spawn { job id | job send 0 } + let id3 = job recv --timeout 5sec + + [($job1 == $id1) ($job2 == $id2) ($job3 == $id3)] | to nuon + + "#); + + assert_eq!(actual.out, "[true, true, true]"); +} + +#[test] +fn untagged_job_recv_accepts_tagged_messages() { + let actual = nu!(r#" + job spawn { "boop" | job send 0 --tag 123 } + job recv --timeout 10sec + "#); + + assert_eq!(actual.out, "boop"); +} + +#[test] +fn tagged_job_recv_filters_untagged_messages() { + let actual = nu!(r#" + job spawn { "boop" | job send 0 } + job recv --tag 123 --timeout 1sec + "#); + + assert_eq!(actual.out, ""); + assert!(actual.err.contains("timeout")); +} + +#[test] +fn tagged_job_recv_filters_badly_tagged_messages() { + let actual = nu!(r#" + job spawn { "boop" | job send 0 --tag 321 } + job recv --tag 123 --timeout 1sec + "#); + + assert_eq!(actual.out, ""); + assert!(actual.err.contains("timeout")); +} + +#[test] +fn tagged_job_recv_accepts_properly_tagged_messages() { + let actual = nu!(r#" + job spawn { "boop" | job send 0 --tag 123 } + job recv --tag 123 --timeout 5sec + "#); + + assert_eq!(actual.out, "boop"); +} + +#[test] +fn filtered_messages_are_not_erased() { + let actual = nu!(r#" + "msg1" | job send 0 --tag 123 + "msg2" | job send 0 --tag 456 + "msg3" | job send 0 --tag 789 + + let first = job recv --tag 789 --timeout 5sec + let second = job recv --timeout 1sec + let third = job recv --timeout 1sec + + + [($first) ($second) ($third)] | to nuon + "#); + + assert_eq!(actual.out, r#"["msg3", "msg1", "msg2"]"#); +} + +#[test] +fn job_recv_timeout_works() { + let actual = nu!(r#" + job spawn { + sleep 2sec + "boop" | job send 0 + } + + job recv --timeout 1sec + "#); + + assert_eq!(actual.out, ""); + assert!(actual.err.contains("timeout")); +} + +#[test] +fn job_recv_timeout_zero_works() { + let actual = nu!(r#" + "hi there" | job send 0 + job recv --timeout 0sec + "#); + + assert_eq!(actual.out, "hi there"); +} + +#[test] +fn job_flush_clears_messages() { + let actual = nu!(r#" + "SALE!!!" | job send 0 + "[HYPERLINK BLOCKED]" | job send 0 + + job flush + + job recv --timeout 1sec + "#); + + assert_eq!(actual.out, ""); + assert!(actual.err.contains("timeout")); +} + +#[test] +fn job_flush_clears_filtered_messages() { + let actual = nu!(r#" + "msg1" | job send 0 --tag 123 + "msg2" | job send 0 --tag 456 + "msg3" | job send 0 --tag 789 + + job recv --tag 789 --timeout 1sec + + job flush + + job recv --timeout 1sec + "#); + + assert_eq!(actual.out, ""); + assert!(actual.err.contains("timeout")); } #[test] @@ -31,11 +197,11 @@ fn job_list_adds_jobs_correctly() { let actual = nu!(format!( r#" let list0 = job list | get id; - let job1 = job spawn {{ sleep 20ms }}; + let job1 = job spawn {{ job recv }}; let list1 = job list | get id; - let job2 = job spawn {{ sleep 20ms }}; + let job2 = job spawn {{ job recv }}; let list2 = job list | get id; - let job3 = job spawn {{ sleep 20ms }}; + let job3 = job spawn {{ job recv }}; let list3 = job list | get id; [({}), ({}), ({}), ({})] | to nuon "#, @@ -52,11 +218,13 @@ fn job_list_adds_jobs_correctly() { fn jobs_get_removed_from_list_after_termination() { let actual = nu!(format!( r#" - let job = job spawn {{ sleep 0.5sec }}; + let job = job spawn {{ job recv }}; let list0 = job list | get id; - sleep 1sec + "die!" | job send $job + + sleep 0.2sec let list1 = job list | get id; @@ -68,6 +236,8 @@ fn jobs_get_removed_from_list_after_termination() { assert_eq!(actual.out, "[true, true]"); } +// TODO: find way to communicate between process in windows +// so these tests can fail less often #[test] fn job_list_shows_pids() { let actual = nu!(format!( @@ -89,9 +259,9 @@ fn job_list_shows_pids() { fn killing_job_removes_it_from_table() { let actual = nu!(format!( r#" - let job1 = job spawn {{ sleep 100ms }} - let job2 = job spawn {{ sleep 100ms }} - let job3 = job spawn {{ sleep 100ms }} + let job1 = job spawn {{ job recv }} + let job2 = job spawn {{ job recv }} + let job3 = job spawn {{ job recv }} let list_before = job list | get id diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 89572c9a85..3099b33f20 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -8,9 +8,9 @@ use crate::{ }, eval_const::create_nu_constant, shell_error::io::IoError, - BlockId, Category, Config, DeclId, FileId, GetSpan, Handlers, HistoryConfig, Module, ModuleId, - OverlayId, ShellError, SignalAction, Signals, Signature, Span, SpanId, Type, Value, VarId, - VirtualPathId, + BlockId, Category, Config, DeclId, FileId, GetSpan, Handlers, HistoryConfig, JobId, Module, + ModuleId, OverlayId, ShellError, SignalAction, Signals, Signature, Span, SpanId, Type, Value, + VarId, VirtualPathId, }; use fancy_regex::Regex; use lru::LruCache; @@ -22,6 +22,8 @@ use std::{ path::PathBuf, sync::{ atomic::{AtomicBool, AtomicU32, Ordering}, + mpsc::channel, + mpsc::Sender, Arc, Mutex, MutexGuard, PoisonError, }, }; @@ -31,7 +33,7 @@ type PoisonDebuggerError<'a> = PoisonError>>; #[cfg(feature = "plugin")] use crate::{PluginRegistryFile, PluginRegistryItem, RegisteredPlugin}; -use super::{Jobs, ThreadJob}; +use super::{CurrentJob, Jobs, Mail, Mailbox, ThreadJob}; #[derive(Clone, Debug)] pub enum VirtualPath { @@ -117,7 +119,9 @@ pub struct EngineState { pub jobs: Arc>, // The job being executed with this engine state, or None if main thread - pub current_thread_job: Option, + pub current_job: CurrentJob, + + pub root_job_sender: Sender, // When there are background jobs running, the interactive behavior of `exit` changes depending on // the value of this flag: @@ -141,6 +145,8 @@ pub const UNKNOWN_SPAN_ID: SpanId = SpanId::new(0); impl EngineState { pub fn new() -> Self { + let (send, recv) = channel::(); + Self { files: vec![], virtual_paths: vec![], @@ -196,7 +202,12 @@ impl EngineState { is_debugging: IsDebugging::new(false), debugger: Arc::new(Mutex::new(Box::new(NoopDebugger))), jobs: Arc::new(Mutex::new(Jobs::default())), - current_thread_job: None, + current_job: CurrentJob { + id: JobId::new(0), + background_thread_job: None, + mailbox: Arc::new(Mutex::new(Mailbox::new(recv))), + }, + root_job_sender: send, exit_warning_given: Arc::new(AtomicBool::new(false)), } } @@ -1081,7 +1092,12 @@ impl EngineState { // Determines whether the current state is being held by a background job pub fn is_background_job(&self) -> bool { - self.current_thread_job.is_some() + self.current_job.background_thread_job.is_some() + } + + // Gets the thread job entry + pub fn current_thread_job(&self) -> Option<&ThreadJob> { + self.current_job.background_thread_job.as_ref() } } diff --git a/crates/nu-protocol/src/engine/jobs.rs b/crates/nu-protocol/src/engine/jobs.rs index 8e64e46f7f..71c18a4c83 100644 --- a/crates/nu-protocol/src/engine/jobs.rs +++ b/crates/nu-protocol/src/engine/jobs.rs @@ -1,11 +1,17 @@ use std::{ - collections::{HashMap, HashSet}, - sync::{Arc, Mutex}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + sync::{ + mpsc::{Receiver, RecvTimeoutError, Sender, TryRecvError}, + Arc, Mutex, + }, }; +#[cfg(not(target_family = "wasm"))] +use std::time::{Duration, Instant}; + use nu_system::{kill_by_pid, UnfreezeHandle}; -use crate::Signals; +use crate::{PipelineData, Signals}; use crate::JobId; @@ -139,13 +145,15 @@ pub struct ThreadJob { signals: Signals, pids: Arc>>, tag: Option, + pub sender: Sender, } impl ThreadJob { - pub fn new(signals: Signals, tag: Option) -> Self { + pub fn new(signals: Signals, tag: Option, sender: Sender) -> Self { ThreadJob { signals, pids: Arc::new(Mutex::new(HashSet::default())), + sender, tag, } } @@ -238,3 +246,160 @@ impl FrozenJob { } } } + +/// Stores the information about the background job currently being executed by this thread, if any +#[derive(Clone)] +pub struct CurrentJob { + pub id: JobId, + + // The background thread job associated with this thread. + // If None, it indicates this thread is currently the main job + pub background_thread_job: Option, + + // note: although the mailbox is Mutex'd, it is only ever accessed + // by the current job's threads + pub mailbox: Arc>, +} + +// The storage for unread messages +// +// Messages are initially sent over a mpsc channel, +// and may then be stored in a IgnoredMail struct when +// filtered out by a tag. +pub struct Mailbox { + receiver: Receiver, + ignored_mail: IgnoredMail, +} + +impl Mailbox { + pub fn new(receiver: Receiver) -> Self { + Mailbox { + receiver, + ignored_mail: IgnoredMail::default(), + } + } + + #[cfg(not(target_family = "wasm"))] + pub fn recv_timeout( + &mut self, + filter_tag: Option, + timeout: Duration, + ) -> Result { + if let Some(value) = self.ignored_mail.pop(filter_tag) { + Ok(value) + } else { + let mut waited_so_far = Duration::ZERO; + let mut before = Instant::now(); + + while waited_so_far < timeout { + let (tag, value) = self.receiver.recv_timeout(timeout - waited_so_far)?; + + if filter_tag.is_none() || filter_tag == tag { + return Ok(value); + } else { + self.ignored_mail.add((tag, value)); + let now = Instant::now(); + waited_so_far += now - before; + before = now; + } + } + + Err(RecvTimeoutError::Timeout) + } + } + + #[cfg(not(target_family = "wasm"))] + pub fn try_recv( + &mut self, + filter_tag: Option, + ) -> Result { + if let Some(value) = self.ignored_mail.pop(filter_tag) { + Ok(value) + } else { + loop { + let (tag, value) = self.receiver.try_recv()?; + + if filter_tag.is_none() || filter_tag == tag { + return Ok(value); + } else { + self.ignored_mail.add((tag, value)); + } + } + } + } + + pub fn clear(&mut self) { + self.ignored_mail.clear(); + + while self.receiver.try_recv().is_ok() {} + } +} + +// A data structure used to store messages which were received, but currently ignored by a tag filter +// messages are added and popped in a first-in-first-out matter. +#[derive(Default)] +struct IgnoredMail { + next_id: usize, + messages: BTreeMap, + by_tag: HashMap>, +} + +pub type FilterTag = u64; +pub type Mail = (Option, PipelineData); + +impl IgnoredMail { + pub fn add(&mut self, (tag, value): Mail) { + let id = self.next_id; + self.next_id += 1; + + self.messages.insert(id, (tag, value)); + + if let Some(tag) = tag { + self.by_tag.entry(tag).or_default().insert(id); + } + } + + pub fn pop(&mut self, tag: Option) -> Option { + if let Some(tag) = tag { + self.pop_oldest_with_tag(tag) + } else { + self.pop_oldest() + } + } + + pub fn clear(&mut self) { + self.messages.clear(); + self.by_tag.clear(); + } + + fn pop_oldest(&mut self) -> Option { + let (id, (tag, value)) = self.messages.pop_first()?; + + if let Some(tag) = tag { + let needs_cleanup = if let Some(ids) = self.by_tag.get_mut(&tag) { + ids.remove(&id); + ids.is_empty() + } else { + false + }; + + if needs_cleanup { + self.by_tag.remove(&tag); + } + } + + Some(value) + } + + fn pop_oldest_with_tag(&mut self, tag: FilterTag) -> Option { + let ids = self.by_tag.get_mut(&tag)?; + + let id = ids.pop_first()?; + + if ids.is_empty() { + self.by_tag.remove(&tag); + } + + Some(self.messages.remove(&id)?.1) + } +} diff --git a/crates/nu-protocol/src/errors/shell_error/mod.rs b/crates/nu-protocol/src/errors/shell_error/mod.rs index 84c23cd208..9ff49f36d8 100644 --- a/crates/nu-protocol/src/errors/shell_error/mod.rs +++ b/crates/nu-protocol/src/errors/shell_error/mod.rs @@ -1370,7 +1370,7 @@ On Windows, this would be %USERPROFILE%\AppData\Roaming"# #[error("Job {id} is not frozen")] #[diagnostic( - code(nu::shell::os_disabled), + code(nu::shell::job_not_frozen), help("You tried to unfreeze a job which is not frozen") )] JobNotFrozen { @@ -1379,6 +1379,27 @@ On Windows, this would be %USERPROFILE%\AppData\Roaming"# span: Span, }, + #[error("The job {id} is frozen")] + #[diagnostic( + code(nu::shell::job_is_frozen), + help("This operation cannot be performed because the job is frozen") + )] + JobIsFrozen { + id: usize, + #[label = "This job is frozen"] + span: Span, + }, + + #[error("No message was received in the requested time interval")] + #[diagnostic( + code(nu::shell::recv_timeout), + help("No message arrived within the specified time limit") + )] + RecvTimeout { + #[label = "timeout"] + span: Span, + }, + #[error(transparent)] #[diagnostic(transparent)] ChainedError(ChainedError), diff --git a/crates/nu-protocol/src/process/child.rs b/crates/nu-protocol/src/process/child.rs index 069cb71204..b431a11df8 100644 --- a/crates/nu-protocol/src/process/child.rs +++ b/crates/nu-protocol/src/process/child.rs @@ -194,7 +194,7 @@ impl PostWaitCallback { child_pid: Option, tag: Option, ) -> Self { - let this_job = engine_state.current_thread_job.clone(); + let this_job = engine_state.current_thread_job().cloned(); let jobs = engine_state.jobs.clone(); let is_interactive = engine_state.is_interactive;