From f82c43f850e966d826facb74ca25210dd51292a6 Mon Sep 17 00:00:00 2001
From: Bruce Weirdan
Date: Wed, 31 Jul 2024 23:35:41 +0200
Subject: [PATCH] Consider numbers to be part of a word in `split words` (#13502)

# Description

Before this change, `"hash sha256 123 ok" | split words` would return
`[hash sha ok]` - which is surprising to say the least.

Now it will return `[hash sha256 123 ok]`.

Refs:
https://discord.com/channels/601130461678272522/615253963645911060/1268151658572025856

# User-Facing Changes

`split words` will no longer remove digits.

# Tests + Formatting

Added a test for this specific case.

# After Submitting
---
 crates/nu-command/src/strings/split/words.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/crates/nu-command/src/strings/split/words.rs b/crates/nu-command/src/strings/split/words.rs
index 6cb5562a70..b95a06f65c 100644
--- a/crates/nu-command/src/strings/split/words.rs
+++ b/crates/nu-command/src/strings/split/words.rs
@@ -187,7 +187,7 @@ fn split_words_helper(v: &Value, word_length: Option<usize>, span: Span, graphem
     // [^[:alpha:]\'] = do not match any uppercase or lowercase letters or apostrophes
     // [^\p{L}\'] = do not match any unicode uppercase or lowercase letters or apostrophes
     // Let's go with the unicode one in hopes that it works on more than just ascii characters
-    let regex_replace = Regex::new(r"[^\p{L}\']").expect("regular expression error");
+    let regex_replace = Regex::new(r"[^\p{L}\p{N}\']").expect("regular expression error");
 
     let v_span = v.span();
     match v {
@@ -422,4 +422,9 @@ mod test {
 
         test_examples(SubCommand {})
     }
+    #[test]
+    fn mixed_letter_number() {
+        let actual = nu!(r#"echo "a1 b2 c3" | split words | str join ','"#);
+        assert_eq!(actual.out, "a1,b2,c3");
+    }
 }