From 33d0537cae506b1cf1454ff3a5c9b15336e0fb6a Mon Sep 17 00:00:00 2001
From: Darren Schroeder <343840+fdncred@users.noreply.github.com>
Date: Sat, 29 Jun 2024 16:12:34 -0500
Subject: [PATCH] add `str deunicode` command (#13270)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Description

Sometimes it's helpful to deal with only ASCII. This command will take a
unicode string as input and convert it to ASCII using the deunicode crate.

```nushell
❯ "A…B" | str deunicode
A...B
```

# User-Facing Changes

# Tests + Formatting

# After Submitting
---
 Cargo.lock                                    |   9 +-
 Cargo.toml                                    |   1 +
 crates/nu-command/Cargo.toml                  |   1 +
 crates/nu-command/src/default_context.rs      |   1 +
 .../nu-command/src/strings/str_/deunicode.rs  | 120 ++++++++++++++++++
 crates/nu-command/src/strings/str_/mod.rs     |   2 +
 6 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 crates/nu-command/src/strings/str_/deunicode.rs

diff --git a/Cargo.lock b/Cargo.lock
index b82951b2b7..e2652a4178 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -377,7 +377,7 @@ dependencies = [
  "bitflags 2.5.0",
  "cexpr",
  "clang-sys",
- "itertools 0.11.0",
+ "itertools 0.12.1",
  "lazy_static",
  "lazycell",
  "proc-macro2",
@@ -1148,6 +1148,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "deunicode"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00"
+
 [[package]]
 name = "dialoguer"
 version = "0.11.0"
@@ -2967,6 +2973,7 @@ dependencies = [
  "chrono-tz 0.8.6",
  "crossterm",
  "csv",
+ "deunicode",
  "dialoguer",
  "digest",
  "dirs-next",
diff --git a/Cargo.toml b/Cargo.toml
index c88eee88ea..f79476e7e6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -80,6 +80,7 @@ crossbeam-channel = "0.5.8"
 crossterm = "0.27"
 csv = "1.3"
 ctrlc = "3.4"
+deunicode = "1.6.0"
 dialoguer = { default-features = false, version = "0.11" }
 digest = { default-features = false, version = "0.10" }
 dirs-next = "2.0"
diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml
index 474742ae0a..9c49a69ff5 100644
--- a/crates/nu-command/Cargo.toml
+++ b/crates/nu-command/Cargo.toml
@@ -42,6 +42,7 @@ chrono-humanize = { workspace = true }
 chrono-tz = { workspace = true }
 crossterm = { workspace = true }
 csv = { workspace = true }
+deunicode = { workspace = true }
 dialoguer = { workspace = true, default-features = false, features = ["fuzzy-select"] }
 digest = { workspace = true, default-features = false }
 dtparse = { workspace = true }
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 0901d56588..847d2349ed 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -189,6 +189,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
             Str,
             StrCapitalize,
             StrContains,
+            StrDeunicode,
             StrDistance,
             StrDowncase,
             StrEndswith,
diff --git a/crates/nu-command/src/strings/str_/deunicode.rs b/crates/nu-command/src/strings/str_/deunicode.rs
new file mode 100644
index 0000000000..0b70cf2003
--- /dev/null
+++ b/crates/nu-command/src/strings/str_/deunicode.rs
@@ -0,0 +1,120 @@
+use deunicode::deunicode;
+use nu_cmd_base::input_handler::{operate, CellPathOnlyArgs};
+use nu_engine::command_prelude::*;
+use nu_protocol::engine::StateWorkingSet;
+
+/// `str deunicode`: converts Unicode strings to ASCII via the `deunicode` crate.
+#[derive(Clone)]
+pub struct SubCommand;
+
+impl Command for SubCommand {
+    fn name(&self) -> &str {
+        "str deunicode"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("str deunicode")
+            .input_output_types(vec![
+                (Type::String, Type::String),
+                (
+                    Type::List(Box::new(Type::String)),
+                    Type::List(Box::new(Type::String)),
+                ),
+                (Type::table(), Type::table()),
+                (Type::record(), Type::record()),
+            ])
+            .allow_variants_without_examples(true)
+            // `run` and `run_const` read the rest arguments as cell paths, so the
+            // signature declares them; otherwise that handling is unreachable.
+            .rest(
+                "rest",
+                SyntaxShape::CellPath,
+                "For a data structure input, convert strings at the given cell paths.",
+            )
+            .category(Category::Strings)
+    }
+
+    fn usage(&self) -> &str {
+        "Convert Unicode string to pure ASCII."
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["convert", "ascii"]
+    }
+
+    // Marks the command as const-capable; `run_const` is the matching entry point.
+    fn is_const(&self) -> bool {
+        true
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        // Every rest argument is a cell path selecting which values to convert.
+        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
+        let args = CellPathOnlyArgs::from(cell_paths);
+
+        operate(action, args, input, call.head, engine_state.ctrlc.clone())
+    }
+
+    fn run_const(
+        &self,
+        working_set: &StateWorkingSet,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        // Same as `run`, but arguments are resolved through the working set.
+        let cell_paths: Vec<CellPath> = call.rest_const(working_set, 0)?;
+        let args = CellPathOnlyArgs::from(cell_paths);
+
+        operate(
+            action,
+            args,
+            input,
+            call.head,
+            working_set.permanent().ctrlc.clone(),
+        )
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "deunicode a string",
+            example: "'A…B' | str deunicode",
+            result: Some(Value::test_string("A...B")),
+        }]
+    }
+}
+
+/// Converts one value: strings are passed through `deunicode`, errors are returned
+/// unchanged, and any other type yields an `OnlySupportsThisInputType` error.
+fn action(input: &Value, _args: &CellPathOnlyArgs, head: Span) -> Value {
+    match input {
+        Value::String { val, .. } => Value::string(deunicode(val), head),
+        Value::Error { .. } => input.clone(),
+        _ => Value::error(
+            ShellError::OnlySupportsThisInputType {
+                exp_input_type: "string".into(),
+                wrong_type: input.get_type().to_string(),
+                dst_span: head,
+                src_span: input.span(),
+            },
+            head,
+        ),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(SubCommand {})
+    }
+}
diff --git a/crates/nu-command/src/strings/str_/mod.rs b/crates/nu-command/src/strings/str_/mod.rs
index fb34ace833..b9290ada2c 100644
--- a/crates/nu-command/src/strings/str_/mod.rs
+++ b/crates/nu-command/src/strings/str_/mod.rs
@@ -1,5 +1,6 @@
 mod case;
 mod contains;
+mod deunicode;
 mod distance;
 mod ends_with;
 mod expand;
@@ -15,6 +16,7 @@ mod trim;
 
 pub use case::*;
 pub use contains::SubCommand as StrContains;
+pub use deunicode::SubCommand as StrDeunicode;
 pub use distance::SubCommand as StrDistance;
 pub use ends_with::SubCommand as StrEndswith;
 pub use expand::SubCommand as StrExpand;