From 6fbc76bc0f9a0105b2f5b86244718cc57f70614a Mon Sep 17 00:00:00 2001
From: Darren Schroeder <343840+fdncred@users.noreply.github.com>
Date: Tue, 23 Aug 2022 08:53:14 -0500
Subject: [PATCH] add edit distance/levenshtein command (#6383)

* add edit distance/levenshtein command

* change output to a record

* update test
---
Reviewer notes (free-form commentary placed after the "---" marker and before
the first "diff --git" line; it is not part of the commit message or of any
hunk):

What the patch does
- Registers a new `str distance` command (`StrDistance`) in the default
  context and re-exports it from `strings/str_/mod.rs`.
- `SubCommand::run` delegates to `operate`, which reads the required
  `compare-string` argument and the optional rest cell paths, then maps
  `action` over the pipeline input. With no cell paths, `action` is applied to
  each value directly; otherwise each listed cell path is updated in place and
  the first update error is returned as a `Value::Error`.
- `action` accepts only `Value::String`; it returns a one-column record
  `{distance: <int>}` holding `levenshtein_distance(val, compare_string)`.
  Any other input type yields `ShellError::UnsupportedInput`.

Repairs made to this copy of the patch
- The patch had been flattened onto a few physical lines; one diff line per
  line is restored so the hunks are well-formed again (distance.rs is still
  134 added lines, matching its hunk header).
- Generic arguments that had been stripped are restored from the file's own
  imports and usage: `Result<PipelineData, ShellError>` (in `run` and
  `operate`), `Vec<Example>`, `Spanned<String>` and `Vec<CellPath>`.
- Two help strings are corrected in place: the usage text said "compare to
  strings", and the rest-parameter description spoke of a "contains pattern"
  check, which does not match what `operate` does. Because of these two
  edits the blob id on the distance.rs `index` line still names the content
  as originally committed, not the content below.
- Leading whitespace on context lines and diffstat alignment could not be
  recovered from the flattened text and were reconstructed; if a context line
  fails to match, apply with whitespace differences ignored.

 crates/nu-command/src/default_context.rs      |   1 +
 .../nu-command/src/strings/str_/distance.rs   | 134 ++++++++++++++++++
 crates/nu-command/src/strings/str_/mod.rs     |   2 +
 3 files changed, 137 insertions(+)
 create mode 100644 crates/nu-command/src/strings/str_/distance.rs

diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 17b38828c..06e38b925 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -190,6 +190,7 @@ pub fn create_default_context() -> EngineState {
             StrCapitalize,
             StrCollect,
             StrContains,
+            StrDistance,
             StrDowncase,
             StrEndswith,
             StrReplace,
diff --git a/crates/nu-command/src/strings/str_/distance.rs b/crates/nu-command/src/strings/str_/distance.rs
new file mode 100644
index 000000000..a7acaaa30
--- /dev/null
+++ b/crates/nu-command/src/strings/str_/distance.rs
@@ -0,0 +1,134 @@
+use nu_engine::CallExt;
+use nu_protocol::{
+    ast::{Call, CellPath},
+    engine::{Command, EngineState, Stack},
+    levenshtein_distance, Category, Example, PipelineData, ShellError, Signature, Span, Spanned,
+    SyntaxShape, Value,
+};
+
+#[derive(Clone)]
+pub struct SubCommand;
+
+impl Command for SubCommand {
+    fn name(&self) -> &str {
+        "str distance"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("str distance")
+            .required(
+                "compare-string",
+                SyntaxShape::String,
+                "the first string to compare",
+            )
+            .rest(
+                "rest",
+                SyntaxShape::CellPath,
+                "optionally compute the distance for strings at the given column paths",
+            )
+            .category(Category::Strings)
+    }
+
+    fn usage(&self) -> &str {
+        "compare two strings and return the edit distance/levenshtein distance"
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["edit", "distance", "levenshtein"]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        operate(engine_state, stack, call, input)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "get the edit distance between two strings",
+            example: "'nushell' | str distance 'nutshell'",
+            result: Some(Value::Record {
+                cols: vec!["distance".to_string()],
+                vals: vec![Value::Int {
+                    val: 1,
+                    span: Span::test_data(),
+                }],
+                span: Span::test_data(),
+            }),
+        }]
+    }
+}
+
+fn operate(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let head = call.head;
+    let compare_string: Spanned<String> = call.req(engine_state, stack, 0)?;
+    let column_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
+
+    input.map(
+        move |v| {
+            if column_paths.is_empty() {
+                action(&v, &compare_string.item, head)
+            } else {
+                let mut ret = v;
+                for path in &column_paths {
+                    let c = compare_string.item.clone();
+                    let r = ret.update_cell_path(
+                        &path.members,
+                        Box::new(move |old| action(old, &c, head)),
+                    );
+                    if let Err(error) = r {
+                        return Value::Error { error };
+                    }
+                }
+                ret
+            }
+        },
+        engine_state.ctrlc.clone(),
+    )
+}
+
+fn action(input: &Value, compare_string: &str, head: Span) -> Value {
+    match &input {
+        Value::String { val, .. } => {
+            let distance = levenshtein_distance(val, compare_string);
+            Value::Record {
+                cols: vec!["distance".to_string()],
+                vals: vec![Value::Int {
+                    val: distance as i64,
+                    span: head,
+                }],
+                span: head,
+            }
+        }
+        other => Value::Error {
+            error: ShellError::UnsupportedInput(
+                format!(
+                    "Input's type is {}. This command only works with strings.",
+                    other.get_type()
+                ),
+                head,
+            ),
+        },
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(SubCommand {})
+    }
+}
diff --git a/crates/nu-command/src/strings/str_/mod.rs b/crates/nu-command/src/strings/str_/mod.rs
index 559946e42..cbe3ee005 100644
--- a/crates/nu-command/src/strings/str_/mod.rs
+++ b/crates/nu-command/src/strings/str_/mod.rs
@@ -1,6 +1,7 @@
 mod case;
 mod collect;
 mod contains;
+mod distance;
 mod ends_with;
 mod index_of;
 mod length;
@@ -15,6 +16,7 @@ mod trim;
 pub use case::*;
 pub use collect::*;
 pub use contains::SubCommand as StrContains;
+pub use distance::SubCommand as StrDistance;
 pub use ends_with::SubCommand as StrEndswith;
 pub use index_of::SubCommand as StrIndexOf;
 pub use length::SubCommand as StrLength;