add edit distance/levenshtein command (#6383)

* add edit distance/levenshtein command

* change output to a record

* update test
This commit is contained in:
Darren Schroeder 2022-08-23 08:53:14 -05:00 committed by GitHub
parent 884382bac4
commit 6fbc76bc0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 137 additions and 0 deletions

View File

@ -190,6 +190,7 @@ pub fn create_default_context() -> EngineState {
StrCapitalize,
StrCollect,
StrContains,
StrDistance,
StrDowncase,
StrEndswith,
StrReplace,

View File

@ -0,0 +1,134 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::{Call, CellPath},
engine::{Command, EngineState, Stack},
levenshtein_distance, Category, Example, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Value,
};
/// Command type backing `str distance`; the behavior lives in its `Command` impl.
#[derive(Clone)]
pub struct SubCommand;
impl Command for SubCommand {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "str distance"
    }

    /// Declares one required string positional (the string to compare the
    /// input against) followed by any number of cell paths.
    fn signature(&self) -> Signature {
        Signature::build("str distance")
            .required(
                "compare-string",
                SyntaxShape::String,
                "the first string to compare",
            )
            .rest(
                "rest",
                SyntaxShape::CellPath,
                // The previous text described a "contains pattern" check, which
                // is not what this command does.
                "optionally compute the edit distance for strings at the given column paths",
            )
            .category(Category::Strings)
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "compare two strings and return the edit distance/levenshtein distance"
    }

    /// Extra keywords associated with the command for searching.
    fn search_terms(&self) -> Vec<&str> {
        vec!["edit", "distance", "levenshtein"]
    }

    /// Entry point: forwards all arguments unchanged to `operate`.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        operate(engine_state, stack, call, input)
    }

    /// Example invocation together with its expected result: a record with a
    /// single `distance` column.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "get the edit distance between two strings",
            example: "'nushell' | str distance 'nutshell'",
            result: Some(Value::Record {
                cols: vec!["distance".to_string()],
                vals: vec![Value::Int {
                    val: 1,
                    span: Span::test_data(),
                }],
                span: Span::test_data(),
            }),
        }]
    }
}
fn operate(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let compare_string: Spanned<String> = call.req(engine_state, stack, 0)?;
let column_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
input.map(
move |v| {
if column_paths.is_empty() {
action(&v, &compare_string.item, head)
} else {
let mut ret = v;
for path in &column_paths {
let c = compare_string.item.clone();
let r = ret.update_cell_path(
&path.members,
Box::new(move |old| action(old, &c, head)),
);
if let Err(error) = r {
return Value::Error { error };
}
}
ret
}
},
engine_state.ctrlc.clone(),
)
}
fn action(input: &Value, compare_string: &str, head: Span) -> Value {
match &input {
Value::String { val, .. } => {
let distance = levenshtein_distance(val, compare_string);
Value::Record {
cols: vec!["distance".to_string()],
vals: vec![Value::Int {
val: distance as i64,
span: head,
}],
span: head,
}
}
other => Value::Error {
error: ShellError::UnsupportedInput(
format!(
"Input's type is {}. This command only works with strings.",
other.get_type()
),
head,
),
},
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Passes the command to the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(SubCommand)
    }
}

View File

@ -1,6 +1,7 @@
mod case;
mod collect;
mod contains;
mod distance;
mod ends_with;
mod index_of;
mod length;
@ -15,6 +16,7 @@ mod trim;
pub use case::*;
pub use collect::*;
pub use contains::SubCommand as StrContains;
pub use distance::SubCommand as StrDistance;
pub use ends_with::SubCommand as StrEndswith;
pub use index_of::SubCommand as StrIndexOf;
pub use length::SubCommand as StrLength;