diff --git a/Cargo.lock b/Cargo.lock index b0829c455b..f666225efe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2749,15 +2749,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "natural" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e6b44f8ddc659cde3555e0140d3441ad26cb03a1410774af1f9a19097c1867" -dependencies = [ - "rust-stemmers", -] - [[package]] name = "neso" version = "0.5.0" @@ -2920,7 +2911,6 @@ dependencies = [ "itertools", "log 0.4.11", "meval", - "natural", "nu-data", "nu-errors", "nu-parser", @@ -3068,7 +3058,6 @@ dependencies = [ "indexmap", "itertools", "log 0.4.11", - "natural", "nu-errors", "nu-source", "num-bigint 0.3.0", @@ -4478,16 +4467,6 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" -[[package]] -name = "rust-stemmers" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" -dependencies = [ - "serde 1.0.115", - "serde_derive", -] - [[package]] name = "rust_decimal" version = "0.10.2" diff --git a/crates/nu-cli/Cargo.toml b/crates/nu-cli/Cargo.toml index bb44415dac..52f654a71e 100644 --- a/crates/nu-cli/Cargo.toml +++ b/crates/nu-cli/Cargo.toml @@ -55,7 +55,6 @@ indexmap = {version = "1.6.0", features = ["serde-1"]} itertools = "0.9.0" log = "0.4.11" meval = "0.2.0" -natural = "0.5.0" num-bigint = {version = "0.3.0", features = ["serde"]} num-format = {version = "0.4.0", features = ["with-num-bigint"]} num-traits = "0.2.12" diff --git a/crates/nu-cli/src/evaluate/evaluator.rs b/crates/nu-cli/src/evaluate/evaluator.rs index 84005a71e6..9c29da9be5 100644 --- a/crates/nu-cli/src/evaluate/evaluator.rs +++ b/crates/nu-cli/src/evaluate/evaluator.rs @@ -1,5 +1,6 @@ use crate::command_registry::CommandRegistry; use crate::commands::classified::block::run_block; +use crate::did_you_mean; use crate::evaluate::operator::apply_operator; use crate::prelude::*; use async_recursion::async_recursion; @@ -148,24 +149,18 @@ pub(crate) async fn evaluate_baseline_expr( match next { Err(err) => { - let possibilities = item.data_descriptors(); + if let UnspannedPathMember::String(_name) = &member.unspanned { + let possible_matches = did_you_mean(&item, member.as_string()); - if let UnspannedPathMember::String(name) = &member.unspanned { - let mut possible_matches: Vec<_> = possibilities - .iter() - .map(|x| (natural::distance::levenshtein_distance(x, &name), x)) - .collect(); - - possible_matches.sort(); - - if !possible_matches.is_empty() { - return Err(ShellError::labeled_error( - "Unknown column", - format!("did you mean '{}'?", possible_matches[0].1), - &member.span, - )); - } else { - return Err(err); + match possible_matches { + Some(p) => { + return Err(ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", p[0]), + &member.span, + )); + } + None => return Err(err), } } } diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index 195c874a82..28ff20765c 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -20,7 +20,6 @@ getset = "0.1.1" indexmap = {version = "1.6.0", features = ["serde-1"]} itertools = "0.9.0" log = "0.4.11" -natural = "0.5.0" nu-errors = {path = "../nu-errors", version = "0.20.0"} nu-source = {path = "../nu-source", version = "0.20.0"} num-bigint = {version = "0.3.0", features = ["serde"]} diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs index 3f65f3fd0f..91b78bbcf1 100644 --- a/crates/nu-protocol/src/lib.rs +++ b/crates/nu-protocol/src/lib.rs @@ -18,8 +18,9 @@ pub use crate::signature::{NamedType, PositionalType, Signature}; pub use crate::syntax_shape::SyntaxShape; pub use crate::type_name::{PrettyType, ShellTypeName, SpannedTypeName}; pub use crate::type_shape::{Row as RowType, Type}; -pub use crate::value::column_path::{did_you_mean, ColumnPath, PathMember, UnspannedPathMember}; +pub use crate::value::column_path::{ColumnPath, PathMember, UnspannedPathMember}; pub use crate::value::dict::{Dictionary, TaggedDictBuilder}; +pub use crate::value::did_you_mean::did_you_mean; pub use crate::value::evaluate::Scope; pub use crate::value::primitive::Primitive; pub use crate::value::primitive::{format_date, format_duration, format_primitive}; diff --git a/crates/nu-protocol/src/value.rs b/crates/nu-protocol/src/value.rs index be4e2dba3b..d64fb730aa 100644 --- a/crates/nu-protocol/src/value.rs +++ b/crates/nu-protocol/src/value.rs @@ -2,6 +2,7 @@ pub mod column_path; mod convert; mod debug; pub mod dict; +pub mod did_you_mean; pub mod evaluate; pub mod iter; pub mod primitive; diff --git a/crates/nu-protocol/src/value/column_path.rs b/crates/nu-protocol/src/value/column_path.rs index dc632bc32a..0375b011dd 100644 --- a/crates/nu-protocol/src/value/column_path.rs +++ b/crates/nu-protocol/src/value/column_path.rs @@ -1,4 +1,3 @@ -use crate::Value; use derive_new::new; use getset::Getters; use nu_source::{b, span_for_spanned_list, DebugDocBuilder, HasFallibleSpan, PrettyDebug, Span}; @@ -112,65 +111,3 @@ impl PathMember { } } } - -/// Prepares a list of "sounds like" matches (using edit distance) for the string you're trying to find -pub fn did_you_mean(obj_source: &Value, field_tried: String) -> Option> { - let possibilities = obj_source.data_descriptors(); - - let mut possible_matches: Vec<_> = possibilities - .into_iter() - .map(|word| { - let edit_distance = natural::distance::levenshtein_distance(&word, &field_tried); - (edit_distance, word) - }) - .collect(); - - if !possible_matches.is_empty() { - possible_matches.sort(); - let words_matched: Vec = possible_matches.into_iter().map(|m| m.1).collect(); - Some(words_matched) - } else { - None - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::UntaggedValue; - use indexmap::indexmap; - use nu_source::Tag; - - #[test] - fn did_you_mean_returns_possible_column_matches() { - let value = UntaggedValue::row(indexmap! { - "dog".to_string() => UntaggedValue::int(1).into(), - "cat".to_string() => UntaggedValue::int(1).into(), - "alt".to_string() => UntaggedValue::int(1).into(), - }); - - let source = Value { - tag: Tag::unknown(), - value, - }; - - assert_eq!( - Some(vec![ - "cat".to_string(), - "alt".to_string(), - "dog".to_string() - ]), - did_you_mean(&source, "hat".to_string()) - ) - } - - #[test] - fn did_you_mean_returns_no_matches_when_empty() { - let empty_source = Value { - tag: Tag::unknown(), - value: UntaggedValue::row(indexmap! {}), - }; - - assert_eq!(None, did_you_mean(&empty_source, "hat".to_string())) - } -} diff --git a/crates/nu-protocol/src/value/did_you_mean.rs b/crates/nu-protocol/src/value/did_you_mean.rs new file mode 100644 index 0000000000..bd2eb7a548 --- /dev/null +++ b/crates/nu-protocol/src/value/did_you_mean.rs @@ -0,0 +1,94 @@ +use crate::Value; +use std::cmp; + +/// Prepares a list of "sounds like" matches (using edit distance) for the string you're trying to find +pub fn did_you_mean(obj_source: &Value, field_tried: String) -> Option> { + let possibilities = obj_source.data_descriptors(); + + let mut possible_matches: Vec<_> = possibilities + .into_iter() + .map(|word| { + let edit_distance = levenshtein_distance(&word, &field_tried); + (edit_distance, word) + }) + .collect(); + + if !possible_matches.is_empty() { + possible_matches.sort(); + let words_matched: Vec = possible_matches.into_iter().map(|m| m.1).collect(); + Some(words_matched) + } else { + None + } +} + +/// Borrowed from https://crates.io/crates/natural +fn levenshtein_distance(str1: &str, str2: &str) -> usize { + let n = str1.len(); + let m = str2.len(); + + let mut current: Vec = (0..n + 1).collect(); + let a_vec: Vec = str1.chars().collect(); + let b_vec: Vec = str2.chars().collect(); + + for i in 1..m + 1 { + let previous = current; + current = vec![0; n + 1]; + current[0] = i; + for j in 1..n + 1 { + let add = previous[j] + 1; + let delete = current[j - 1] + 1; + let mut change = previous[j - 1]; + if a_vec[j - 1] != b_vec[i - 1] { + change += 1 + } + current[j] = min3(add, delete, change); + } + } + current[n] +} + +fn min3(a: T, b: T, c: T) -> T { + cmp::min(a, cmp::min(b, c)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::UntaggedValue; + use indexmap::indexmap; + use nu_source::Tag; + + #[test] + fn did_you_mean_returns_possible_column_matches() { + let value = UntaggedValue::row(indexmap! { + "dog".to_string() => UntaggedValue::int(1).into(), + "cat".to_string() => UntaggedValue::int(1).into(), + "alt".to_string() => UntaggedValue::int(1).into(), + }); + + let source = Value { + tag: Tag::unknown(), + value, + }; + + assert_eq!( + Some(vec![ + "cat".to_string(), + "alt".to_string(), + "dog".to_string() + ]), + did_you_mean(&source, "hat".to_string()) + ) + } + + #[test] + fn did_you_mean_returns_no_matches_when_empty() { + let empty_source = Value { + tag: Tag::unknown(), + value: UntaggedValue::row(indexmap! {}), + }; + + assert_eq!(None, did_you_mean(&empty_source, "hat".to_string())) + } +}