did_you_mean without dependency (#2610)

This commit is contained in:
Chris Gillespie 2020-09-28 20:32:29 -07:00 committed by GitHub
parent 18909ec14a
commit cf43b74f26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 109 additions and 104 deletions

21
Cargo.lock generated
View File

@ -2749,15 +2749,6 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "natural"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65e6b44f8ddc659cde3555e0140d3441ad26cb03a1410774af1f9a19097c1867"
dependencies = [
"rust-stemmers",
]
[[package]] [[package]]
name = "neso" name = "neso"
version = "0.5.0" version = "0.5.0"
@ -2920,7 +2911,6 @@ dependencies = [
"itertools", "itertools",
"log 0.4.11", "log 0.4.11",
"meval", "meval",
"natural",
"nu-data", "nu-data",
"nu-errors", "nu-errors",
"nu-parser", "nu-parser",
@ -3068,7 +3058,6 @@ dependencies = [
"indexmap", "indexmap",
"itertools", "itertools",
"log 0.4.11", "log 0.4.11",
"natural",
"nu-errors", "nu-errors",
"nu-source", "nu-source",
"num-bigint 0.3.0", "num-bigint 0.3.0",
@ -4478,16 +4467,6 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2"
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde 1.0.115",
"serde_derive",
]
[[package]] [[package]]
name = "rust_decimal" name = "rust_decimal"
version = "0.10.2" version = "0.10.2"

View File

@ -55,7 +55,6 @@ indexmap = {version = "1.6.0", features = ["serde-1"]}
itertools = "0.9.0" itertools = "0.9.0"
log = "0.4.11" log = "0.4.11"
meval = "0.2.0" meval = "0.2.0"
natural = "0.5.0"
num-bigint = {version = "0.3.0", features = ["serde"]} num-bigint = {version = "0.3.0", features = ["serde"]}
num-format = {version = "0.4.0", features = ["with-num-bigint"]} num-format = {version = "0.4.0", features = ["with-num-bigint"]}
num-traits = "0.2.12" num-traits = "0.2.12"

View File

@ -1,5 +1,6 @@
use crate::command_registry::CommandRegistry; use crate::command_registry::CommandRegistry;
use crate::commands::classified::block::run_block; use crate::commands::classified::block::run_block;
use crate::did_you_mean;
use crate::evaluate::operator::apply_operator; use crate::evaluate::operator::apply_operator;
use crate::prelude::*; use crate::prelude::*;
use async_recursion::async_recursion; use async_recursion::async_recursion;
@ -148,24 +149,18 @@ pub(crate) async fn evaluate_baseline_expr(
match next { match next {
Err(err) => { Err(err) => {
let possibilities = item.data_descriptors(); if let UnspannedPathMember::String(_name) = &member.unspanned {
let possible_matches = did_you_mean(&item, member.as_string());
if let UnspannedPathMember::String(name) = &member.unspanned { match possible_matches {
let mut possible_matches: Vec<_> = possibilities Some(p) => {
.iter() return Err(ShellError::labeled_error(
.map(|x| (natural::distance::levenshtein_distance(x, &name), x)) "Unknown column",
.collect(); format!("did you mean '{}'?", p[0]),
&member.span,
possible_matches.sort(); ));
}
if !possible_matches.is_empty() { None => return Err(err),
return Err(ShellError::labeled_error(
"Unknown column",
format!("did you mean '{}'?", possible_matches[0].1),
&member.span,
));
} else {
return Err(err);
} }
} }
} }

View File

@ -20,7 +20,6 @@ getset = "0.1.1"
indexmap = {version = "1.6.0", features = ["serde-1"]} indexmap = {version = "1.6.0", features = ["serde-1"]}
itertools = "0.9.0" itertools = "0.9.0"
log = "0.4.11" log = "0.4.11"
natural = "0.5.0"
nu-errors = {path = "../nu-errors", version = "0.20.0"} nu-errors = {path = "../nu-errors", version = "0.20.0"}
nu-source = {path = "../nu-source", version = "0.20.0"} nu-source = {path = "../nu-source", version = "0.20.0"}
num-bigint = {version = "0.3.0", features = ["serde"]} num-bigint = {version = "0.3.0", features = ["serde"]}

View File

@ -18,8 +18,9 @@ pub use crate::signature::{NamedType, PositionalType, Signature};
pub use crate::syntax_shape::SyntaxShape; pub use crate::syntax_shape::SyntaxShape;
pub use crate::type_name::{PrettyType, ShellTypeName, SpannedTypeName}; pub use crate::type_name::{PrettyType, ShellTypeName, SpannedTypeName};
pub use crate::type_shape::{Row as RowType, Type}; pub use crate::type_shape::{Row as RowType, Type};
pub use crate::value::column_path::{did_you_mean, ColumnPath, PathMember, UnspannedPathMember}; pub use crate::value::column_path::{ColumnPath, PathMember, UnspannedPathMember};
pub use crate::value::dict::{Dictionary, TaggedDictBuilder}; pub use crate::value::dict::{Dictionary, TaggedDictBuilder};
pub use crate::value::did_you_mean::did_you_mean;
pub use crate::value::evaluate::Scope; pub use crate::value::evaluate::Scope;
pub use crate::value::primitive::Primitive; pub use crate::value::primitive::Primitive;
pub use crate::value::primitive::{format_date, format_duration, format_primitive}; pub use crate::value::primitive::{format_date, format_duration, format_primitive};

View File

@ -2,6 +2,7 @@ pub mod column_path;
mod convert; mod convert;
mod debug; mod debug;
pub mod dict; pub mod dict;
pub mod did_you_mean;
pub mod evaluate; pub mod evaluate;
pub mod iter; pub mod iter;
pub mod primitive; pub mod primitive;

View File

@ -1,4 +1,3 @@
use crate::Value;
use derive_new::new; use derive_new::new;
use getset::Getters; use getset::Getters;
use nu_source::{b, span_for_spanned_list, DebugDocBuilder, HasFallibleSpan, PrettyDebug, Span}; use nu_source::{b, span_for_spanned_list, DebugDocBuilder, HasFallibleSpan, PrettyDebug, Span};
@ -112,65 +111,3 @@ impl PathMember {
} }
} }
} }
/// Suggests existing column names for a field lookup that failed.
///
/// Every name reported by `obj_source.data_descriptors()` is paired with its edit distance to
/// `field_tried`; the names are returned closest first (pairs with equal distance are ordered
/// by the name itself, because the `(distance, name)` tuples are sorted as a whole).
///
/// Returns `None` when `obj_source` reports no names at all.
pub fn did_you_mean(obj_source: &Value, field_tried: String) -> Option<Vec<String>> {
    let mut ranked = Vec::new();
    for candidate in obj_source.data_descriptors() {
        let distance = natural::distance::levenshtein_distance(&candidate, &field_tried);
        ranked.push((distance, candidate));
    }

    if ranked.is_empty() {
        return None;
    }

    ranked.sort();
    Some(ranked.into_iter().map(|(_, candidate)| candidate).collect())
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::UntaggedValue;
    use indexmap::indexmap;
    use nu_source::Tag;

    /// A row with several columns yields all of them, ordered closest-first to the typo.
    #[test]
    fn did_you_mean_returns_possible_column_matches() {
        let row = Value {
            tag: Tag::unknown(),
            value: UntaggedValue::row(indexmap! {
                "dog".to_string() => UntaggedValue::int(1).into(),
                "cat".to_string() => UntaggedValue::int(1).into(),
                "alt".to_string() => UntaggedValue::int(1).into(),
            }),
        };
        let expected: Vec<String> = vec!["cat".to_string(), "alt".to_string(), "dog".to_string()];

        let suggestions = did_you_mean(&row, "hat".to_string());

        assert_eq!(Some(expected), suggestions)
    }

    /// A row without columns has nothing to suggest.
    #[test]
    fn did_you_mean_returns_no_matches_when_empty() {
        let row_without_columns = Value {
            tag: Tag::unknown(),
            value: UntaggedValue::row(indexmap! {}),
        };

        let suggestions = did_you_mean(&row_without_columns, "hat".to_string());

        assert_eq!(None, suggestions)
    }
}

View File

@ -0,0 +1,94 @@
use crate::Value;
use std::cmp;
/// Suggests existing column names for a field lookup that failed.
///
/// Every name reported by `obj_source.data_descriptors()` is paired with its edit distance to
/// `field_tried`; the names are returned closest first (pairs with equal distance are ordered
/// by the name itself, because the `(distance, name)` tuples are sorted as a whole).
///
/// Returns `None` when `obj_source` reports no names at all.
pub fn did_you_mean(obj_source: &Value, field_tried: String) -> Option<Vec<String>> {
    let mut ranked = Vec::new();
    for candidate in obj_source.data_descriptors() {
        let distance = levenshtein_distance(&candidate, &field_tried);
        ranked.push((distance, candidate));
    }

    if ranked.is_empty() {
        return None;
    }

    ranked.sort();
    Some(ranked.into_iter().map(|(_, candidate)| candidate).collect())
}
/// Computes the Levenshtein (edit) distance between `str1` and `str2`.
///
/// The distance is the minimum number of single-`char` insertions, deletions and substitutions
/// needed to turn one string into the other. Both inputs are compared `char` by `char`, so the
/// result is counted in Unicode scalar values, not bytes.
///
/// Borrowed from https://crates.io/crates/natural
fn levenshtein_distance(str1: &str, str2: &str) -> usize {
    let a_vec: Vec<char> = str1.chars().collect();
    let b_vec: Vec<char> = str2.chars().collect();

    // The lengths must come from the char buffers: `str::len()` counts UTF-8 bytes, which
    // exceeds the number of chars for non-ASCII text and would index past the end below.
    let n = a_vec.len();
    let m = b_vec.len();

    // Row 0 of the DP table: turning the first `j` chars of `str1` into "" costs `j` edits.
    let mut current: Vec<usize> = (0..=n).collect();

    for i in 1..=m {
        let previous = current;
        current = vec![0; n + 1];
        // Turning "" into the first `i` chars of `str2` costs `i` edits.
        current[0] = i;

        for j in 1..=n {
            let add = previous[j] + 1;
            let delete = current[j - 1] + 1;
            let mut change = previous[j - 1];
            if a_vec[j - 1] != b_vec[i - 1] {
                change += 1
            }
            current[j] = min3(add, delete, change);
        }
    }

    current[n]
}

/// Returns the smallest of the three arguments.
fn min3<T: Ord>(a: T, b: T, c: T) -> T {
    cmp::min(a, cmp::min(b, c))
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::UntaggedValue;
    use indexmap::indexmap;
    use nu_source::Tag;

    /// A row with several columns yields all of them, ordered closest-first to the typo:
    /// "cat" is one edit away from "hat", "alt" two and "dog" three.
    #[test]
    fn did_you_mean_returns_possible_column_matches() {
        let row = Value {
            tag: Tag::unknown(),
            value: UntaggedValue::row(indexmap! {
                "dog".to_string() => UntaggedValue::int(1).into(),
                "cat".to_string() => UntaggedValue::int(1).into(),
                "alt".to_string() => UntaggedValue::int(1).into(),
            }),
        };
        let expected: Vec<String> = vec!["cat".to_string(), "alt".to_string(), "dog".to_string()];

        let suggestions = did_you_mean(&row, "hat".to_string());

        assert_eq!(Some(expected), suggestions)
    }

    /// A row without columns has nothing to suggest.
    #[test]
    fn did_you_mean_returns_no_matches_when_empty() {
        let row_without_columns = Value {
            tag: Tag::unknown(),
            value: UntaggedValue::row(indexmap! {}),
        };

        let suggestions = did_you_mean(&row_without_columns, "hat".to_string());

        assert_eq!(None, suggestions)
    }
}