Suggest existing variables on not found (#8902)

This commit is contained in:
sam schick 2023-05-02 11:17:14 -04:00 committed by GitHub
parent 517dc6d39e
commit d45e9671d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 184 additions and 107 deletions

View File

@ -16,8 +16,8 @@ use nu_protocol::{
Operator, PathMember, Pattern, Pipeline, PipelineElement, RangeInclusion, RangeOperator, Operator, PathMember, Pattern, Pipeline, PipelineElement, RangeInclusion, RangeOperator,
}, },
engine::StateWorkingSet, engine::StateWorkingSet,
span, BlockId, Flag, ParseError, PositionalArg, Signature, Span, Spanned, SyntaxShape, Type, span, BlockId, DidYouMean, Flag, ParseError, PositionalArg, Signature, Span, Spanned,
Unit, VarId, ENV_VARIABLE_ID, IN_VARIABLE_ID, SyntaxShape, Type, Unit, VarId, ENV_VARIABLE_ID, IN_VARIABLE_ID,
}; };
use crate::parse_keywords::{ use crate::parse_keywords::{
@ -1820,9 +1820,7 @@ pub fn parse_variable_expr(working_set: &mut StateWorkingSet, span: Span) -> Exp
}; };
} }
let id = parse_variable(working_set, span); if let Some(id) = parse_variable(working_set, span) {
if let Some(id) = id {
Expression { Expression {
expr: Expr::Var(id), expr: Expr::Var(id),
span, span,
@ -1830,7 +1828,9 @@ pub fn parse_variable_expr(working_set: &mut StateWorkingSet, span: Span) -> Exp
custom_completion: None, custom_completion: None,
} }
} else { } else {
working_set.error(ParseError::VariableNotFound(span)); let ws = &*working_set;
let suggestion = DidYouMean::new(&ws.list_variables(), ws.get_span_contents(span));
working_set.error(ParseError::VariableNotFound(suggestion, span));
garbage(span) garbage(span)
} }
} }

View File

@ -1955,7 +1955,7 @@ mod input_types {
assert!(matches!( assert!(matches!(
working_set.parse_errors.first(), working_set.parse_errors.first(),
Some(ParseError::VariableNotFound(_)) Some(ParseError::VariableNotFound(_, _))
)); ));
} }
@ -1974,7 +1974,7 @@ mod input_types {
assert!(matches!( assert!(matches!(
working_set.parse_errors.first(), working_set.parse_errors.first(),
Some(ParseError::VariableNotFound(_)) Some(ParseError::VariableNotFound(_, _))
)); ));
} }
} }

View File

@ -0,0 +1,84 @@
pub fn did_you_mean<S: AsRef<str>>(possibilities: &[S], input: &str) -> Option<String> {
let possibilities: Vec<&str> = possibilities.iter().map(|s| s.as_ref()).collect();
let suggestion =
crate::lev_distance::find_best_match_for_name_with_substrings(&possibilities, input, None)
.map(|s| s.to_string());
if let Some(suggestion) = &suggestion {
if suggestion.len() == 1 && suggestion.to_lowercase() != input.to_lowercase() {
return None;
}
}
suggestion
}
#[cfg(test)]
mod tests {
use super::did_you_mean;
#[test]
fn did_you_mean_examples() {
let all_cases = [
(
vec!["a", "b"],
vec![
("a", Some("a"), ""),
("A", Some("a"), ""),
(
"c",
None,
"Not helpful to suggest an arbitrary choice when none are close",
),
("ccccccccccccccccccccccc", None, "Not helpful to suggest an arbitrary choice when none are close"),
],
),
(
vec!["OS", "PWD", "PWDPWDPWDPWD"],
vec![
("pwd", Some("PWD"), "Exact case insensitive match yields a match"),
("pwdpwdpwdpwd", Some("PWDPWDPWDPWD"), "Exact case insensitive match yields a match"),
("PWF", Some("PWD"), "One-letter typo yields a match"),
("pwf", None, "Case difference plus typo yields no match"),
("Xwdpwdpwdpwd", None, "Case difference plus typo yields no match"),
]
),
(
vec!["foo", "bar", "baz"],
vec![
("fox", Some("foo"), ""),
("FOO", Some("foo"), ""),
("FOX", None, ""),
(
"ccc",
None,
"Not helpful to suggest an arbitrary choice when none are close",
),
(
"zzz",
None,
"'baz' does share a character, but rustc rule is edit distance must be <= 1/3 of the length of the user input",
),
],
),
(
vec!["aaaaaa"],
vec![
("XXaaaa", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"),
("XXXaaa", None, "Distance of 3 out of 6 chars: not close enough to meet rustc's rule"),
("XaaaaX", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"),
("XXaaaaXX", None, "Distance of 4 out of 6 chars: not close enough to meet rustc's rule")
]
),
];
for (possibilities, cases) in all_cases {
for (input, expected_suggestion, discussion) in cases {
let suggestion = did_you_mean(&possibilities, input);
assert_eq!(
suggestion.as_deref(),
expected_suggestion,
"Expected the following reasoning to hold but it did not: '{discussion}'"
);
}
}
}
}

View File

@ -1543,6 +1543,24 @@ impl<'a> StateWorkingSet<'a> {
num_permanent_vars + self.delta.vars.len() num_permanent_vars + self.delta.vars.len()
} }
pub fn list_variables(&self) -> Vec<&[u8]> {
let mut removed_overlays = vec![];
let mut variables = HashSet::new();
for scope_frame in self.delta.scope.iter() {
for overlay_frame in scope_frame.active_overlays(&mut removed_overlays) {
variables.extend(overlay_frame.vars.keys().map(|k| &k[..]));
}
}
let permanent_vars = self
.permanent_state
.active_overlays(&removed_overlays)
.flat_map(|overlay_frame| overlay_frame.vars.keys().map(|k| &k[..]));
variables.extend(permanent_vars);
variables.into_iter().collect()
}
pub fn find_variable(&self, name: &[u8]) -> Option<VarId> { pub fn find_variable(&self, name: &[u8]) -> Option<VarId> {
let mut removed_overlays = vec![]; let mut removed_overlays = vec![];
@ -1589,7 +1607,6 @@ impl<'a> StateWorkingSet<'a> {
mutable: bool, mutable: bool,
) -> VarId { ) -> VarId {
let next_id = self.next_var_id(); let next_id = self.next_var_id();
// correct name if necessary // correct name if necessary
if !name.starts_with(b"$") { if !name.starts_with(b"$") {
name.insert(0, b'$'); name.insert(0, b'$');
@ -2097,6 +2114,8 @@ fn build_usage(comment_lines: &[&[u8]]) -> (String, String) {
#[cfg(test)] #[cfg(test)]
mod engine_state_tests { mod engine_state_tests {
use std::str::{from_utf8, Utf8Error};
use super::*; use super::*;
#[test] #[test]
@ -2139,4 +2158,25 @@ mod engine_state_tests {
Ok(()) Ok(())
} }
#[test]
fn list_variables() -> Result<(), Utf8Error> {
let varname = "something";
let varname_with_sigil = "$".to_owned() + varname;
let engine_state = EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);
working_set.add_variable(
varname.as_bytes().into(),
Span { start: 0, end: 1 },
Type::Int,
false,
);
let variables = working_set
.list_variables()
.into_iter()
.map(|v| from_utf8(v))
.collect::<Result<Vec<&str>, Utf8Error>>()?;
assert_eq!(variables, vec![varname_with_sigil]);
Ok(())
}
} }

View File

@ -25,11 +25,7 @@ pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
return None; return None;
} }
if n == 0 || m == 0 { if n == 0 || m == 0 {
return if min_dist <= limit { return Some(min_dist);
Some(min_dist)
} else {
None
};
} }
let mut dcol: Vec<_> = (0..=m).collect(); let mut dcol: Vec<_> = (0..=m).collect();
@ -57,6 +53,10 @@ pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
} }
} }
/// Finds the Levenshtein distance between two strings.
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
lev_distance(a, b, usize::max_value()).unwrap_or(usize::max_value())
}
/// Provides a word similarity score between two words that accounts for substrings being more /// Provides a word similarity score between two words that accounts for substrings being more
/// meaningful than a typical Levenshtein distance. The lower the score, the closer the match. /// meaningful than a typical Levenshtein distance. The lower the score, the closer the match.
/// 0 is an identical match. /// 0 is an identical match.

View File

@ -2,6 +2,7 @@ mod alias;
pub mod ast; pub mod ast;
pub mod cli_error; pub mod cli_error;
pub mod config; pub mod config;
mod did_you_mean;
pub mod engine; pub mod engine;
mod example; mod example;
mod exportable; mod exportable;
@ -24,12 +25,14 @@ mod variable;
pub use alias::*; pub use alias::*;
pub use cli_error::*; pub use cli_error::*;
pub use config::*; pub use config::*;
pub use did_you_mean::did_you_mean;
pub use engine::{ENV_VARIABLE_ID, IN_VARIABLE_ID, NU_VARIABLE_ID}; pub use engine::{ENV_VARIABLE_ID, IN_VARIABLE_ID, NU_VARIABLE_ID};
pub use example::*; pub use example::*;
pub use exportable::*; pub use exportable::*;
pub use id::*; pub use id::*;
pub use lev_distance::levenshtein_distance;
pub use module::*; pub use module::*;
pub use parse_error::ParseError; pub use parse_error::{DidYouMean, ParseError};
pub use pipeline_data::*; pub use pipeline_data::*;
#[cfg(feature = "plugin")] #[cfg(feature = "plugin")]
pub use plugin_signature::*; pub use plugin_signature::*;

View File

@ -1,4 +1,9 @@
use crate::{Span, Type}; use std::{
fmt::Display,
str::{from_utf8, Utf8Error},
};
use crate::{did_you_mean, Span, Type};
use miette::Diagnostic; use miette::Diagnostic;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use thiserror::Error; use thiserror::Error;
@ -162,7 +167,7 @@ pub enum ParseError {
#[error("Variable not found.")] #[error("Variable not found.")]
#[diagnostic(code(nu::parser::variable_not_found))] #[diagnostic(code(nu::parser::variable_not_found))]
VariableNotFound(#[label = "variable not found"] Span), VariableNotFound(DidYouMean, #[label = "variable not found. {0}"] Span),
#[error("Variable name not supported.")] #[error("Variable name not supported.")]
#[diagnostic(code(nu::parser::variable_not_valid))] #[diagnostic(code(nu::parser::variable_not_valid))]
@ -442,7 +447,7 @@ impl ParseError {
ParseError::CaptureOfMutableVar(s) => *s, ParseError::CaptureOfMutableVar(s) => *s,
ParseError::IncorrectValue(_, s, _) => *s, ParseError::IncorrectValue(_, s, _) => *s,
ParseError::MultipleRestParams(s) => *s, ParseError::MultipleRestParams(s) => *s,
ParseError::VariableNotFound(s) => *s, ParseError::VariableNotFound(_, s) => *s,
ParseError::VariableNotValid(s) => *s, ParseError::VariableNotValid(s) => *s,
ParseError::AliasNotValid(s) => *s, ParseError::AliasNotValid(s) => *s,
ParseError::CommandDefNotValid(s) => *s, ParseError::CommandDefNotValid(s) => *s,
@ -498,3 +503,37 @@ impl ParseError {
} }
} }
} }
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DidYouMean(Option<String>);
fn did_you_mean_impl(possibilities_bytes: &[&[u8]], input_bytes: &[u8]) -> Option<String> {
let input = from_utf8(input_bytes).ok()?;
let possibilities = possibilities_bytes
.iter()
.map(|p| from_utf8(p))
.collect::<Result<Vec<&str>, Utf8Error>>()
.ok()?;
did_you_mean(&possibilities, input)
}
impl DidYouMean {
pub fn new(possibilities_bytes: &[&[u8]], input_bytes: &[u8]) -> DidYouMean {
DidYouMean(did_you_mean_impl(possibilities_bytes, input_bytes))
}
}
impl From<Option<String>> for DidYouMean {
fn from(value: Option<String>) -> Self {
Self(value)
}
}
impl Display for DidYouMean {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(suggestion) = &self.0 {
write!(f, "Did you mean '{}'?", suggestion)
} else {
write!(f, "")
}
}
}

View File

@ -1100,92 +1100,3 @@ impl From<Box<dyn std::error::Error + Send + Sync>> for ShellError {
pub fn into_code(err: &ShellError) -> Option<String> { pub fn into_code(err: &ShellError) -> Option<String> {
err.code().map(|code| code.to_string()) err.code().map(|code| code.to_string())
} }
pub fn did_you_mean<S: AsRef<str>>(possibilities: &[S], input: &str) -> Option<String> {
let possibilities: Vec<&str> = possibilities.iter().map(|s| s.as_ref()).collect();
let suggestion =
crate::lev_distance::find_best_match_for_name_with_substrings(&possibilities, input, None)
.map(|s| s.to_string());
if let Some(suggestion) = &suggestion {
if suggestion.len() == 1 && suggestion.to_lowercase() != input.to_lowercase() {
return None;
}
}
suggestion
}
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
crate::lev_distance::lev_distance(a, b, usize::max_value())
.expect("It is impossible to exceed the supplied limit since all types involved are usize.")
}
#[cfg(test)]
mod tests {
use super::did_you_mean;
#[test]
fn did_you_mean_examples() {
let all_cases = [
(
vec!["a", "b"],
vec![
("a", Some("a"), ""),
("A", Some("a"), ""),
(
"c",
None,
"Not helpful to suggest an arbitrary choice when none are close",
),
("ccccccccccccccccccccccc", None, "Not helpful to suggest an arbitrary choice when none are close"),
],
),
(
vec!["OS", "PWD", "PWDPWDPWDPWD"],
vec![
("pwd", Some("PWD"), "Exact case insensitive match yields a match"),
("pwdpwdpwdpwd", Some("PWDPWDPWDPWD"), "Exact case insensitive match yields a match"),
("PWF", Some("PWD"), "One-letter typo yields a match"),
("pwf", None, "Case difference plus typo yields no match"),
("Xwdpwdpwdpwd", None, "Case difference plus typo yields no match"),
]
),
(
vec!["foo", "bar", "baz"],
vec![
("fox", Some("foo"), ""),
("FOO", Some("foo"), ""),
("FOX", None, ""),
(
"ccc",
None,
"Not helpful to suggest an arbitrary choice when none are close",
),
(
"zzz",
None,
"'baz' does share a character, but rustc rule is edit distance must be <= 1/3 of the length of the user input",
),
],
),
(
vec!["aaaaaa"],
vec![
("XXaaaa", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"),
("XXXaaa", None, "Distance of 3 out of 6 chars: not close enough to meet rustc's rule"),
("XaaaaX", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"),
("XXaaaaXX", None, "Distance of 4 out of 6 chars: not close enough to meet rustc's rule")
]
),
];
for (possibilities, cases) in all_cases {
for (input, expected_suggestion, discussion) in cases {
let suggestion = did_you_mean(&possibilities, input);
assert_eq!(
suggestion.as_deref(),
expected_suggestion,
"Expected the following reasoning to hold but it did not: '{discussion}'"
);
}
}
}
}