diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 9f4127d12..ccc6bca93 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -16,8 +16,8 @@ use nu_protocol::{ Operator, PathMember, Pattern, Pipeline, PipelineElement, RangeInclusion, RangeOperator, }, engine::StateWorkingSet, - span, BlockId, Flag, ParseError, PositionalArg, Signature, Span, Spanned, SyntaxShape, Type, - Unit, VarId, ENV_VARIABLE_ID, IN_VARIABLE_ID, + span, BlockId, DidYouMean, Flag, ParseError, PositionalArg, Signature, Span, Spanned, + SyntaxShape, Type, Unit, VarId, ENV_VARIABLE_ID, IN_VARIABLE_ID, }; use crate::parse_keywords::{ @@ -1820,9 +1820,7 @@ pub fn parse_variable_expr(working_set: &mut StateWorkingSet, span: Span) -> Exp }; } - let id = parse_variable(working_set, span); - - if let Some(id) = id { + if let Some(id) = parse_variable(working_set, span) { Expression { expr: Expr::Var(id), span, @@ -1830,7 +1828,9 @@ pub fn parse_variable_expr(working_set: &mut StateWorkingSet, span: Span) -> Exp custom_completion: None, } } else { - working_set.error(ParseError::VariableNotFound(span)); + let ws = &*working_set; + let suggestion = DidYouMean::new(&ws.list_variables(), ws.get_span_contents(span)); + working_set.error(ParseError::VariableNotFound(suggestion, span)); garbage(span) } } diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index c49ebb430..2659460bb 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -1955,7 +1955,7 @@ mod input_types { assert!(matches!( working_set.parse_errors.first(), - Some(ParseError::VariableNotFound(_)) + Some(ParseError::VariableNotFound(_, _)) )); } @@ -1974,7 +1974,7 @@ mod input_types { assert!(matches!( working_set.parse_errors.first(), - Some(ParseError::VariableNotFound(_)) + Some(ParseError::VariableNotFound(_, _)) )); } } diff --git a/crates/nu-protocol/src/did_you_mean.rs b/crates/nu-protocol/src/did_you_mean.rs new file mode 100644 index 000000000..4996c8328 --- /dev/null +++ b/crates/nu-protocol/src/did_you_mean.rs @@ -0,0 +1,84 @@ +pub fn did_you_mean>(possibilities: &[S], input: &str) -> Option { + let possibilities: Vec<&str> = possibilities.iter().map(|s| s.as_ref()).collect(); + let suggestion = + crate::lev_distance::find_best_match_for_name_with_substrings(&possibilities, input, None) + .map(|s| s.to_string()); + if let Some(suggestion) = &suggestion { + if suggestion.len() == 1 && suggestion.to_lowercase() != input.to_lowercase() { + return None; + } + } + suggestion +} + +#[cfg(test)] +mod tests { + + use super::did_you_mean; + + #[test] + fn did_you_mean_examples() { + let all_cases = [ + ( + vec!["a", "b"], + vec![ + ("a", Some("a"), ""), + ("A", Some("a"), ""), + ( + "c", + None, + "Not helpful to suggest an arbitrary choice when none are close", + ), + ("ccccccccccccccccccccccc", None, "Not helpful to suggest an arbitrary choice when none are close"), + ], + ), + ( + vec!["OS", "PWD", "PWDPWDPWDPWD"], + vec![ + ("pwd", Some("PWD"), "Exact case insensitive match yields a match"), + ("pwdpwdpwdpwd", Some("PWDPWDPWDPWD"), "Exact case insensitive match yields a match"), + ("PWF", Some("PWD"), "One-letter typo yields a match"), + ("pwf", None, "Case difference plus typo yields no match"), + ("Xwdpwdpwdpwd", None, "Case difference plus typo yields no match"), + ] + ), + ( + vec!["foo", "bar", "baz"], + vec![ + ("fox", Some("foo"), ""), + ("FOO", Some("foo"), ""), + ("FOX", None, ""), + ( + "ccc", + None, + "Not helpful to suggest an arbitrary choice when none are close", + ), + ( + "zzz", + None, + "'baz' does share a character, but rustc rule is edit distance must be <= 1/3 of the length of the user input", + ), + ], + ), + ( + vec!["aaaaaa"], + vec![ + ("XXaaaa", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"), + ("XXXaaa", None, "Distance of 3 out of 6 chars: not close enough to meet rustc's rule"), + ("XaaaaX", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"), + ("XXaaaaXX", None, "Distance of 4 out of 6 chars: not close enough to meet rustc's rule") + ] + ), + ]; + for (possibilities, cases) in all_cases { + for (input, expected_suggestion, discussion) in cases { + let suggestion = did_you_mean(&possibilities, input); + assert_eq!( + suggestion.as_deref(), + expected_suggestion, + "Expected the following reasoning to hold but it did not: '{discussion}'" + ); + } + } + } +} diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index dfce6cdee..e2cd1eba5 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -1543,6 +1543,24 @@ impl<'a> StateWorkingSet<'a> { num_permanent_vars + self.delta.vars.len() } + pub fn list_variables(&self) -> Vec<&[u8]> { + let mut removed_overlays = vec![]; + let mut variables = HashSet::new(); + for scope_frame in self.delta.scope.iter() { + for overlay_frame in scope_frame.active_overlays(&mut removed_overlays) { + variables.extend(overlay_frame.vars.keys().map(|k| &k[..])); + } + } + + let permanent_vars = self + .permanent_state + .active_overlays(&removed_overlays) + .flat_map(|overlay_frame| overlay_frame.vars.keys().map(|k| &k[..])); + + variables.extend(permanent_vars); + variables.into_iter().collect() + } + pub fn find_variable(&self, name: &[u8]) -> Option { let mut removed_overlays = vec![]; @@ -1589,7 +1607,6 @@ impl<'a> StateWorkingSet<'a> { mutable: bool, ) -> VarId { let next_id = self.next_var_id(); - // correct name if necessary if !name.starts_with(b"$") { name.insert(0, b'$'); @@ -2097,6 +2114,8 @@ fn build_usage(comment_lines: &[&[u8]]) -> (String, String) { #[cfg(test)] mod engine_state_tests { + use std::str::{from_utf8, Utf8Error}; + use super::*; #[test] @@ -2139,4 +2158,25 @@ mod engine_state_tests { Ok(()) } + + #[test] + fn list_variables() -> Result<(), Utf8Error> { + let varname = "something"; + let varname_with_sigil = "$".to_owned() + varname; + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + working_set.add_variable( + varname.as_bytes().into(), + Span { start: 0, end: 1 }, + Type::Int, + false, + ); + let variables = working_set + .list_variables() + .into_iter() + .map(|v| from_utf8(v)) + .collect::, Utf8Error>>()?; + assert_eq!(variables, vec![varname_with_sigil]); + Ok(()) + } } diff --git a/crates/nu-protocol/src/lev_distance.rs b/crates/nu-protocol/src/lev_distance.rs index 274c0bea1..ced34e9bc 100644 --- a/crates/nu-protocol/src/lev_distance.rs +++ b/crates/nu-protocol/src/lev_distance.rs @@ -25,11 +25,7 @@ pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option { return None; } if n == 0 || m == 0 { - return if min_dist <= limit { - Some(min_dist) - } else { - None - }; + return Some(min_dist); } let mut dcol: Vec<_> = (0..=m).collect(); @@ -57,6 +53,10 @@ pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option { } } +/// Finds the Levenshtein distance between two strings. +pub fn levenshtein_distance(a: &str, b: &str) -> usize { + lev_distance(a, b, usize::max_value()).unwrap_or(usize::max_value()) +} /// Provides a word similarity score between two words that accounts for substrings being more /// meaningful than a typical Levenshtein distance. The lower the score, the closer the match. /// 0 is an identical match. diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs index 4b9d2361e..ad695a1b9 100644 --- a/crates/nu-protocol/src/lib.rs +++ b/crates/nu-protocol/src/lib.rs @@ -2,6 +2,7 @@ mod alias; pub mod ast; pub mod cli_error; pub mod config; +mod did_you_mean; pub mod engine; mod example; mod exportable; @@ -24,12 +25,14 @@ mod variable; pub use alias::*; pub use cli_error::*; pub use config::*; +pub use did_you_mean::did_you_mean; pub use engine::{ENV_VARIABLE_ID, IN_VARIABLE_ID, NU_VARIABLE_ID}; pub use example::*; pub use exportable::*; pub use id::*; +pub use lev_distance::levenshtein_distance; pub use module::*; -pub use parse_error::ParseError; +pub use parse_error::{DidYouMean, ParseError}; pub use pipeline_data::*; #[cfg(feature = "plugin")] pub use plugin_signature::*; diff --git a/crates/nu-protocol/src/parse_error.rs b/crates/nu-protocol/src/parse_error.rs index 2696e8a16..c2d842d36 100644 --- a/crates/nu-protocol/src/parse_error.rs +++ b/crates/nu-protocol/src/parse_error.rs @@ -1,4 +1,9 @@ -use crate::{Span, Type}; +use std::{ + fmt::Display, + str::{from_utf8, Utf8Error}, +}; + +use crate::{did_you_mean, Span, Type}; use miette::Diagnostic; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -162,7 +167,7 @@ pub enum ParseError { #[error("Variable not found.")] #[diagnostic(code(nu::parser::variable_not_found))] - VariableNotFound(#[label = "variable not found"] Span), + VariableNotFound(DidYouMean, #[label = "variable not found. {0}"] Span), #[error("Variable name not supported.")] #[diagnostic(code(nu::parser::variable_not_valid))] @@ -442,7 +447,7 @@ impl ParseError { ParseError::CaptureOfMutableVar(s) => *s, ParseError::IncorrectValue(_, s, _) => *s, ParseError::MultipleRestParams(s) => *s, - ParseError::VariableNotFound(s) => *s, + ParseError::VariableNotFound(_, s) => *s, ParseError::VariableNotValid(s) => *s, ParseError::AliasNotValid(s) => *s, ParseError::CommandDefNotValid(s) => *s, @@ -498,3 +503,37 @@ impl ParseError { } } } + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct DidYouMean(Option); + +fn did_you_mean_impl(possibilities_bytes: &[&[u8]], input_bytes: &[u8]) -> Option { + let input = from_utf8(input_bytes).ok()?; + let possibilities = possibilities_bytes + .iter() + .map(|p| from_utf8(p)) + .collect::, Utf8Error>>() + .ok()?; + did_you_mean(&possibilities, input) +} +impl DidYouMean { + pub fn new(possibilities_bytes: &[&[u8]], input_bytes: &[u8]) -> DidYouMean { + DidYouMean(did_you_mean_impl(possibilities_bytes, input_bytes)) + } +} + +impl From> for DidYouMean { + fn from(value: Option) -> Self { + Self(value) + } +} + +impl Display for DidYouMean { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(suggestion) = &self.0 { + write!(f, "Did you mean '{}'?", suggestion) + } else { + write!(f, "") + } + } +} diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs index 7ecc5b381..31c6b41a9 100644 --- a/crates/nu-protocol/src/shell_error.rs +++ b/crates/nu-protocol/src/shell_error.rs @@ -1100,92 +1100,3 @@ impl From> for ShellError { pub fn into_code(err: &ShellError) -> Option { err.code().map(|code| code.to_string()) } - -pub fn did_you_mean>(possibilities: &[S], input: &str) -> Option { - let possibilities: Vec<&str> = possibilities.iter().map(|s| s.as_ref()).collect(); - let suggestion = - crate::lev_distance::find_best_match_for_name_with_substrings(&possibilities, input, None) - .map(|s| s.to_string()); - if let Some(suggestion) = &suggestion { - if suggestion.len() == 1 && suggestion.to_lowercase() != input.to_lowercase() { - return None; - } - } - suggestion -} - -pub fn levenshtein_distance(a: &str, b: &str) -> usize { - crate::lev_distance::lev_distance(a, b, usize::max_value()) - .expect("It is impossible to exceed the supplied limit since all types involved are usize.") -} - -#[cfg(test)] -mod tests { - use super::did_you_mean; - - #[test] - fn did_you_mean_examples() { - let all_cases = [ - ( - vec!["a", "b"], - vec![ - ("a", Some("a"), ""), - ("A", Some("a"), ""), - ( - "c", - None, - "Not helpful to suggest an arbitrary choice when none are close", - ), - ("ccccccccccccccccccccccc", None, "Not helpful to suggest an arbitrary choice when none are close"), - ], - ), - ( - vec!["OS", "PWD", "PWDPWDPWDPWD"], - vec![ - ("pwd", Some("PWD"), "Exact case insensitive match yields a match"), - ("pwdpwdpwdpwd", Some("PWDPWDPWDPWD"), "Exact case insensitive match yields a match"), - ("PWF", Some("PWD"), "One-letter typo yields a match"), - ("pwf", None, "Case difference plus typo yields no match"), - ("Xwdpwdpwdpwd", None, "Case difference plus typo yields no match"), - ] - ), - ( - vec!["foo", "bar", "baz"], - vec![ - ("fox", Some("foo"), ""), - ("FOO", Some("foo"), ""), - ("FOX", None, ""), - ( - "ccc", - None, - "Not helpful to suggest an arbitrary choice when none are close", - ), - ( - "zzz", - None, - "'baz' does share a character, but rustc rule is edit distance must be <= 1/3 of the length of the user input", - ), - ], - ), - ( - vec!["aaaaaa"], - vec![ - ("XXaaaa", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"), - ("XXXaaa", None, "Distance of 3 out of 6 chars: not close enough to meet rustc's rule"), - ("XaaaaX", Some("aaaaaa"), "Distance of 2 out of 6 chars: close enough to meet rustc's rule"), - ("XXaaaaXX", None, "Distance of 4 out of 6 chars: not close enough to meet rustc's rule") - ] - ), - ]; - for (possibilities, cases) in all_cases { - for (input, expected_suggestion, discussion) in cases { - let suggestion = did_you_mean(&possibilities, input); - assert_eq!( - suggestion.as_deref(), - expected_suggestion, - "Expected the following reasoning to hold but it did not: '{discussion}'" - ); - } - } - } -}