Improve case insensitivity consistency (#10884)

# Description

Add an extension trait `IgnoreCaseExt` to nu_utils which adds some case
insensitivity helpers, and use them throughout nu to improve the
handling of case insensitivity. Proper case folding is done via unicase,
which is already a dependency via mime_guess from nu-command.

In actuality a lot of code still does `to_lowercase`, because unicase
only provides immediate comparison and doesn't expose a `to_folded_case`
yet. And since we do a lot of `contains`/`starts_with`/`ends_with`, it's
not sufficient to just have `eq_ignore_case`. But if unicase exposes
such a function in the future, this makes us ready to use it with a
change in one place.

Plus, calling `to_folded_case` instead of `to_lowercase` makes the
purpose clearer at the call site when it's exclusively for the purpose
of case insensitive comparison, even if it still just does
`to_lowercase`.

# User-Facing Changes

- Some commands that were supposed to be case insensitive were
previously insensitive only to ASCII case (a-z); they are now case
insensitive w.r.t. non-ASCII characters as well.

# Tests + Formatting

- 🟢 `toolkit fmt`
- 🟢 `toolkit clippy`
- 🟢 `toolkit test`
- 🟢 `toolkit test stdlib`

---------

Co-authored-by: Stefan Holderbach <sholderbach@users.noreply.github.com>
This commit is contained in:
Christopher Durham
2023-11-08 17:58:54 -05:00
committed by GitHub
parent aed4b626b8
commit 0f600bc3f5
35 changed files with 176 additions and 122 deletions

View File

@ -143,7 +143,7 @@ fn fill(
let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
let alignment = if let Some(arg) = alignment_arg {
match arg.to_lowercase().as_str() {
match arg.to_ascii_lowercase().as_str() {
"l" | "left" => FillAlignment::Left,
"r" | "right" => FillAlignment::Right,
"c" | "center" | "m" | "middle" => FillAlignment::Middle,

View File

@ -116,13 +116,13 @@ fn into_bool(
}
fn string_to_boolean(s: &str, span: Span) -> Result<bool, ShellError> {
match s.trim().to_lowercase().as_str() {
match s.trim().to_ascii_lowercase().as_str() {
"true" => Ok(true),
"false" => Ok(false),
o => {
let val = o.parse::<f64>();
match val {
Ok(f) => Ok(f.abs() >= f64::EPSILON),
Ok(f) => Ok(f != 0.0),
Err(_) => Err(ShellError::CantConvert {
to_type: "boolean".to_string(),
from_type: "string".to_string(),

View File

@ -46,7 +46,7 @@ impl Zone {
}
}
fn from_string(s: String) -> Self {
match s.to_lowercase().as_str() {
match s.to_ascii_lowercase().as_str() {
"utc" | "u" => Self::Utc,
"local" | "l" => Self::Local,
_ => Self::Error,

View File

@ -26,7 +26,7 @@ pub fn datetime_in_timezone(
None => Err(ParseErrorKind::OutOfRange),
},
Err(ParseErrorKind::Invalid) => {
if s.to_lowercase() == "local" {
if s.eq_ignore_ascii_case("local") {
Ok(dt.with_timezone(Local::now().offset()))
} else {
let tz: Tz = parse_timezone_internal(s)?;

View File

@ -10,6 +10,7 @@ use nu_protocol::{
record, Category, Config, Example, IntoInterruptiblePipelineData, IntoPipelineData, ListStream,
PipelineData, Record, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use nu_utils::IgnoreCaseExt;
#[derive(Clone)]
pub struct Find;
@ -318,7 +319,9 @@ fn highlight_terms_in_record_with_search_columns(
}
fn contains_ignore_case(string: &str, substring: &str) -> bool {
string.to_lowercase().contains(&substring.to_lowercase())
string
.to_folded_case()
.contains(&substring.to_folded_case())
}
fn find_with_rest_and_highlight(

View File

@ -5,6 +5,7 @@ use nu_protocol::{
record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
Record, ShellError, Signature, Span, Type, Value,
};
use nu_utils::IgnoreCaseExt;
use std::cmp::Ordering;
#[derive(Clone)]
@ -220,14 +221,14 @@ fn sort_record(
b.0.clone()
};
// Convert to lowercase if case-insensitive
// Fold case if case-insensitive
let left = if insensitive {
left_res.to_ascii_lowercase()
left_res.to_folded_case()
} else {
left_res
};
let right = if insensitive {
right_res.to_ascii_lowercase()
right_res.to_folded_case()
} else {
right_res
};
@ -235,7 +236,7 @@ fn sort_record(
if natural {
compare_str(left, right)
} else {
left.partial_cmp(&right).unwrap_or(Ordering::Equal)
left.cmp(&right)
}
});
@ -262,28 +263,24 @@ pub fn sort(
let span_a = a.span();
let span_b = b.span();
if insensitive {
let lowercase_left = match a {
Value::String { val, .. } => {
Value::string(val.to_ascii_lowercase(), span_a)
}
let folded_left = match a {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_a),
_ => a.clone(),
};
let lowercase_right = match b {
Value::String { val, .. } => {
Value::string(val.to_ascii_lowercase(), span_b)
}
let folded_right = match b {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_b),
_ => b.clone(),
};
if natural {
match (lowercase_left.as_string(), lowercase_right.as_string()) {
match (folded_left.as_string(), folded_right.as_string()) {
(Ok(left), Ok(right)) => compare_str(left, right),
_ => Ordering::Equal,
}
} else {
lowercase_left
.partial_cmp(&lowercase_right)
folded_left
.partial_cmp(&folded_right)
.unwrap_or(Ordering::Equal)
}
} else if natural {
@ -326,23 +323,23 @@ pub fn process(
let result = if insensitive {
let span_left = left_res.span();
let span_right = right_res.span();
let lowercase_left = match left_res {
Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_left),
let folded_left = match left_res {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_left),
_ => left_res,
};
let lowercase_right = match right_res {
Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_right),
let folded_right = match right_res {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_right),
_ => right_res,
};
if natural {
match (lowercase_left.as_string(), lowercase_right.as_string()) {
match (folded_left.as_string(), folded_right.as_string()) {
(Ok(left), Ok(right)) => compare_str(left, right),
_ => Ordering::Equal,
}
} else {
lowercase_left
.partial_cmp(&lowercase_right)
folded_left
.partial_cmp(&folded_right)
.unwrap_or(Ordering::Equal)
}
} else {

View File

@ -6,6 +6,7 @@ use nu_protocol::{
record, Category, Example, IntoPipelineData, PipelineData, PipelineMetadata, ShellError,
Signature, Span, Type, Value,
};
use nu_utils::IgnoreCaseExt;
use std::collections::hash_map::IntoIter;
use std::collections::HashMap;
@ -172,7 +173,7 @@ impl ValueCounter {
ValueCounter {
val,
val_to_compare: if flag_ignore_case {
clone_to_lowercase(&vals_to_compare.with_span(Span::unknown()))
clone_to_folded_case(&vals_to_compare.with_span(Span::unknown()))
} else {
vals_to_compare.with_span(Span::unknown())
},
@ -182,17 +183,17 @@ impl ValueCounter {
}
}
fn clone_to_lowercase(value: &Value) -> Value {
fn clone_to_folded_case(value: &Value) -> Value {
let span = value.span();
match value {
Value::String { val: s, .. } => Value::string(s.clone().to_lowercase(), span),
Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
Value::List { vals: vec, .. } => {
Value::list(vec.iter().map(clone_to_lowercase).collect(), span)
Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
}
Value::Record { val: record, .. } => Value::record(
record
.iter()
.map(|(k, v)| (k.to_owned(), clone_to_lowercase(v)))
.map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
.collect(),
span,
),

View File

@ -150,9 +150,9 @@ used as the next argument to the closure, otherwise generation stops.
let mut err = None;
for (k, v) in iter {
if k.to_lowercase() == "out" {
if k.eq_ignore_ascii_case("out") {
out = Some(v);
} else if k.to_lowercase() == "next" {
} else if k.eq_ignore_ascii_case("next") {
next = Some(v);
} else {
let error = ShellError::GenericError(

View File

@ -162,9 +162,9 @@ used as the next argument to the closure, otherwise generation stops.
let mut err = None;
for (k, v) in iter {
if k.to_lowercase() == "out" {
if k.eq_ignore_ascii_case("out") {
out = Some(v);
} else if k.to_lowercase() == "next" {
} else if k.eq_ignore_ascii_case("next") {
next = Some(v);
} else {
let error = ShellError::GenericError(

View File

@ -10,6 +10,7 @@ use nu_protocol::{
span, Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use nu_utils::IgnoreCaseExt;
#[derive(Clone)]
pub struct Help;
@ -144,7 +145,7 @@ pub fn highlight_search_in_table(
highlight_style: &Style,
) -> Result<Vec<Value>, ShellError> {
let orig_search_string = search_string;
let search_string = search_string.to_lowercase();
let search_string = search_string.to_folded_case();
let mut matches = vec![];
for record in table {
@ -168,7 +169,7 @@ pub fn highlight_search_in_table(
}
let span = val.span();
if let Value::String { val: s, .. } = val {
if s.to_lowercase().contains(&search_string) {
if s.to_folded_case().contains(&search_string) {
*val = Value::string(
highlight_search_string(
s,

View File

@ -134,7 +134,7 @@ fn build_help_commands(engine_state: &EngineState, span: Span) -> Vec<Value> {
let usage = sig.usage;
let search_terms = sig.search_terms;
let command_type = format!("{:?}", decl.command_type()).to_lowercase();
let command_type = format!("{:?}", decl.command_type()).to_ascii_lowercase();
// Build table of parameters
let param_table = {

View File

@ -345,9 +345,9 @@ fn get_keycode_name(head: Span, code: &KeyCode) -> (Value, Value) {
let (typ, code) = match code {
KeyCode::F(n) => ("f", n.to_string()),
KeyCode::Char(c) => ("char", c.to_string()),
KeyCode::Media(m) => ("media", format!("{m:?}").to_lowercase()),
KeyCode::Modifier(m) => ("modifier", format!("{m:?}").to_lowercase()),
_ => ("other", format!("{code:?}").to_lowercase()),
KeyCode::Media(m) => ("media", format!("{m:?}").to_ascii_lowercase()),
KeyCode::Modifier(m) => ("modifier", format!("{m:?}").to_ascii_lowercase()),
_ => ("other", format!("{code:?}").to_ascii_lowercase()),
};
(Value::string(typ, head), Value::string(code, head))
}
@ -365,7 +365,7 @@ fn parse_modifiers(head: Span, modifiers: &KeyModifiers) -> Value {
let parsed_modifiers = ALL_MODIFIERS
.iter()
.filter(|m| modifiers.contains(**m))
.map(|m| format!("{m:?}").to_lowercase())
.map(|m| format!("{m:?}").to_ascii_lowercase())
.map(|string| Value::string(string, head))
.collect();

View File

@ -1,6 +1,7 @@
use alphanumeric_sort::compare_str;
use nu_engine::column::nonexistent_column;
use nu_protocol::{ShellError, Span, Value};
use nu_utils::IgnoreCaseExt;
use std::cmp::Ordering;
// This module includes sorting functionality that is useful in sort-by and elsewhere.
@ -125,28 +126,24 @@ pub fn sort(
if insensitive {
let span_a = a.span();
let span_b = b.span();
let lowercase_left = match a {
Value::String { val, .. } => {
Value::string(val.to_ascii_lowercase(), span_a)
}
let folded_left = match a {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_a),
_ => a.clone(),
};
let lowercase_right = match b {
Value::String { val, .. } => {
Value::string(val.to_ascii_lowercase(), span_b)
}
let folded_right = match b {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_b),
_ => b.clone(),
};
if natural {
match (lowercase_left.as_string(), lowercase_right.as_string()) {
match (folded_left.as_string(), folded_right.as_string()) {
(Ok(left), Ok(right)) => compare_str(left, right),
_ => Ordering::Equal,
}
} else {
lowercase_left
.partial_cmp(&lowercase_right)
folded_left
.partial_cmp(&folded_right)
.unwrap_or(Ordering::Equal)
}
} else if natural {
@ -189,23 +186,23 @@ pub fn compare(
let result = if insensitive {
let span_left = left_res.span();
let span_right = right_res.span();
let lowercase_left = match left_res {
Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_left),
let folded_left = match left_res {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_left),
_ => left_res,
};
let lowercase_right = match right_res {
Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_right),
let folded_right = match right_res {
Value::String { val, .. } => Value::string(val.to_folded_case(), span_right),
_ => right_res,
};
if natural {
match (lowercase_left.as_string(), lowercase_right.as_string()) {
match (folded_left.as_string(), folded_right.as_string()) {
(Ok(left), Ok(right)) => compare_str(left, right),
_ => Ordering::Equal,
}
} else {
lowercase_left
.partial_cmp(&lowercase_right)
folded_left
.partial_cmp(&folded_right)
.unwrap_or(Ordering::Equal)
}
} else if natural {

View File

@ -28,7 +28,7 @@ pub fn decode(
bytes: &[u8],
) -> Result<Value, ShellError> {
// Workaround for a bug in the Encodings Specification.
let encoding = if encoding_name.item.to_lowercase() == "utf16" {
let encoding = if encoding_name.item.eq_ignore_ascii_case("utf16") {
parse_encoding(encoding_name.span, "utf-16")
} else {
parse_encoding(encoding_name.span, &encoding_name.item)
@ -45,7 +45,7 @@ pub fn encode(
ignore_errors: bool,
) -> Result<Value, ShellError> {
// Workaround for a bug in the Encodings Specification.
let encoding = if encoding_name.item.to_lowercase() == "utf16" {
let encoding = if encoding_name.item.eq_ignore_ascii_case("utf16") {
parse_encoding(encoding_name.span, "utf-16")
} else {
parse_encoding(encoding_name.span, &encoding_name.item)
@ -69,7 +69,7 @@ pub fn encode(
fn parse_encoding(span: Span, label: &str) -> Result<&'static Encoding, ShellError> {
// Workaround for a bug in the Encodings Specification.
let label = if label.to_lowercase() == "utf16" {
let label = if label.eq_ignore_ascii_case("utf16") {
"utf-16"
} else {
label

View File

@ -7,6 +7,7 @@ use nu_protocol::record;
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use nu_utils::IgnoreCaseExt;
#[derive(Clone)]
pub struct SubCommand;
@ -153,11 +154,11 @@ fn action(
match case_insensitive {
true => {
if *not_contain {
!val.to_lowercase()
.contains(substring.to_lowercase().as_str())
!val.to_folded_case()
.contains(substring.to_folded_case().as_str())
} else {
val.to_lowercase()
.contains(substring.to_lowercase().as_str())
val.to_folded_case()
.contains(substring.to_folded_case().as_str())
}
}
false => {

View File

@ -5,6 +5,7 @@ use nu_protocol::ast::CellPath;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::Category;
use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value};
use nu_utils::IgnoreCaseExt;
struct Arguments {
substring: String,
@ -98,7 +99,8 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
match input {
Value::String { val: s, .. } => {
let ends_with = if args.case_insensitive {
s.to_lowercase().ends_with(&args.substring.to_lowercase())
s.to_folded_case()
.ends_with(&args.substring.to_folded_case())
} else {
s.ends_with(&args.substring)
};

View File

@ -6,6 +6,7 @@ use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::Category;
use nu_protocol::Spanned;
use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value};
use nu_utils::IgnoreCaseExt;
struct Arguments {
substring: String,
@ -111,7 +112,7 @@ fn action(
match input {
Value::String { val: s, .. } => {
let starts_with = if *case_insensitive {
s.to_lowercase().starts_with(&substring.to_lowercase())
s.to_folded_case().starts_with(&substring.to_folded_case())
} else {
s.starts_with(substring)
};

View File

@ -9,6 +9,7 @@ use nu_protocol::{
SyntaxShape, Type, Value,
};
use nu_system::ForegroundProcess;
use nu_utils::IgnoreCaseExt;
use os_pipe::PipeReader;
use pathdiff::diff_paths;
use std::collections::HashMap;
@ -223,10 +224,10 @@ impl ExternalCommand {
const CMD_INTERNAL_COMMANDS: [&str; 9] = [
"ASSOC", "CLS", "ECHO", "FTYPE", "MKLINK", "PAUSE", "START", "VER", "VOL",
];
let command_name_upper = self.name.item.to_uppercase();
let command_name = &self.name.item;
let looks_like_cmd_internal = CMD_INTERNAL_COMMANDS
.iter()
.any(|&cmd| command_name_upper == cmd);
.any(|&cmd| command_name.eq_ignore_ascii_case(cmd));
if looks_like_cmd_internal {
let (cmd, new_reader) = self.create_process(&input, true, head)?;
@ -252,9 +253,10 @@ impl ExternalCommand {
which::which_in(&self.name.item, Some(path_with_cwd), cwd)
{
if let Some(file_name) = which_path.file_name() {
let file_name_upper =
file_name.to_string_lossy().to_uppercase();
if file_name_upper != command_name_upper {
if !file_name
.to_string_lossy()
.eq_ignore_case(command_name)
{
// which-rs found an executable file with a slightly different name
// than the one the user tried. Let's try running it
let mut new_command = self.clone();
@ -767,11 +769,11 @@ fn trim_expand_and_apply_arg(
/// Given an invalid command name, try to suggest an alternative
fn suggest_command(attempted_command: &str, engine_state: &EngineState) -> Option<String> {
let commands = engine_state.get_signatures(false);
let command_name_lower = attempted_command.to_lowercase();
let command_folded_case = attempted_command.to_folded_case();
let search_term_match = commands.iter().find(|sig| {
sig.search_terms
.iter()
.any(|term| term.to_lowercase() == command_name_lower)
.any(|term| term.to_folded_case() == command_folded_case)
});
match search_term_match {
Some(sig) => Some(sig.name.clone()),