mirror of
https://github.com/nushell/nushell.git
synced 2025-08-16 17:51:42 +02:00
feat!: Explicit cell-path case sensitivity syntax (#15692)
Related: - #15683 - #14551 - #849 - #12701 - #11527 # Description Currently various commands have differing behavior regarding cell-paths ```nushell {a: 1, A: 2} | get a A # => ╭───┬───╮ # => │ 0 │ 2 │ # => │ 1 │ 2 │ # => ╰───┴───╯ {a: 1, A: 2} | select a A # => ╭───┬───╮ # => │ a │ 1 │ # => │ A │ 2 │ # => ╰───┴───╯ {A: 1} | update a 2 # => Error: nu::shell::column_not_found # => # => × Cannot find column 'a' # => ╭─[entry #62:1:1] # => 1 │ {A: 1} | update a 2 # => · ───┬── ┬ # => · │ ╰── cannot find column 'a' # => · ╰── value originates here # => ╰──── ``` Proposal: making cell-path access case-sensitive by default and adding new syntax for case-insensitive parts, similar to optional (?) parts. ```nushell {FOO: BAR}.foo # => Error: nu::shell::name_not_found # => # => × Name not found # => ╭─[entry #60:1:21] # => 1 │ {FOO: BAR}.foo # => · ─┬─ # => · ╰── did you mean 'FOO'? # => ╰──── {FOO: BAR}.foo! # => BAR ``` This would solve the problem of case sensitivity for all commands without causing an explosion of flags _and_ make it more granular. Assigning to a field using a case-insensitive path is case-preserving. ```nushell mut val = {FOO: "I'm FOO"}; $val # => ╭─────┬─────────╮ # => │ FOO │ I'm FOO │ # => ╰─────┴─────────╯ $val.foo! = "I'm still FOO"; $val # => ╭─────┬───────────────╮ # => │ FOO │ I'm still FOO │ # => ╰─────┴───────────────╯ ``` For `update`, case-insensitive is case-preserving. ```nushell {FOO: 1} | update foo! { $in + 1 } # => ╭─────┬───╮ # => │ FOO │ 2 │ # => ╰─────┴───╯ ``` `insert` can insert values into nested values so accessing into existing columns is case-insensitive, but creating new columns uses the cell-path as it is. So `insert foo! ...` and `insert FOO! 
...` would work exactly as they do without `!`. ```nushell {FOO: {quox: 0}} # => ╭─────┬──────────────╮ # => │ │ ╭──────┬───╮ │ # => │ FOO │ │ quox │ 0 │ │ # => │ │ ╰──────┴───╯ │ # => ╰─────┴──────────────╯ {FOO: {quox: 0}} | insert foo.bar 1 # => ╭─────┬──────────────╮ # => │ │ ╭──────┬───╮ │ # => │ FOO │ │ quox │ 0 │ │ # => │ │ ╰──────┴───╯ │ # => │ │ ╭─────┬───╮ │ # => │ foo │ │ bar │ 1 │ │ # => │ │ ╰─────┴───╯ │ # => ╰─────┴──────────────╯ {FOO: {quox: 0}} | insert foo!.bar 1 # => ╭─────┬──────────────╮ # => │ │ ╭──────┬───╮ │ # => │ FOO │ │ quox │ 0 │ │ # => │ │ │ bar │ 1 │ │ # => │ │ ╰──────┴───╯ │ # => ╰─────┴──────────────╯ ``` `upsert` is tricky: depending on the input, the data might end up with different column names in rows. We can either forbid case-insensitive cell-paths for `upsert` or trust the user to keep their data in a sensible shape. This would be a breaking change as it would make existing cell-path accesses case-sensitive; however, the case-sensitivity is already inconsistent and any attempt at making it consistent would be a breaking change. > What about `$env`? 1. Initially special case it so it keeps its current behavior. 2. Accessing environment variables with non-matching paths gives a deprecation warning urging users to either use exact casing or use the new explicit case-sensitivity syntax. 3. Eventually remove `$env`'s special case, making `$env` accesses case-sensitive by default as well. > `$env.ENV_CONVERSIONS`? In addition to `from_string` and `to_string` add an optional field to opt into case insensitive/preserving behavior. # User-Facing Changes - `get`, `where` and other previously case-insensitive commands are now case-sensitive by default. - `get`'s `--sensitive` flag removed, similar to `--ignore-errors` there is now an `--ignore-case` flag that treats all parts of the cell-path as case-insensitive. - Users can explicitly choose the case-sensitivity of cell-path accesses or commands. 
# Tests + Formatting Existing tests required minimal modification. ***However, new tests are not yet added***. - 🟢 toolkit fmt - 🟢 toolkit clippy - 🟢 toolkit test - 🟢 toolkit test stdlib # After Submitting - Update the website to include the new syntax - Update [tree-sitter-nu](https://github.com/nushell/tree-sitter-nu) --------- Co-authored-by: Bahex <17417311+Bahex@users.noreply.github.com>
This commit is contained in:
@ -15,7 +15,7 @@ use nu_engine::DIR_VAR_PARSER_INFO;
|
||||
use nu_protocol::{
|
||||
BlockId, DeclId, DidYouMean, ENV_VARIABLE_ID, FilesizeUnit, Flag, IN_VARIABLE_ID, ParseError,
|
||||
PositionalArg, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, VarId, ast::*,
|
||||
engine::StateWorkingSet, eval_const::eval_constant,
|
||||
casing::Casing, engine::StateWorkingSet, eval_const::eval_constant,
|
||||
};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
@ -1799,7 +1799,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Option<Expr
|
||||
&contents[..dotdot_pos[0]],
|
||||
span.start,
|
||||
&[],
|
||||
&[b'.', b'?'],
|
||||
&[b'.', b'?', b'!'],
|
||||
true,
|
||||
);
|
||||
if let Some(_err) = err {
|
||||
@ -2317,9 +2317,55 @@ pub fn parse_cell_path(
|
||||
expect_dot: bool,
|
||||
) -> Vec<PathMember> {
|
||||
/// State of the cell-path token state machine: the kind of token that is
/// allowed to appear next.
///
/// After a path member, the suffixes `?` and `!` may each appear at most once,
/// in either order, before the next `.`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    Dot,              // .
    DotOrSign,        // . or ? or !
    DotOrExclamation, // . or !
    DotOrQuestion,    // . or ?
    PathMember,       // an int or string, like `1` or `foo`
}

/// Adjustment that the token just consumed requests for the most recently
/// parsed path member.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ModifyMember {
    /// Leave the member untouched (a `.` or the member itself was consumed).
    No,
    /// A `?` suffix was consumed.
    Optional,
    /// A `!` suffix was consumed.
    Insensitive,
}

impl TokenType {
    /// Feeds one single-byte token to the state machine.
    ///
    /// On success the state advances and the returned [`ModifyMember`] says
    /// how the previous path member should be adjusted. On failure the state
    /// is left unchanged and the error is a human-readable description of the
    /// token(s) that would have been accepted.
    ///
    /// In the `PathMember` state every byte is accepted; the member itself is
    /// expected to be parsed by the caller.
    fn expect(&mut self, byte: u8) -> Result<ModifyMember, &'static str> {
        let (next, modify) = match (*self, byte) {
            (Self::PathMember, _) => (Self::DotOrSign, ModifyMember::No),
            // A dot always ends the current member, whatever suffixes were seen.
            (
                Self::Dot | Self::DotOrSign | Self::DotOrExclamation | Self::DotOrQuestion,
                b'.',
            ) => (Self::PathMember, ModifyMember::No),
            // First suffix: remember that only the *other* suffix may follow.
            (Self::DotOrSign, b'!') => (Self::DotOrQuestion, ModifyMember::Insensitive),
            (Self::DotOrSign, b'?') => (Self::DotOrExclamation, ModifyMember::Optional),
            (Self::DotOrSign, _) => return Err(". or ! or ?"),
            // Second suffix: afterwards only a dot is acceptable.
            (Self::DotOrExclamation, b'!') => (Self::Dot, ModifyMember::Insensitive),
            (Self::DotOrExclamation, _) => return Err(". or !"),
            (Self::DotOrQuestion, b'?') => (Self::Dot, ModifyMember::Optional),
            (Self::DotOrQuestion, _) => return Err(". or ?"),
            (Self::Dot, _) => return Err("."),
        };
        *self = next;
        Ok(modify)
    }
}
|
||||
|
||||
// Parsing a cell path is essentially a state machine, and this is the state
|
||||
@ -2334,69 +2380,68 @@ pub fn parse_cell_path(
|
||||
for path_element in tokens {
|
||||
let bytes = working_set.get_span_contents(path_element.span);
|
||||
|
||||
match expected_token {
|
||||
TokenType::Dot => {
|
||||
if bytes.len() != 1 || bytes[0] != b'.' {
|
||||
working_set.error(ParseError::Expected(".", path_element.span));
|
||||
return tail;
|
||||
}
|
||||
expected_token = TokenType::PathMember;
|
||||
}
|
||||
TokenType::QuestionOrDot => {
|
||||
if bytes.len() == 1 && bytes[0] == b'.' {
|
||||
expected_token = TokenType::PathMember;
|
||||
} else if bytes.len() == 1 && bytes[0] == b'?' {
|
||||
if let Some(last) = tail.last_mut() {
|
||||
match last {
|
||||
PathMember::String { optional, .. } => *optional = true,
|
||||
PathMember::Int { optional, .. } => *optional = true,
|
||||
}
|
||||
}
|
||||
expected_token = TokenType::Dot;
|
||||
} else {
|
||||
working_set.error(ParseError::Expected(". or ?", path_element.span));
|
||||
return tail;
|
||||
}
|
||||
}
|
||||
TokenType::PathMember => {
|
||||
let starting_error_count = working_set.parse_errors.len();
|
||||
// both parse_int and parse_string require their source to be non-empty
|
||||
// all cases where `bytes` is empty is an error
|
||||
let Some((&first, rest)) = bytes.split_first() else {
|
||||
working_set.error(ParseError::Expected("string", path_element.span));
|
||||
return tail;
|
||||
};
|
||||
let single_char = rest.is_empty();
|
||||
|
||||
let expr = parse_int(working_set, path_element.span);
|
||||
working_set.parse_errors.truncate(starting_error_count);
|
||||
if let TokenType::PathMember = expected_token {
|
||||
let starting_error_count = working_set.parse_errors.len();
|
||||
|
||||
match expr {
|
||||
Expression {
|
||||
expr: Expr::Int(val),
|
||||
span,
|
||||
..
|
||||
} => tail.push(PathMember::Int {
|
||||
val: val as usize,
|
||||
span,
|
||||
optional: false,
|
||||
}),
|
||||
_ => {
|
||||
let result = parse_string(working_set, path_element.span);
|
||||
match result {
|
||||
Expression {
|
||||
expr: Expr::String(string),
|
||||
let expr = parse_int(working_set, path_element.span);
|
||||
working_set.parse_errors.truncate(starting_error_count);
|
||||
|
||||
match expr {
|
||||
Expression {
|
||||
expr: Expr::Int(val),
|
||||
span,
|
||||
..
|
||||
} => tail.push(PathMember::Int {
|
||||
val: val as usize,
|
||||
span,
|
||||
optional: false,
|
||||
}),
|
||||
_ => {
|
||||
let result = parse_string(working_set, path_element.span);
|
||||
match result {
|
||||
Expression {
|
||||
expr: Expr::String(string),
|
||||
span,
|
||||
..
|
||||
} => {
|
||||
tail.push(PathMember::String {
|
||||
val: string,
|
||||
span,
|
||||
..
|
||||
} => {
|
||||
tail.push(PathMember::String {
|
||||
val: string,
|
||||
span,
|
||||
optional: false,
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
working_set
|
||||
.error(ParseError::Expected("string", path_element.span));
|
||||
return tail;
|
||||
}
|
||||
optional: false,
|
||||
casing: Casing::Sensitive,
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
working_set.error(ParseError::Expected("string", path_element.span));
|
||||
return tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
expected_token = TokenType::QuestionOrDot;
|
||||
}
|
||||
expected_token = TokenType::DotOrSign;
|
||||
} else {
|
||||
match expected_token.expect(if single_char { first } else { b' ' }) {
|
||||
Ok(modify) => {
|
||||
if let Some(last) = tail.last_mut() {
|
||||
match modify {
|
||||
ModifyMember::No => {}
|
||||
ModifyMember::Optional => last.make_optional(),
|
||||
ModifyMember::Insensitive => last.make_insensitive(),
|
||||
}
|
||||
};
|
||||
}
|
||||
Err(expected) => {
|
||||
working_set.error(ParseError::Expected(expected, path_element.span));
|
||||
return tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2407,7 +2452,13 @@ pub fn parse_cell_path(
|
||||
pub fn parse_simple_cell_path(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
let source = working_set.get_span_contents(span);
|
||||
|
||||
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
|
||||
let (tokens, err) = lex(
|
||||
source,
|
||||
span.start,
|
||||
&[b'\n', b'\r'],
|
||||
&[b'.', b'?', b'!'],
|
||||
true,
|
||||
);
|
||||
if let Some(err) = err {
|
||||
working_set.error(err)
|
||||
}
|
||||
@ -2433,7 +2484,13 @@ pub fn parse_full_cell_path(
|
||||
let full_cell_span = span;
|
||||
let source = working_set.get_span_contents(span);
|
||||
|
||||
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
|
||||
let (tokens, err) = lex(
|
||||
source,
|
||||
span.start,
|
||||
&[b'\n', b'\r'],
|
||||
&[b'.', b'?', b'!'],
|
||||
true,
|
||||
);
|
||||
if let Some(err) = err {
|
||||
working_set.error(err)
|
||||
}
|
||||
|
Reference in New Issue
Block a user