Parser panic for signature with multibyte char for short flag #8821 (#8849)

Hey, I'm a developer and I'm still new to Nushell and Rust, but I would
like to learn more about both. This is my first PR for this project.

The intent of my change is to allow the use of multibyte UTF-8 characters
in a command's short flags.
This commit is contained in:
federico viscomi 2023-04-10 22:52:51 +01:00 committed by GitHub
parent c3678764b4
commit eb4d19fb9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 47 deletions

View File

@ -32,6 +32,7 @@ use log::trace;
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
num::ParseIntError, num::ParseIntError,
str,
}; };
#[cfg(feature = "plugin")] #[cfg(feature = "plugin")]
@ -451,12 +452,13 @@ fn parse_short_flags(
let arg_contents = working_set.get_span_contents(arg_span); let arg_contents = working_set.get_span_contents(arg_span);
if arg_contents.starts_with(b"-") && arg_contents.len() > 1 { if let Ok(arg_contents_uft8_ref) = str::from_utf8(arg_contents) {
let short_flags = &arg_contents[1..]; if arg_contents_uft8_ref.starts_with('-') && arg_contents_uft8_ref.len() > 1 {
let short_flags = &arg_contents_uft8_ref[1..];
let mut found_short_flags = vec![]; let mut found_short_flags = vec![];
let mut unmatched_short_flags = vec![]; let mut unmatched_short_flags = vec![];
for short_flag in short_flags.iter().enumerate() { for short_flag in short_flags.chars().enumerate() {
let short_flag_char = char::from(*short_flag.1); let short_flag_char = short_flag.1;
let orig = arg_span; let orig = arg_span;
let short_flag_span = Span::new( let short_flag_span = Span::new(
orig.start + 1 + short_flag.0, orig.start + 1 + short_flag.0,
@ -479,7 +481,9 @@ fn parse_short_flags(
// check to see if we have a negative number // check to see if we have a negative number
if let Some(positional) = sig.get_positional(positional_idx) { if let Some(positional) = sig.get_positional(positional_idx) {
if positional.shape == SyntaxShape::Int || positional.shape == SyntaxShape::Number { if positional.shape == SyntaxShape::Int
|| positional.shape == SyntaxShape::Number
{
if String::from_utf8_lossy(arg_contents).parse::<f64>().is_ok() { if String::from_utf8_lossy(arg_contents).parse::<f64>().is_ok() {
return None; return None;
} else if let Some(first) = unmatched_short_flags.first() { } else if let Some(first) = unmatched_short_flags.first() {
@ -525,6 +529,10 @@ fn parse_short_flags(
} else { } else {
None None
} }
} else {
working_set.error(ParseError::NonUtf8(arg_span));
None
}
} }
fn first_kw_idx( fn first_kw_idx(

View File

@ -237,3 +237,14 @@ fn numberlike_command_name() {
assert!(actual.err.contains("backticks")); assert!(actual.err.contains("backticks"));
} }
/// Regression test for multibyte short flags: declares a command whose short
/// flag is the non-ASCII character `ö`, invokes it with that flag, and expects
/// the run to leave `actual.err` empty.
#[test]
fn call_command_with_non_ascii_argument() {
    let actual = nu!(cwd: "tests/parsing/samples",
    r#"
def nu-arg [--umlaut(-ö): int] {}
nu-arg -ö 42
"#);

    // Surface the captured error text on failure; comparing only the length
    // would report a bare count with no hint of what actually went wrong.
    assert!(
        actual.err.is_empty(),
        "expected no error output, got: {:?}",
        actual.err
    );
}