improve parsing of values with units (#9190)

closes #9111 

# Description

this pr improves parsing of values with units (`filesizes`, `durations`
and any other **future values**) by:

1. allowing underscores in the value part
```nu
> 42kb          # okay
> 42_sec        # okay
> 1_000_000mib  # okay
> 69k_b         # not okay, underscores not allowed in the unit
```

2. improving error messages involving these values
```nu
> sleep 40-sec

# before
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[entry #42:1:1]
 1 │ sleep 40-sec
   ·       ──┬──
   ·         ╰── expected duration with valid units
   ╰────

# now
Error:
  × duration value must be a number
   ╭─[entry #41:1:1]
 1 │ sleep 40-sec
   ·       ─┬─
   ·        ╰── not a number
   ╰────
```

3. unifying parsing of these values. now all of these use one function

# User-Facing Changes

filesizes and durations can now have underscores for readability
This commit is contained in:
mike 2023-05-18 02:54:35 +03:00 committed by GitHub
parent 6a0c88d516
commit 0e4729b203
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 195 additions and 190 deletions

View File

@ -1,5 +1,5 @@
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_parser::parse_duration_bytes; use nu_parser::{parse_unit_value, DURATION_UNIT_GROUPS};
use nu_protocol::{ use nu_protocol::{
ast::{Call, CellPath, Expr}, ast::{Call, CellPath, Expr},
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -409,7 +409,13 @@ fn convert_str_from_unit_to_unit(
} }
fn string_to_duration(s: &str, span: Span, value_span: Span) -> Result<i64, ShellError> { fn string_to_duration(s: &str, span: Span, value_span: Span) -> Result<i64, ShellError> {
if let Some(expression) = parse_duration_bytes(s.as_bytes(), span) { if let Some(Ok(expression)) = parse_unit_value(
s.as_bytes(),
span,
DURATION_UNIT_GROUPS,
Type::Duration,
|x| x,
) {
if let Expr::ValueWithUnit(value, unit) = expression.expr { if let Expr::ValueWithUnit(value, unit) = expression.expr {
if let Expr::Int(x) = value.expr { if let Expr::Int(x) = value.expr {
match unit.item { match unit.item {
@ -445,7 +451,13 @@ fn string_to_unit_duration(
span: Span, span: Span,
value_span: Span, value_span: Span,
) -> Result<(&str, i64), ShellError> { ) -> Result<(&str, i64), ShellError> {
if let Some(expression) = parse_duration_bytes(s.as_bytes(), span) { if let Some(Ok(expression)) = parse_unit_value(
s.as_bytes(),
span,
DURATION_UNIT_GROUPS,
Type::Duration,
|x| x,
) {
if let Expr::ValueWithUnit(value, unit) = expression.expr { if let Expr::ValueWithUnit(value, unit) = expression.expr {
if let Expr::Int(x) = value.expr { if let Expr::Int(x) = value.expr {
match unit.item { match unit.item {

View File

@ -19,8 +19,8 @@ pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
pub use parse_keywords::*; pub use parse_keywords::*;
pub use parser::{ pub use parser::{
is_math_expression_like, parse, parse_block, parse_duration_bytes, parse_expression, is_math_expression_like, parse, parse_block, parse_expression, parse_external_call,
parse_external_call, trim_quotes, trim_quotes_str, unescape_unquote_string, parse_unit_value, trim_quotes, trim_quotes_str, unescape_unquote_string, DURATION_UNIT_GROUPS,
}; };
#[cfg(feature = "plugin")] #[cfg(feature = "plugin")]

View File

@ -2199,21 +2199,160 @@ pub fn parse_duration(working_set: &mut StateWorkingSet, span: Span) -> Expressi
let bytes = working_set.get_span_contents(span); let bytes = working_set.get_span_contents(span);
match parse_duration_bytes(bytes, span) { match parse_unit_value(bytes, span, DURATION_UNIT_GROUPS, Type::Duration, |x| x) {
Some(expression) => expression, Some(Ok(expr)) => expr,
Some(Err(mk_err_for)) => {
working_set.error(mk_err_for("duration"));
garbage(span)
}
None => { None => {
working_set.error(ParseError::Expected( working_set.error(ParseError::Expected(
"duration with valid units".into(), "duration with valid units".into(),
span, span,
)); ));
garbage(span) garbage(span)
} }
} }
} }
/// Parse a unit type, eg '10kb'
pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
trace!("parsing: filesize");
let bytes = working_set.get_span_contents(span);
match parse_unit_value(bytes, span, FILESIZE_UNIT_GROUPS, Type::Filesize, |x| {
x.to_uppercase()
}) {
Some(Ok(expr)) => expr,
Some(Err(mk_err_for)) => {
working_set.error(mk_err_for("filesize"));
garbage(span)
}
None => {
working_set.error(ParseError::Expected(
"filesize with valid units".into(),
span,
));
garbage(span)
}
}
}
type ParseUnitResult<'res> = Result<Expression, Box<dyn Fn(&'res str) -> ParseError>>;
type UnitGroup<'unit> = (Unit, &'unit str, Option<(Unit, i64)>);
pub fn parse_unit_value<'res>(
bytes: &[u8],
span: Span,
unit_groups: &[UnitGroup],
ty: Type,
transform: fn(String) -> String,
) -> Option<ParseUnitResult<'res>> {
if bytes.len() < 2
|| !(bytes[0].is_ascii_digit() || (bytes[0] == b'-' && bytes[1].is_ascii_digit()))
{
return None;
}
let value = transform(String::from_utf8_lossy(bytes).into());
if let Some((unit, name, convert)) = unit_groups.iter().find(|x| value.ends_with(x.1)) {
let lhs_len = value.len() - name.len();
let lhs = strip_underscores(value[..lhs_len].as_bytes());
let lhs_span = Span::new(span.start, span.start + lhs_len);
let unit_span = Span::new(span.start + lhs_len, span.end);
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
Ok(it) => it,
Err(_) => {
let mk_err = move |name| {
ParseError::LabeledError(
format!("{name} value must be a number"),
"not a number".into(),
lhs_span,
)
};
return Some(Err(Box::new(mk_err)));
}
});
let (num, unit) = match convert {
Some(convert_to) => (
((number_part * convert_to.1 as f64) + (decimal_part * convert_to.1 as f64)) as i64,
convert_to.0,
),
None => (number_part as i64, *unit),
};
trace!("-- found {} {:?}", num, unit);
let expr = Expression {
expr: Expr::ValueWithUnit(
Box::new(Expression {
expr: Expr::Int(num),
span: lhs_span,
ty: Type::Number,
custom_completion: None,
}),
Spanned {
item: unit,
span: unit_span,
},
),
span,
ty,
custom_completion: None,
};
Some(Ok(expr))
} else {
None
}
}
pub const FILESIZE_UNIT_GROUPS: &[UnitGroup] = &[
(Unit::Kilobyte, "KB", Some((Unit::Byte, 1000))),
(Unit::Megabyte, "MB", Some((Unit::Kilobyte, 1000))),
(Unit::Gigabyte, "GB", Some((Unit::Megabyte, 1000))),
(Unit::Terabyte, "TB", Some((Unit::Gigabyte, 1000))),
(Unit::Petabyte, "PB", Some((Unit::Terabyte, 1000))),
(Unit::Exabyte, "EB", Some((Unit::Petabyte, 1000))),
(Unit::Zettabyte, "ZB", Some((Unit::Exabyte, 1000))),
(Unit::Kibibyte, "KIB", Some((Unit::Byte, 1024))),
(Unit::Mebibyte, "MIB", Some((Unit::Kibibyte, 1024))),
(Unit::Gibibyte, "GIB", Some((Unit::Mebibyte, 1024))),
(Unit::Tebibyte, "TIB", Some((Unit::Gibibyte, 1024))),
(Unit::Pebibyte, "PIB", Some((Unit::Tebibyte, 1024))),
(Unit::Exbibyte, "EIB", Some((Unit::Pebibyte, 1024))),
(Unit::Zebibyte, "ZIB", Some((Unit::Exbibyte, 1024))),
(Unit::Byte, "B", None),
];
pub const DURATION_UNIT_GROUPS: &[UnitGroup] = &[
(Unit::Nanosecond, "ns", None),
(Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
(
// µ Micro Sign
Unit::Microsecond,
"\u{00B5}s",
Some((Unit::Nanosecond, 1000)),
),
(
// μ Greek small letter Mu
Unit::Microsecond,
"\u{03BC}s",
Some((Unit::Nanosecond, 1000)),
),
(Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
(Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
(Unit::Minute, "min", Some((Unit::Second, 60))),
(Unit::Hour, "hr", Some((Unit::Minute, 60))),
(Unit::Day, "day", Some((Unit::Minute, 1440))),
(Unit::Week, "wk", Some((Unit::Day, 7))),
];
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs // Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
pub fn modf(x: f64) -> (f64, f64) { fn modf(x: f64) -> (f64, f64) {
let rv2: f64; let rv2: f64;
let mut u = x.to_bits(); let mut u = x.to_bits();
let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff; let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff;
@ -2247,187 +2386,6 @@ pub fn modf(x: f64) -> (f64, f64) {
(x - rv2, rv2) (x - rv2, rv2)
} }
pub fn parse_duration_bytes(num_with_unit_bytes: &[u8], span: Span) -> Option<Expression> {
if num_with_unit_bytes.is_empty()
|| (!num_with_unit_bytes[0].is_ascii_digit() && num_with_unit_bytes[0] != b'-')
{
return None;
}
let num_with_unit = String::from_utf8_lossy(num_with_unit_bytes).to_string();
let unit_groups = [
(Unit::Nanosecond, "ns", None),
(Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
(
// µ Micro Sign
Unit::Microsecond,
"\u{00B5}s",
Some((Unit::Nanosecond, 1000)),
),
(
// μ Greek small letter Mu
Unit::Microsecond,
"\u{03BC}s",
Some((Unit::Nanosecond, 1000)),
),
(Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
(Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
(Unit::Minute, "min", Some((Unit::Second, 60))),
(Unit::Hour, "hr", Some((Unit::Minute, 60))),
(Unit::Day, "day", Some((Unit::Minute, 1440))),
(Unit::Week, "wk", Some((Unit::Day, 7))),
];
if let Some(unit) = unit_groups.iter().find(|&x| num_with_unit.ends_with(x.1)) {
let mut lhs = num_with_unit;
for _ in 0..unit.1.chars().count() {
lhs.pop();
}
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
Ok(x) => x,
Err(_) => return None,
});
let (num, unit_to_use) = match unit.2 {
Some(unit_to_convert_to) => (
Some(
((number_part * unit_to_convert_to.1 as f64)
+ (decimal_part * unit_to_convert_to.1 as f64)) as i64,
),
unit_to_convert_to.0,
),
None => (Some(number_part as i64), unit.0),
};
if let Some(x) = num {
trace!("-- found {} {:?}", x, unit_to_use);
let lhs_span = Span::new(span.start, span.start + lhs.len());
let unit_span = Span::new(span.start + lhs.len(), span.end);
return Some(Expression {
expr: Expr::ValueWithUnit(
Box::new(Expression {
expr: Expr::Int(x),
span: lhs_span,
ty: Type::Number,
custom_completion: None,
}),
Spanned {
item: unit_to_use,
span: unit_span,
},
),
span,
ty: Type::Duration,
custom_completion: None,
});
}
}
None
}
/// Parse a unit type, eg '10kb'
pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
trace!("parsing: filesize");
let bytes = working_set.get_span_contents(span);
//todo: parse_filesize_bytes should distinguish between not-that-type and syntax error in units
match parse_filesize_bytes(bytes, span) {
Some(expression) => expression,
None => {
working_set.error(ParseError::Expected(
"filesize with valid units".into(),
span,
));
garbage(span)
}
}
}
pub fn parse_filesize_bytes(num_with_unit_bytes: &[u8], span: Span) -> Option<Expression> {
if num_with_unit_bytes.is_empty()
|| (!num_with_unit_bytes[0].is_ascii_digit() && num_with_unit_bytes[0] != b'-')
{
return None;
}
let num_with_unit = String::from_utf8_lossy(num_with_unit_bytes).to_string();
let uppercase_num_with_unit = num_with_unit.to_uppercase();
let unit_groups = [
(Unit::Kilobyte, "KB", Some((Unit::Byte, 1000))),
(Unit::Megabyte, "MB", Some((Unit::Kilobyte, 1000))),
(Unit::Gigabyte, "GB", Some((Unit::Megabyte, 1000))),
(Unit::Terabyte, "TB", Some((Unit::Gigabyte, 1000))),
(Unit::Petabyte, "PB", Some((Unit::Terabyte, 1000))),
(Unit::Exabyte, "EB", Some((Unit::Petabyte, 1000))),
(Unit::Zettabyte, "ZB", Some((Unit::Exabyte, 1000))),
(Unit::Kibibyte, "KIB", Some((Unit::Byte, 1024))),
(Unit::Mebibyte, "MIB", Some((Unit::Kibibyte, 1024))),
(Unit::Gibibyte, "GIB", Some((Unit::Mebibyte, 1024))),
(Unit::Tebibyte, "TIB", Some((Unit::Gibibyte, 1024))),
(Unit::Pebibyte, "PIB", Some((Unit::Tebibyte, 1024))),
(Unit::Exbibyte, "EIB", Some((Unit::Pebibyte, 1024))),
(Unit::Zebibyte, "ZIB", Some((Unit::Exbibyte, 1024))),
(Unit::Byte, "B", None),
];
if let Some(unit) = unit_groups
.iter()
.find(|&x| uppercase_num_with_unit.ends_with(x.1))
{
let mut lhs = num_with_unit;
for _ in 0..unit.1.len() {
lhs.pop();
}
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
Ok(x) => x,
Err(_) => return None,
});
let (num, unit_to_use) = match unit.2 {
Some(unit_to_convert_to) => (
Some(
((number_part * unit_to_convert_to.1 as f64)
+ (decimal_part * unit_to_convert_to.1 as f64)) as i64,
),
unit_to_convert_to.0,
),
None => (Some(number_part as i64), unit.0),
};
if let Some(x) = num {
trace!("-- found {} {:?}", x, unit_to_use);
let lhs_span = Span::new(span.start, span.start + lhs.len());
let unit_span = Span::new(span.start + lhs.len(), span.end);
return Some(Expression {
expr: Expr::ValueWithUnit(
Box::new(Expression {
expr: Expr::Int(x),
span: lhs_span,
ty: Type::Number,
custom_completion: None,
}),
Spanned {
item: unit_to_use,
span: unit_span,
},
),
span,
ty: Type::Filesize,
custom_completion: None,
});
}
}
None
}
pub fn parse_glob_pattern(working_set: &mut StateWorkingSet, span: Span) -> Expression { pub fn parse_glob_pattern(working_set: &mut StateWorkingSet, span: Span) -> Expression {
let bytes = working_set.get_span_contents(span); let bytes = working_set.get_span_contents(span);
let (token, err) = unescape_unquote_string(bytes, span); let (token, err) = unescape_unquote_string(bytes, span);

View File

@ -526,3 +526,38 @@ fn extern_errors_with_no_space_between_params_and_name_1() -> TestResult {
fn extern_errors_with_no_space_between_params_and_name_2() -> TestResult { fn extern_errors_with_no_space_between_params_and_name_2() -> TestResult {
fail_test("extern cmd(--flag)", "expected space") fail_test("extern cmd(--flag)", "expected space")
} }
#[test]
fn duration_with_underscores_1() -> TestResult {
run_test("420_min", "7hr")
}
#[test]
fn duration_with_underscores_2() -> TestResult {
run_test("1_000_000sec", "1wk 4day 13hr 46min 40sec")
}
#[test]
fn duration_with_underscores_3() -> TestResult {
fail_test("1_000_d_ay", "executable was not found")
}
#[test]
fn duration_with_faulty_number() -> TestResult {
fail_test("sleep 4-ms", "duration value must be a number")
}
#[test]
fn filesize_with_underscores_1() -> TestResult {
run_test("420_mb", "400.5 MiB")
}
#[test]
fn filesize_with_underscores_2() -> TestResult {
run_test("1_000_000B", "976.6 KiB")
}
#[test]
fn filesize_with_underscores_3() -> TestResult {
fail_test("42m_b", "executable was not found")
}