mirror of
https://github.com/nushell/nushell.git
synced 2024-11-21 16:03:19 +01:00
improve parsing of values with units (#9190)
closes #9111 # Description this pr improves parsing of values with units (`filesizes`, `durations` and any other **future values**) by: 1. allowing underscores in the value part ```nu > 42kb # okay > 42_sec # okay > 1_000_000mib # okay > 69k_b # not okay, underscores not allowed in the unit ``` 2. improving error messages involving these values ```nu > sleep 40-sec # before Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[entry #42:1:1] 1 │ sleep 40-sec · ──┬── · ╰── expected duration with valid units ╰──── # now Error: × duration value must be a number ╭─[entry #41:1:1] 1 │ sleep 40-sec · ─┬─ · ╰── not a number ╰──── ``` 3. unifying parsing of these values. now all of these use one function # User-Facing Changes filesizes and durations can now have underscores for readability
This commit is contained in:
parent
6a0c88d516
commit
0e4729b203
@ -1,5 +1,5 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_parser::parse_duration_bytes;
|
||||
use nu_parser::{parse_unit_value, DURATION_UNIT_GROUPS};
|
||||
use nu_protocol::{
|
||||
ast::{Call, CellPath, Expr},
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -409,7 +409,13 @@ fn convert_str_from_unit_to_unit(
|
||||
}
|
||||
|
||||
fn string_to_duration(s: &str, span: Span, value_span: Span) -> Result<i64, ShellError> {
|
||||
if let Some(expression) = parse_duration_bytes(s.as_bytes(), span) {
|
||||
if let Some(Ok(expression)) = parse_unit_value(
|
||||
s.as_bytes(),
|
||||
span,
|
||||
DURATION_UNIT_GROUPS,
|
||||
Type::Duration,
|
||||
|x| x,
|
||||
) {
|
||||
if let Expr::ValueWithUnit(value, unit) = expression.expr {
|
||||
if let Expr::Int(x) = value.expr {
|
||||
match unit.item {
|
||||
@ -445,7 +451,13 @@ fn string_to_unit_duration(
|
||||
span: Span,
|
||||
value_span: Span,
|
||||
) -> Result<(&str, i64), ShellError> {
|
||||
if let Some(expression) = parse_duration_bytes(s.as_bytes(), span) {
|
||||
if let Some(Ok(expression)) = parse_unit_value(
|
||||
s.as_bytes(),
|
||||
span,
|
||||
DURATION_UNIT_GROUPS,
|
||||
Type::Duration,
|
||||
|x| x,
|
||||
) {
|
||||
if let Expr::ValueWithUnit(value, unit) = expression.expr {
|
||||
if let Expr::Int(x) = value.expr {
|
||||
match unit.item {
|
||||
|
@ -19,8 +19,8 @@ pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
|
||||
pub use parse_keywords::*;
|
||||
|
||||
pub use parser::{
|
||||
is_math_expression_like, parse, parse_block, parse_duration_bytes, parse_expression,
|
||||
parse_external_call, trim_quotes, trim_quotes_str, unescape_unquote_string,
|
||||
is_math_expression_like, parse, parse_block, parse_expression, parse_external_call,
|
||||
parse_unit_value, trim_quotes, trim_quotes_str, unescape_unquote_string, DURATION_UNIT_GROUPS,
|
||||
};
|
||||
|
||||
#[cfg(feature = "plugin")]
|
||||
|
@ -2199,21 +2199,160 @@ pub fn parse_duration(working_set: &mut StateWorkingSet, span: Span) -> Expressi
|
||||
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
match parse_duration_bytes(bytes, span) {
|
||||
Some(expression) => expression,
|
||||
match parse_unit_value(bytes, span, DURATION_UNIT_GROUPS, Type::Duration, |x| x) {
|
||||
Some(Ok(expr)) => expr,
|
||||
Some(Err(mk_err_for)) => {
|
||||
working_set.error(mk_err_for("duration"));
|
||||
garbage(span)
|
||||
}
|
||||
None => {
|
||||
working_set.error(ParseError::Expected(
|
||||
"duration with valid units".into(),
|
||||
span,
|
||||
));
|
||||
|
||||
garbage(span)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a unit type, eg '10kb'
|
||||
pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
trace!("parsing: filesize");
|
||||
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
match parse_unit_value(bytes, span, FILESIZE_UNIT_GROUPS, Type::Filesize, |x| {
|
||||
x.to_uppercase()
|
||||
}) {
|
||||
Some(Ok(expr)) => expr,
|
||||
Some(Err(mk_err_for)) => {
|
||||
working_set.error(mk_err_for("filesize"));
|
||||
garbage(span)
|
||||
}
|
||||
None => {
|
||||
working_set.error(ParseError::Expected(
|
||||
"filesize with valid units".into(),
|
||||
span,
|
||||
));
|
||||
garbage(span)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type ParseUnitResult<'res> = Result<Expression, Box<dyn Fn(&'res str) -> ParseError>>;
|
||||
type UnitGroup<'unit> = (Unit, &'unit str, Option<(Unit, i64)>);
|
||||
|
||||
pub fn parse_unit_value<'res>(
|
||||
bytes: &[u8],
|
||||
span: Span,
|
||||
unit_groups: &[UnitGroup],
|
||||
ty: Type,
|
||||
transform: fn(String) -> String,
|
||||
) -> Option<ParseUnitResult<'res>> {
|
||||
if bytes.len() < 2
|
||||
|| !(bytes[0].is_ascii_digit() || (bytes[0] == b'-' && bytes[1].is_ascii_digit()))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let value = transform(String::from_utf8_lossy(bytes).into());
|
||||
|
||||
if let Some((unit, name, convert)) = unit_groups.iter().find(|x| value.ends_with(x.1)) {
|
||||
let lhs_len = value.len() - name.len();
|
||||
let lhs = strip_underscores(value[..lhs_len].as_bytes());
|
||||
let lhs_span = Span::new(span.start, span.start + lhs_len);
|
||||
let unit_span = Span::new(span.start + lhs_len, span.end);
|
||||
|
||||
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
|
||||
Ok(it) => it,
|
||||
Err(_) => {
|
||||
let mk_err = move |name| {
|
||||
ParseError::LabeledError(
|
||||
format!("{name} value must be a number"),
|
||||
"not a number".into(),
|
||||
lhs_span,
|
||||
)
|
||||
};
|
||||
return Some(Err(Box::new(mk_err)));
|
||||
}
|
||||
});
|
||||
|
||||
let (num, unit) = match convert {
|
||||
Some(convert_to) => (
|
||||
((number_part * convert_to.1 as f64) + (decimal_part * convert_to.1 as f64)) as i64,
|
||||
convert_to.0,
|
||||
),
|
||||
None => (number_part as i64, *unit),
|
||||
};
|
||||
|
||||
trace!("-- found {} {:?}", num, unit);
|
||||
let expr = Expression {
|
||||
expr: Expr::ValueWithUnit(
|
||||
Box::new(Expression {
|
||||
expr: Expr::Int(num),
|
||||
span: lhs_span,
|
||||
ty: Type::Number,
|
||||
custom_completion: None,
|
||||
}),
|
||||
Spanned {
|
||||
item: unit,
|
||||
span: unit_span,
|
||||
},
|
||||
),
|
||||
span,
|
||||
ty,
|
||||
custom_completion: None,
|
||||
};
|
||||
|
||||
Some(Ok(expr))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub const FILESIZE_UNIT_GROUPS: &[UnitGroup] = &[
|
||||
(Unit::Kilobyte, "KB", Some((Unit::Byte, 1000))),
|
||||
(Unit::Megabyte, "MB", Some((Unit::Kilobyte, 1000))),
|
||||
(Unit::Gigabyte, "GB", Some((Unit::Megabyte, 1000))),
|
||||
(Unit::Terabyte, "TB", Some((Unit::Gigabyte, 1000))),
|
||||
(Unit::Petabyte, "PB", Some((Unit::Terabyte, 1000))),
|
||||
(Unit::Exabyte, "EB", Some((Unit::Petabyte, 1000))),
|
||||
(Unit::Zettabyte, "ZB", Some((Unit::Exabyte, 1000))),
|
||||
(Unit::Kibibyte, "KIB", Some((Unit::Byte, 1024))),
|
||||
(Unit::Mebibyte, "MIB", Some((Unit::Kibibyte, 1024))),
|
||||
(Unit::Gibibyte, "GIB", Some((Unit::Mebibyte, 1024))),
|
||||
(Unit::Tebibyte, "TIB", Some((Unit::Gibibyte, 1024))),
|
||||
(Unit::Pebibyte, "PIB", Some((Unit::Tebibyte, 1024))),
|
||||
(Unit::Exbibyte, "EIB", Some((Unit::Pebibyte, 1024))),
|
||||
(Unit::Zebibyte, "ZIB", Some((Unit::Exbibyte, 1024))),
|
||||
(Unit::Byte, "B", None),
|
||||
];
|
||||
|
||||
pub const DURATION_UNIT_GROUPS: &[UnitGroup] = &[
|
||||
(Unit::Nanosecond, "ns", None),
|
||||
(Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
|
||||
(
|
||||
// µ Micro Sign
|
||||
Unit::Microsecond,
|
||||
"\u{00B5}s",
|
||||
Some((Unit::Nanosecond, 1000)),
|
||||
),
|
||||
(
|
||||
// μ Greek small letter Mu
|
||||
Unit::Microsecond,
|
||||
"\u{03BC}s",
|
||||
Some((Unit::Nanosecond, 1000)),
|
||||
),
|
||||
(Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
|
||||
(Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
|
||||
(Unit::Minute, "min", Some((Unit::Second, 60))),
|
||||
(Unit::Hour, "hr", Some((Unit::Minute, 60))),
|
||||
(Unit::Day, "day", Some((Unit::Minute, 1440))),
|
||||
(Unit::Week, "wk", Some((Unit::Day, 7))),
|
||||
];
|
||||
|
||||
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
|
||||
pub fn modf(x: f64) -> (f64, f64) {
|
||||
fn modf(x: f64) -> (f64, f64) {
|
||||
let rv2: f64;
|
||||
let mut u = x.to_bits();
|
||||
let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff;
|
||||
@ -2247,187 +2386,6 @@ pub fn modf(x: f64) -> (f64, f64) {
|
||||
(x - rv2, rv2)
|
||||
}
|
||||
|
||||
pub fn parse_duration_bytes(num_with_unit_bytes: &[u8], span: Span) -> Option<Expression> {
|
||||
if num_with_unit_bytes.is_empty()
|
||||
|| (!num_with_unit_bytes[0].is_ascii_digit() && num_with_unit_bytes[0] != b'-')
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let num_with_unit = String::from_utf8_lossy(num_with_unit_bytes).to_string();
|
||||
let unit_groups = [
|
||||
(Unit::Nanosecond, "ns", None),
|
||||
(Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
|
||||
(
|
||||
// µ Micro Sign
|
||||
Unit::Microsecond,
|
||||
"\u{00B5}s",
|
||||
Some((Unit::Nanosecond, 1000)),
|
||||
),
|
||||
(
|
||||
// μ Greek small letter Mu
|
||||
Unit::Microsecond,
|
||||
"\u{03BC}s",
|
||||
Some((Unit::Nanosecond, 1000)),
|
||||
),
|
||||
(Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
|
||||
(Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
|
||||
(Unit::Minute, "min", Some((Unit::Second, 60))),
|
||||
(Unit::Hour, "hr", Some((Unit::Minute, 60))),
|
||||
(Unit::Day, "day", Some((Unit::Minute, 1440))),
|
||||
(Unit::Week, "wk", Some((Unit::Day, 7))),
|
||||
];
|
||||
|
||||
if let Some(unit) = unit_groups.iter().find(|&x| num_with_unit.ends_with(x.1)) {
|
||||
let mut lhs = num_with_unit;
|
||||
for _ in 0..unit.1.chars().count() {
|
||||
lhs.pop();
|
||||
}
|
||||
|
||||
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
|
||||
Ok(x) => x,
|
||||
Err(_) => return None,
|
||||
});
|
||||
|
||||
let (num, unit_to_use) = match unit.2 {
|
||||
Some(unit_to_convert_to) => (
|
||||
Some(
|
||||
((number_part * unit_to_convert_to.1 as f64)
|
||||
+ (decimal_part * unit_to_convert_to.1 as f64)) as i64,
|
||||
),
|
||||
unit_to_convert_to.0,
|
||||
),
|
||||
None => (Some(number_part as i64), unit.0),
|
||||
};
|
||||
|
||||
if let Some(x) = num {
|
||||
trace!("-- found {} {:?}", x, unit_to_use);
|
||||
|
||||
let lhs_span = Span::new(span.start, span.start + lhs.len());
|
||||
let unit_span = Span::new(span.start + lhs.len(), span.end);
|
||||
return Some(Expression {
|
||||
expr: Expr::ValueWithUnit(
|
||||
Box::new(Expression {
|
||||
expr: Expr::Int(x),
|
||||
span: lhs_span,
|
||||
ty: Type::Number,
|
||||
custom_completion: None,
|
||||
}),
|
||||
Spanned {
|
||||
item: unit_to_use,
|
||||
span: unit_span,
|
||||
},
|
||||
),
|
||||
span,
|
||||
ty: Type::Duration,
|
||||
custom_completion: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse a unit type, eg '10kb'
|
||||
pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
trace!("parsing: filesize");
|
||||
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
//todo: parse_filesize_bytes should distinguish between not-that-type and syntax error in units
|
||||
match parse_filesize_bytes(bytes, span) {
|
||||
Some(expression) => expression,
|
||||
None => {
|
||||
working_set.error(ParseError::Expected(
|
||||
"filesize with valid units".into(),
|
||||
span,
|
||||
));
|
||||
|
||||
garbage(span)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_filesize_bytes(num_with_unit_bytes: &[u8], span: Span) -> Option<Expression> {
|
||||
if num_with_unit_bytes.is_empty()
|
||||
|| (!num_with_unit_bytes[0].is_ascii_digit() && num_with_unit_bytes[0] != b'-')
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let num_with_unit = String::from_utf8_lossy(num_with_unit_bytes).to_string();
|
||||
let uppercase_num_with_unit = num_with_unit.to_uppercase();
|
||||
let unit_groups = [
|
||||
(Unit::Kilobyte, "KB", Some((Unit::Byte, 1000))),
|
||||
(Unit::Megabyte, "MB", Some((Unit::Kilobyte, 1000))),
|
||||
(Unit::Gigabyte, "GB", Some((Unit::Megabyte, 1000))),
|
||||
(Unit::Terabyte, "TB", Some((Unit::Gigabyte, 1000))),
|
||||
(Unit::Petabyte, "PB", Some((Unit::Terabyte, 1000))),
|
||||
(Unit::Exabyte, "EB", Some((Unit::Petabyte, 1000))),
|
||||
(Unit::Zettabyte, "ZB", Some((Unit::Exabyte, 1000))),
|
||||
(Unit::Kibibyte, "KIB", Some((Unit::Byte, 1024))),
|
||||
(Unit::Mebibyte, "MIB", Some((Unit::Kibibyte, 1024))),
|
||||
(Unit::Gibibyte, "GIB", Some((Unit::Mebibyte, 1024))),
|
||||
(Unit::Tebibyte, "TIB", Some((Unit::Gibibyte, 1024))),
|
||||
(Unit::Pebibyte, "PIB", Some((Unit::Tebibyte, 1024))),
|
||||
(Unit::Exbibyte, "EIB", Some((Unit::Pebibyte, 1024))),
|
||||
(Unit::Zebibyte, "ZIB", Some((Unit::Exbibyte, 1024))),
|
||||
(Unit::Byte, "B", None),
|
||||
];
|
||||
|
||||
if let Some(unit) = unit_groups
|
||||
.iter()
|
||||
.find(|&x| uppercase_num_with_unit.ends_with(x.1))
|
||||
{
|
||||
let mut lhs = num_with_unit;
|
||||
for _ in 0..unit.1.len() {
|
||||
lhs.pop();
|
||||
}
|
||||
|
||||
let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
|
||||
Ok(x) => x,
|
||||
Err(_) => return None,
|
||||
});
|
||||
|
||||
let (num, unit_to_use) = match unit.2 {
|
||||
Some(unit_to_convert_to) => (
|
||||
Some(
|
||||
((number_part * unit_to_convert_to.1 as f64)
|
||||
+ (decimal_part * unit_to_convert_to.1 as f64)) as i64,
|
||||
),
|
||||
unit_to_convert_to.0,
|
||||
),
|
||||
None => (Some(number_part as i64), unit.0),
|
||||
};
|
||||
|
||||
if let Some(x) = num {
|
||||
trace!("-- found {} {:?}", x, unit_to_use);
|
||||
|
||||
let lhs_span = Span::new(span.start, span.start + lhs.len());
|
||||
let unit_span = Span::new(span.start + lhs.len(), span.end);
|
||||
return Some(Expression {
|
||||
expr: Expr::ValueWithUnit(
|
||||
Box::new(Expression {
|
||||
expr: Expr::Int(x),
|
||||
span: lhs_span,
|
||||
ty: Type::Number,
|
||||
custom_completion: None,
|
||||
}),
|
||||
Spanned {
|
||||
item: unit_to_use,
|
||||
span: unit_span,
|
||||
},
|
||||
),
|
||||
span,
|
||||
ty: Type::Filesize,
|
||||
custom_completion: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_glob_pattern(working_set: &mut StateWorkingSet, span: Span) -> Expression {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
let (token, err) = unescape_unquote_string(bytes, span);
|
||||
|
@ -526,3 +526,38 @@ fn extern_errors_with_no_space_between_params_and_name_1() -> TestResult {
|
||||
fn extern_errors_with_no_space_between_params_and_name_2() -> TestResult {
|
||||
fail_test("extern cmd(--flag)", "expected space")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration_with_underscores_1() -> TestResult {
|
||||
run_test("420_min", "7hr")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration_with_underscores_2() -> TestResult {
|
||||
run_test("1_000_000sec", "1wk 4day 13hr 46min 40sec")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration_with_underscores_3() -> TestResult {
|
||||
fail_test("1_000_d_ay", "executable was not found")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration_with_faulty_number() -> TestResult {
|
||||
fail_test("sleep 4-ms", "duration value must be a number")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filesize_with_underscores_1() -> TestResult {
|
||||
run_test("420_mb", "400.5 MiB")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filesize_with_underscores_2() -> TestResult {
|
||||
run_test("1_000_000B", "976.6 KiB")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filesize_with_underscores_3() -> TestResult {
|
||||
fail_test("42m_b", "executable was not found")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user