mirror of
https://github.com/nushell/nushell.git
synced 2025-06-20 09:58:15 +02:00
Improve precision in parsing of filesize values (#15950)
- improves rounding error reported in #15851 - my ~~discussion~~ monologue on how filesizes are parsed currently: #15944 # Description The issue linked above reported rounding errors when converting MiB to GiB, which is mainly caused by parsing of the literal. Nushell tries to convert all filesize values to bytes, but currently does so in 2 steps: - first converting it to the next smaller unit in `nu-parser` (so `MiB` to `KiB`, in this case), and truncating to an `i64` here - then converting that to bytes in `nu-engine`, again truncating to `i64` In the specific example above (`95307.27MiB`), this causes 419 bytes of rounding error. By instead directly converting to bytes while parsing, the value is accurate (truncating those 0.52 bytes, or 4.12 bits). Rounding error in the conversion to GiB is also multiple magnitudes lower. (Note that I haven't thoroughly tested this, so I can't say with confidence that all values would be parsed accurate to the byte.) # User-Facing Changes More accurate filesize values, and lower accumulated rounding error in calculations. # Tests + Formatting new test: `parse_filesize` in `nu-parser` - verifies that `95307.27MiB` is parsed correctly as `99_936_915_947B` # After Submitting
This commit is contained in:
parent
ba59f71f20
commit
22d1fdcdf6
@ -2801,13 +2801,24 @@ pub fn parse_unit_value<'res>(
|
||||
None => number_part,
|
||||
};
|
||||
|
||||
// Convert all durations to nanoseconds to not lose precision
|
||||
let num = match unit_to_ns_factor(&unit) {
|
||||
// Convert all durations to nanoseconds, and filesizes to bytes,
|
||||
// to minimize loss of precision
|
||||
let factor = match ty {
|
||||
Type::Filesize => unit_to_byte_factor(&unit),
|
||||
Type::Duration => unit_to_ns_factor(&unit),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let num = match factor {
|
||||
Some(factor) => {
|
||||
let num_ns = num_float * factor;
|
||||
if i64::MIN as f64 <= num_ns && num_ns <= i64::MAX as f64 {
|
||||
unit = Unit::Nanosecond;
|
||||
num_ns as i64
|
||||
let num_base = num_float * factor;
|
||||
if i64::MIN as f64 <= num_base && num_base <= i64::MAX as f64 {
|
||||
unit = if ty == Type::Filesize {
|
||||
Unit::Filesize(FilesizeUnit::B)
|
||||
} else {
|
||||
Unit::Nanosecond
|
||||
};
|
||||
num_base as i64
|
||||
} else {
|
||||
// not safe to convert, because of the overflow
|
||||
num_float as i64
|
||||
@ -2934,6 +2945,27 @@ fn unit_to_ns_factor(unit: &Unit) -> Option<f64> {
|
||||
}
|
||||
}
|
||||
|
||||
fn unit_to_byte_factor(unit: &Unit) -> Option<f64> {
|
||||
match unit {
|
||||
Unit::Filesize(FilesizeUnit::B) => Some(1.0),
|
||||
Unit::Filesize(FilesizeUnit::KB) => Some(1_000.0),
|
||||
Unit::Filesize(FilesizeUnit::MB) => Some(1_000_000.0),
|
||||
Unit::Filesize(FilesizeUnit::GB) => Some(1_000_000_000.0),
|
||||
Unit::Filesize(FilesizeUnit::TB) => Some(1_000_000_000_000.0),
|
||||
Unit::Filesize(FilesizeUnit::PB) => Some(1_000_000_000_000_000.0),
|
||||
Unit::Filesize(FilesizeUnit::EB) => Some(1_000_000_000_000_000_000.0),
|
||||
Unit::Filesize(FilesizeUnit::KiB) => Some(1024.0),
|
||||
Unit::Filesize(FilesizeUnit::MiB) => Some(1024.0 * 1024.0),
|
||||
Unit::Filesize(FilesizeUnit::GiB) => Some(1024.0 * 1024.0 * 1024.0),
|
||||
Unit::Filesize(FilesizeUnit::TiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0),
|
||||
Unit::Filesize(FilesizeUnit::PiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0),
|
||||
Unit::Filesize(FilesizeUnit::EiB) => {
|
||||
Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
|
||||
fn modf(x: f64) -> (f64, f64) {
|
||||
let rv2: f64;
|
||||
|
@ -1,6 +1,6 @@
|
||||
use nu_parser::*;
|
||||
use nu_protocol::{
|
||||
DeclId, ParseError, Signature, Span, SyntaxShape, Type,
|
||||
DeclId, FilesizeUnit, ParseError, Signature, Span, SyntaxShape, Type, Unit,
|
||||
ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range},
|
||||
engine::{Command, EngineState, Stack, StateWorkingSet},
|
||||
};
|
||||
@ -270,6 +270,28 @@ pub fn parse_int_with_underscores() {
|
||||
assert_eq!(element.expr.expr, Expr::Int(420692023));
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn parse_filesize() {
|
||||
let engine_state = EngineState::new();
|
||||
let mut working_set = StateWorkingSet::new(&engine_state);
|
||||
|
||||
let block = parse(&mut working_set, None, b"95307.27MiB", true);
|
||||
|
||||
assert!(working_set.parse_errors.is_empty());
|
||||
assert_eq!(block.len(), 1);
|
||||
let pipeline = &block.pipelines[0];
|
||||
assert_eq!(pipeline.len(), 1);
|
||||
let element = &pipeline.elements[0];
|
||||
assert!(element.redirection.is_none());
|
||||
|
||||
let Expr::ValueWithUnit(value) = &element.expr.expr else {
|
||||
panic!("should be a ValueWithUnit");
|
||||
};
|
||||
|
||||
assert_eq!(value.expr.expr, Expr::Int(99_936_915_947));
|
||||
assert_eq!(value.unit.item, Unit::Filesize(FilesizeUnit::B));
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn parse_cell_path() {
|
||||
let engine_state = EngineState::new();
|
||||
|
Loading…
x
Reference in New Issue
Block a user