Improve precision in parsing of filesize values (#15950)

- improves rounding error reported in #15851
- my ~~discussion~~ monologue on how filesizes are parsed currently:
#15944

# Description

The issue linked above reported rounding errors when converting MiB to
GiB, which is mainly caused by parsing of the literal.

Nushell tries to convert all filesize values to bytes, but currently
does so in 2 steps:
- first converting it to the next smaller unit in `nu-parser` (so `MiB`
to `KiB`, in this case), and truncating to an `i64` here
- then converting that to bytes in `nu-engine`, again truncating to
`i64`

In the specific example above (`95307.27MiB`), this causes 419 bytes of
rounding error. By instead directly converting to bytes while parsing,
the value is accurate (truncating those 0.52 bytes, or 4.12 bits).
Rounding error in the conversion to GiB is also multiple magnitudes
lower.

(Note that I haven't thoroughly tested this, so I can't say with
confidence that all values would be parsed accurate to the byte.)

# User-Facing Changes

More accurate filesize values, and lower accumulated rounding error in
calculations.

# Tests + Formatting

new test: `parse_filesize` in `nu-parser` - verifies that `95307.27MiB`
is parsed correctly as `99_936_915_947B`

# After Submitting
This commit is contained in:
liquid-dragons 2025-06-12 14:58:21 +02:00 committed by GitHub
parent ba59f71f20
commit 22d1fdcdf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 61 additions and 7 deletions

View File

@ -2801,13 +2801,24 @@ pub fn parse_unit_value<'res>(
None => number_part, None => number_part,
}; };
// Convert all durations to nanoseconds to not lose precision // Convert all durations to nanoseconds, and filesizes to bytes,
let num = match unit_to_ns_factor(&unit) { // to minimize loss of precision
let factor = match ty {
Type::Filesize => unit_to_byte_factor(&unit),
Type::Duration => unit_to_ns_factor(&unit),
_ => None,
};
let num = match factor {
Some(factor) => { Some(factor) => {
let num_ns = num_float * factor; let num_base = num_float * factor;
if i64::MIN as f64 <= num_ns && num_ns <= i64::MAX as f64 { if i64::MIN as f64 <= num_base && num_base <= i64::MAX as f64 {
unit = Unit::Nanosecond; unit = if ty == Type::Filesize {
num_ns as i64 Unit::Filesize(FilesizeUnit::B)
} else {
Unit::Nanosecond
};
num_base as i64
} else { } else {
// not safe to convert, because of the overflow // not safe to convert, because of the overflow
num_float as i64 num_float as i64
@ -2934,6 +2945,27 @@ fn unit_to_ns_factor(unit: &Unit) -> Option<f64> {
} }
} }
fn unit_to_byte_factor(unit: &Unit) -> Option<f64> {
match unit {
Unit::Filesize(FilesizeUnit::B) => Some(1.0),
Unit::Filesize(FilesizeUnit::KB) => Some(1_000.0),
Unit::Filesize(FilesizeUnit::MB) => Some(1_000_000.0),
Unit::Filesize(FilesizeUnit::GB) => Some(1_000_000_000.0),
Unit::Filesize(FilesizeUnit::TB) => Some(1_000_000_000_000.0),
Unit::Filesize(FilesizeUnit::PB) => Some(1_000_000_000_000_000.0),
Unit::Filesize(FilesizeUnit::EB) => Some(1_000_000_000_000_000_000.0),
Unit::Filesize(FilesizeUnit::KiB) => Some(1024.0),
Unit::Filesize(FilesizeUnit::MiB) => Some(1024.0 * 1024.0),
Unit::Filesize(FilesizeUnit::GiB) => Some(1024.0 * 1024.0 * 1024.0),
Unit::Filesize(FilesizeUnit::TiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0),
Unit::Filesize(FilesizeUnit::PiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0),
Unit::Filesize(FilesizeUnit::EiB) => {
Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0)
}
_ => None,
}
}
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs // Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
fn modf(x: f64) -> (f64, f64) { fn modf(x: f64) -> (f64, f64) {
let rv2: f64; let rv2: f64;

View File

@ -1,6 +1,6 @@
use nu_parser::*; use nu_parser::*;
use nu_protocol::{ use nu_protocol::{
DeclId, ParseError, Signature, Span, SyntaxShape, Type, DeclId, FilesizeUnit, ParseError, Signature, Span, SyntaxShape, Type, Unit,
ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range}, ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range},
engine::{Command, EngineState, Stack, StateWorkingSet}, engine::{Command, EngineState, Stack, StateWorkingSet},
}; };
@ -270,6 +270,28 @@ pub fn parse_int_with_underscores() {
assert_eq!(element.expr.expr, Expr::Int(420692023)); assert_eq!(element.expr.expr, Expr::Int(420692023));
} }
#[test]
pub fn parse_filesize() {
let engine_state = EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);
let block = parse(&mut working_set, None, b"95307.27MiB", true);
assert!(working_set.parse_errors.is_empty());
assert_eq!(block.len(), 1);
let pipeline = &block.pipelines[0];
assert_eq!(pipeline.len(), 1);
let element = &pipeline.elements[0];
assert!(element.redirection.is_none());
let Expr::ValueWithUnit(value) = &element.expr.expr else {
panic!("should be a ValueWithUnit");
};
assert_eq!(value.expr.expr, Expr::Int(99_936_915_947));
assert_eq!(value.unit.item, Unit::Filesize(FilesizeUnit::B));
}
#[test] #[test]
pub fn parse_cell_path() { pub fn parse_cell_path() {
let engine_state = EngineState::new(); let engine_state = EngineState::new();