From 22d1fdcdf62a99736766274f40459f3b08706d8b Mon Sep 17 00:00:00 2001 From: liquid-dragons <93494392+liquid-dragons@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:58:21 +0200 Subject: [PATCH] Improve precision in parsing of filesize values (#15950) - improves rounding error reported in #15851 - my ~~discussion~~ monologue on how filesizes are parsed currently: #15944 # Description The issue linked above reported rounding errors when converting MiB to GiB, which is mainly caused by parsing of the literal. Nushell tries to convert all filesize values to bytes, but currently does so in 2 steps: - first converting it to the next smaller unit in `nu-parser` (so `MiB` to `KiB`, in this case), and truncating to an `i64` here - then converting that to bytes in `nu-engine`, again truncating to `i64` In the specific example above (`95307.27MiB`), this causes 419 bytes of rounding error. By instead directly converting to bytes while parsing, the value is accurate (truncating those 0.52 bytes, or 4.12 bits). Rounding error in the conversion to GiB is also multiple magnitudes lower. (Note that I haven't thoroughly tested this, so I can't say with confidence that all values would be parsed accurate to the byte.) # User-Facing Changes More accurate filesize values, and lower accumulated rounding error in calculations. # Tests + Formatting new test: `parse_filesize` in `nu-parser` - verifies that `95307.27MiB` is parsed correctly as `99_936_915_947B` # After Submitting --- crates/nu-parser/src/parser.rs | 44 +++++++++++++++++++++++---- crates/nu-parser/tests/test_parser.rs | 24 ++++++++++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index e01c9dd10d..d78834c119 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -2801,13 +2801,24 @@ pub fn parse_unit_value<'res>( None => number_part, }; - // Convert all durations to nanoseconds to not lose precision - let num = match unit_to_ns_factor(&unit) { + // Convert all durations to nanoseconds, and filesizes to bytes, + // to minimize loss of precision + let factor = match ty { + Type::Filesize => unit_to_byte_factor(&unit), + Type::Duration => unit_to_ns_factor(&unit), + _ => None, + }; + + let num = match factor { Some(factor) => { - let num_ns = num_float * factor; - if i64::MIN as f64 <= num_ns && num_ns <= i64::MAX as f64 { - unit = Unit::Nanosecond; - num_ns as i64 + let num_base = num_float * factor; + if i64::MIN as f64 <= num_base && num_base <= i64::MAX as f64 { + unit = if ty == Type::Filesize { + Unit::Filesize(FilesizeUnit::B) + } else { + Unit::Nanosecond + }; + num_base as i64 } else { // not safe to convert, because of the overflow num_float as i64 @@ -2934,6 +2945,27 @@ fn unit_to_ns_factor(unit: &Unit) -> Option { } } +fn unit_to_byte_factor(unit: &Unit) -> Option { + match unit { + Unit::Filesize(FilesizeUnit::B) => Some(1.0), + Unit::Filesize(FilesizeUnit::KB) => Some(1_000.0), + Unit::Filesize(FilesizeUnit::MB) => Some(1_000_000.0), + Unit::Filesize(FilesizeUnit::GB) => Some(1_000_000_000.0), + Unit::Filesize(FilesizeUnit::TB) => Some(1_000_000_000_000.0), + Unit::Filesize(FilesizeUnit::PB) => Some(1_000_000_000_000_000.0), + Unit::Filesize(FilesizeUnit::EB) => Some(1_000_000_000_000_000_000.0), + Unit::Filesize(FilesizeUnit::KiB) => Some(1024.0), + Unit::Filesize(FilesizeUnit::MiB) => Some(1024.0 * 1024.0), + Unit::Filesize(FilesizeUnit::GiB) => Some(1024.0 * 1024.0 * 1024.0), + Unit::Filesize(FilesizeUnit::TiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0), + Unit::Filesize(FilesizeUnit::PiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0), + Unit::Filesize(FilesizeUnit::EiB) => { + Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0) + } + _ => None, + } +} + // Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs fn modf(x: f64) -> (f64, f64) { let rv2: f64; diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index 8b4fd5e95b..0678a2ca13 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -1,6 +1,6 @@ use nu_parser::*; use nu_protocol::{ - DeclId, ParseError, Signature, Span, SyntaxShape, Type, + DeclId, FilesizeUnit, ParseError, Signature, Span, SyntaxShape, Type, Unit, ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range}, engine::{Command, EngineState, Stack, StateWorkingSet}, }; @@ -270,6 +270,28 @@ pub fn parse_int_with_underscores() { assert_eq!(element.expr.expr, Expr::Int(420692023)); } +#[test] +pub fn parse_filesize() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let block = parse(&mut working_set, None, b"95307.27MiB", true); + + assert!(working_set.parse_errors.is_empty()); + assert_eq!(block.len(), 1); + let pipeline = &block.pipelines[0]; + assert_eq!(pipeline.len(), 1); + let element = &pipeline.elements[0]; + assert!(element.redirection.is_none()); + + let Expr::ValueWithUnit(value) = &element.expr.expr else { + panic!("should be a ValueWithUnit"); + }; + + assert_eq!(value.expr.expr, Expr::Int(99_936_915_947)); + assert_eq!(value.unit.item, Unit::Filesize(FilesizeUnit::B)); +} + #[test] pub fn parse_cell_path() { let engine_state = EngineState::new();