perf: reorder cell-path member accesses to avoid clones (#15682)

Co-authored-by: Bahex <17417311+Bahex@users.noreply.github.com>
Co-authored-by: Piepmatz <git+github@cptpiepmatz.de>
This commit is contained in:
Bahex
2025-07-02 13:55:46 +03:00
committed by GitHub
parent 105ec0c89f
commit c95c1e845c
5 changed files with 84 additions and 2 deletions

View File

@ -6,6 +6,7 @@
use crate::*;
mod example;
mod reorder_cell_paths;
/// Marker trait for defining experimental options.
///
@ -40,6 +41,7 @@ pub(crate) trait ExperimentalOptionMarker {
// Export only the static values.
// The marker structs are not relevant and needlessly clutter the generated docs.
pub use example::EXAMPLE;
pub use reorder_cell_paths::REORDER_CELL_PATHS;
// Include all experimental option statics in here.
// This will test them and add them to the parsing list.
@ -48,7 +50,7 @@ pub use example::EXAMPLE;
///
/// Use this to show users every experimental option, including their descriptions,
/// identifiers, and current state.
pub static ALL: &[&ExperimentalOption] = &[&EXAMPLE];
pub static ALL: &[&ExperimentalOption] = &[&EXAMPLE, &REORDER_CELL_PATHS];
#[cfg(test)]
mod tests {

View File

@ -0,0 +1,30 @@
use crate::*;
/// Reorder cell-path members in `Value::follow_cell_path` to decrease memory usage.
///
/// - Accessing a field in a record is cheap, just a simple search and you get a reference to the
/// value.
/// - Accessing an row in a list is even cheaper, just an array access and you get a reference to
/// the value.
/// - Accessing a **column** in a table is expensive, it requires accessing the relevant field in
/// all rows, and creating a new list to store those value. This uses more computation and more
/// memory than the previous two operations.
///
/// Thus, accessing a column first, and then immediately accessing a row of that column is very
/// wasteful. By simply prioritizing row accesses over column accesses whenever possible we can
/// significantly reduce time and memory use.
pub static REORDER_CELL_PATHS: ExperimentalOption = ExperimentalOption::new(&ReorderCellPaths);
// No documentation needed here since this type isn't public.
// The static above provides all necessary details.
struct ReorderCellPaths;
impl ExperimentalOptionMarker for ReorderCellPaths {
const IDENTIFIER: &'static str = "reorder-cell-paths";
const DESCRIPTION: &'static str = "\
Reorder the steps in accessing nested value to decrease memory usage.
\
Reorder the parts of cell-path when accessing a cell in a table, always select the row before selecting the column.";
const STATUS: Status = Status::OptIn;
}

View File

@ -21,6 +21,7 @@ nu-glob = { path = "../nu-glob", version = "0.105.2" }
nu-path = { path = "../nu-path", version = "0.105.2" }
nu-system = { path = "../nu-system", version = "0.105.2" }
nu-utils = { path = "../nu-utils", version = "0.105.2", default-features = false }
nu-experimental = { path = "../nu-experimental", version = "0.105.2" }
brotli = { workspace = true, optional = true }
bytes = { workspace = true }

View File

@ -1111,7 +1111,55 @@ impl Value {
let mut store: Value = Value::test_nothing();
let mut current: MultiLife<'out, '_, Value> = MultiLife::Out(self);
for member in cell_path {
let reorder_cell_paths = nu_experimental::REORDER_CELL_PATHS.get();
let mut members: Vec<_> = if reorder_cell_paths {
cell_path.iter().map(Some).collect()
} else {
Vec::new()
};
let mut members = members.as_mut_slice();
let mut cell_path = cell_path;
loop {
let member = if reorder_cell_paths {
// Skip any None values at the start.
while let Some(None) = members.first() {
members = &mut members[1..];
}
if members.is_empty() {
break;
}
// Reorder cell-path member access by prioritizing Int members to avoid cloning unless
// necessary
let member = if let Value::List { .. } = &*current {
// If the value is a list, try to find an Int member
members
.iter_mut()
.find(|x| matches!(x, Some(PathMember::Int { .. })))
// And take it from the list of members
.and_then(Option::take)
} else {
None
};
let Some(member) = member.or_else(|| members.first_mut().and_then(Option::take))
else {
break;
};
member
} else {
match cell_path {
[first, rest @ ..] => {
cell_path = rest;
first
}
_ => break,
}
};
current = match current {
MultiLife::Out(current) => match get_value_member(current, member)? {
ControlFlow::Break(span) => return Ok(Cow::Owned(Value::nothing(span))),