From 0cf7de598b2cbeba8fbe955baa0557de9d9df121 Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Sun, 31 Mar 2024 00:47:17 +0100 Subject: [PATCH] Refactor `Record` to use a single backing `Vec` (#12326) # Description This shrinks `Record`'s size in half and and allows you to include it in `Value` without growing the size. Changing the `Record` internals may have slightly different performance characteristics as the cache locality changes on lookups (if you directly need the value, it should be closer, but in other cases may blow up the cache line budget) Also different perf characteristics on creation expected. `Record::from_raw_cols_vals` now probably worse. ## Benchmarking Comparison with the main branch (boxed Record) revealed no significant change to the creation but an improvement when accessing larger N. The fact that this was more pronounced for nested access (still cloning before nushell/nushell#12325) leads to the conclusion that this may still be dominated by the smaller clone necessary for a 24-byte `Record` over the previous 48 bytes. # User-Facing Changes Reduced memory usage --- crates/nu-protocol/src/value/record.rs | 131 ++++++++++--------------- 1 file changed, 50 insertions(+), 81 deletions(-) diff --git a/crates/nu-protocol/src/value/record.rs b/crates/nu-protocol/src/value/record.rs index d1dc28973c..9a3ad31902 100644 --- a/crates/nu-protocol/src/value/record.rs +++ b/crates/nu-protocol/src/value/record.rs @@ -6,8 +6,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct Record { - cols: Vec, - vals: Vec, + inner: Vec<(String, Value)>, } impl Record { @@ -17,8 +16,7 @@ impl Record { pub fn with_capacity(capacity: usize) -> Self { Self { - cols: Vec::with_capacity(capacity), - vals: Vec::with_capacity(capacity), + inner: Vec::with_capacity(capacity), } } @@ -35,7 +33,8 @@ impl Record { creation_site_span: Span, ) -> Result { if cols.len() == vals.len() { - Ok(Self { cols, vals }) + let inner = cols.into_iter().zip(vals).collect(); + Ok(Self { inner }) } else { Err(ShellError::RecordColsValsMismatch { bad_value: input_span, @@ -53,13 +52,11 @@ impl Record { } pub fn is_empty(&self) -> bool { - debug_assert_eq!(self.cols.len(), self.vals.len()); - self.cols.is_empty() + self.inner.is_empty() } pub fn len(&self) -> usize { - debug_assert_eq!(self.cols.len(), self.vals.len()); - self.cols.len() + self.inner.len() } /// Naive push to the end of the datastructure. @@ -68,8 +65,7 @@ impl Record { /// /// Consider to use [`Record::insert`] instead pub fn push(&mut self, col: impl Into, val: Value) { - self.cols.push(col.into()); - self.vals.push(val); + self.inner.push((col.into(), val)); } /// Insert into the record, replacing preexisting value if found. @@ -79,9 +75,7 @@ impl Record { where K: AsRef + Into, { - if let Some(idx) = self.index_of(&col) { - // Can panic if vals.len() < cols.len() - let curr_val = &mut self.vals[idx]; + if let Some(curr_val) = self.get_mut(&col) { Some(std::mem::replace(curr_val, val)) } else { self.push(col, val); @@ -98,15 +92,19 @@ impl Record { } pub fn get(&self, col: impl AsRef) -> Option<&Value> { - self.index_of(col).and_then(|idx| self.vals.get(idx)) + self.inner + .iter() + .find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None }) } pub fn get_mut(&mut self, col: impl AsRef) -> Option<&mut Value> { - self.index_of(col).and_then(|idx| self.vals.get_mut(idx)) + self.inner + .iter_mut() + .find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None }) } pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> { - Some((self.cols.get(idx)?, self.vals.get(idx)?)) + self.inner.get(idx).map(|(col, val): &(_, _)| (col, val)) } /// Remove single value by key @@ -116,8 +114,8 @@ impl Record { /// Note: makes strong assumption that keys are unique pub fn remove(&mut self, col: impl AsRef) -> Option { let idx = self.index_of(col)?; - self.cols.remove(idx); - Some(self.vals.remove(idx)) + let (_, val) = self.inner.remove(idx); + Some(val) } /// Remove elements in-place that do not satisfy `keep` @@ -185,33 +183,7 @@ impl Record { where F: FnMut(&str, &mut Value) -> bool, { - // `Vec::retain` is able to optimize memcopies internally. - // For maximum benefit, `retain` is used on `vals`, - // as `Value` is a larger struct than `String`. - // - // To do a simultaneous retain on the `cols`, three portions of it are tracked: - // [..retained, ..dropped, ..unvisited] - - // number of elements keep so far, start of ..dropped and length of ..retained - let mut retained = 0; - // current index of element being checked, start of ..unvisited - let mut idx = 0; - - self.vals.retain_mut(|val| { - if keep(&self.cols[idx], val) { - // skip swaps for first consecutive run of kept elements - if idx != retained { - self.cols.swap(idx, retained); - } - retained += 1; - idx += 1; - true - } else { - idx += 1; - false - } - }); - self.cols.truncate(retained); + self.inner.retain_mut(|(col, val)| keep(col, val)); } /// Truncate record to the first `len` elements. @@ -235,25 +207,24 @@ impl Record { /// assert_eq!(rec.len(), 0); /// ``` pub fn truncate(&mut self, len: usize) { - self.cols.truncate(len); - self.vals.truncate(len); + self.inner.truncate(len); } pub fn columns(&self) -> Columns { Columns { - iter: self.cols.iter(), + iter: self.inner.iter(), } } pub fn values(&self) -> Values { Values { - iter: self.vals.iter(), + iter: self.inner.iter(), } } pub fn into_values(self) -> IntoValues { IntoValues { - iter: self.vals.into_iter(), + iter: self.inner.into_iter(), } } @@ -282,19 +253,18 @@ impl Record { where R: RangeBounds + Clone, { - debug_assert_eq!(self.cols.len(), self.vals.len()); Drain { - keys: self.cols.drain(range.clone()), - values: self.vals.drain(range), + iter: self.inner.drain(range), } } } impl FromIterator<(String, Value)> for Record { fn from_iter>(iter: T) -> Self { - let (cols, vals) = iter.into_iter().unzip(); // TODO: should this check for duplicate keys/columns? - Self { cols, vals } + Self { + inner: iter.into_iter().collect(), + } } } @@ -308,7 +278,7 @@ impl Extend<(String, Value)> for Record { } pub struct IntoIter { - iter: std::iter::Zip, std::vec::IntoIter>, + iter: std::vec::IntoIter<(String, Value)>, } impl Iterator for IntoIter { @@ -338,26 +308,26 @@ impl IntoIterator for Record { fn into_iter(self) -> Self::IntoIter { IntoIter { - iter: self.cols.into_iter().zip(self.vals), + iter: self.inner.into_iter(), } } } pub struct Iter<'a> { - iter: std::iter::Zip, std::slice::Iter<'a, Value>>, + iter: std::slice::Iter<'a, (String, Value)>, } impl<'a> Iterator for Iter<'a> { type Item = (&'a String, &'a Value); fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().map(|(col, val): &(_, _)| (col, val)) } } impl<'a> DoubleEndedIterator for Iter<'a> { fn next_back(&mut self) -> Option { - self.iter.next_back() + self.iter.next_back().map(|(col, val): &(_, _)| (col, val)) } } @@ -374,26 +344,26 @@ impl<'a> IntoIterator for &'a Record { fn into_iter(self) -> Self::IntoIter { Iter { - iter: self.cols.iter().zip(&self.vals), + iter: self.inner.iter(), } } } pub struct IterMut<'a> { - iter: std::iter::Zip, std::slice::IterMut<'a, Value>>, + iter: std::slice::IterMut<'a, (String, Value)>, } impl<'a> Iterator for IterMut<'a> { type Item = (&'a String, &'a mut Value); fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().map(|(col, val)| (&*col, val)) } } impl<'a> DoubleEndedIterator for IterMut<'a> { fn next_back(&mut self) -> Option { - self.iter.next_back() + self.iter.next_back().map(|(col, val)| (&*col, val)) } } @@ -410,20 +380,20 @@ impl<'a> IntoIterator for &'a mut Record { fn into_iter(self) -> Self::IntoIter { IterMut { - iter: self.cols.iter().zip(&mut self.vals), + iter: self.inner.iter_mut(), } } } pub struct Columns<'a> { - iter: std::slice::Iter<'a, String>, + iter: std::slice::Iter<'a, (String, Value)>, } impl<'a> Iterator for Columns<'a> { type Item = &'a String; fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().map(|(col, _)| col) } fn size_hint(&self) -> (usize, Option) { @@ -433,7 +403,7 @@ impl<'a> Iterator for Columns<'a> { impl<'a> DoubleEndedIterator for Columns<'a> { fn next_back(&mut self) -> Option { - self.iter.next_back() + self.iter.next_back().map(|(col, _)| col) } } @@ -444,14 +414,14 @@ impl<'a> ExactSizeIterator for Columns<'a> { } pub struct Values<'a> { - iter: std::slice::Iter<'a, Value>, + iter: std::slice::Iter<'a, (String, Value)>, } impl<'a> Iterator for Values<'a> { type Item = &'a Value; fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().map(|(_, val)| val) } fn size_hint(&self) -> (usize, Option) { @@ -461,7 +431,7 @@ impl<'a> Iterator for Values<'a> { impl<'a> DoubleEndedIterator for Values<'a> { fn next_back(&mut self) -> Option { - self.iter.next_back() + self.iter.next_back().map(|(_, val)| val) } } @@ -472,14 +442,14 @@ impl<'a> ExactSizeIterator for Values<'a> { } pub struct IntoValues { - iter: std::vec::IntoIter, + iter: std::vec::IntoIter<(String, Value)>, } impl Iterator for IntoValues { type Item = Value; fn next(&mut self) -> Option { - self.iter.next() + self.iter.next().map(|(_, val)| val) } fn size_hint(&self) -> (usize, Option) { @@ -489,7 +459,7 @@ impl Iterator for IntoValues { impl DoubleEndedIterator for IntoValues { fn next_back(&mut self) -> Option { - self.iter.next_back() + self.iter.next_back().map(|(_, val)| val) } } @@ -500,31 +470,30 @@ impl ExactSizeIterator for IntoValues { } pub struct Drain<'a> { - keys: std::vec::Drain<'a, String>, - values: std::vec::Drain<'a, Value>, + iter: std::vec::Drain<'a, (String, Value)>, } impl Iterator for Drain<'_> { type Item = (String, Value); fn next(&mut self) -> Option { - Some((self.keys.next()?, self.values.next()?)) + self.iter.next() } fn size_hint(&self) -> (usize, Option) { - self.keys.size_hint() + self.iter.size_hint() } } impl DoubleEndedIterator for Drain<'_> { fn next_back(&mut self) -> Option { - Some((self.keys.next_back()?, self.values.next_back()?)) + self.iter.next_back() } } impl ExactSizeIterator for Drain<'_> { fn len(&self) -> usize { - self.keys.len() + self.iter.len() } }