Refactor Record to use a single backing Vec (#12326)

# Description
This shrinks `Record`'s size in half and and allows you to include it in
`Value` without growing the size.

Changing the `Record` internals may have slightly different performance
characteristics as the cache locality changes on lookups (if you
directly need the value, it should be closer, but in other cases may
blow up the cache line budget)

Also different perf characteristics on creation expected. 
`Record::from_raw_cols_vals` now probably worse.

## Benchmarking

Comparison with the main branch (boxed Record) revealed no significant
change to the creation but an improvement when accessing larger N.
The fact that this was more pronounced for nested access (still cloning
before nushell/nushell#12325) leads to the conclusion that this may
still be dominated by the smaller clone necessary for a 24-byte `Record`
over the previous 48 bytes.

# User-Facing Changes

Reduced memory usage
This commit is contained in:
Stefan Holderbach 2024-03-31 00:47:17 +01:00 committed by GitHub
parent 3b8258ae57
commit 0cf7de598b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -6,8 +6,7 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Default, Serialize, Deserialize)] #[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Record { pub struct Record {
cols: Vec<String>, inner: Vec<(String, Value)>,
vals: Vec<Value>,
} }
impl Record { impl Record {
@ -17,8 +16,7 @@ impl Record {
pub fn with_capacity(capacity: usize) -> Self { pub fn with_capacity(capacity: usize) -> Self {
Self { Self {
cols: Vec::with_capacity(capacity), inner: Vec::with_capacity(capacity),
vals: Vec::with_capacity(capacity),
} }
} }
@ -35,7 +33,8 @@ impl Record {
creation_site_span: Span, creation_site_span: Span,
) -> Result<Self, ShellError> { ) -> Result<Self, ShellError> {
if cols.len() == vals.len() { if cols.len() == vals.len() {
Ok(Self { cols, vals }) let inner = cols.into_iter().zip(vals).collect();
Ok(Self { inner })
} else { } else {
Err(ShellError::RecordColsValsMismatch { Err(ShellError::RecordColsValsMismatch {
bad_value: input_span, bad_value: input_span,
@ -53,13 +52,11 @@ impl Record {
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
debug_assert_eq!(self.cols.len(), self.vals.len()); self.inner.is_empty()
self.cols.is_empty()
} }
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
debug_assert_eq!(self.cols.len(), self.vals.len()); self.inner.len()
self.cols.len()
} }
/// Naive push to the end of the datastructure. /// Naive push to the end of the datastructure.
@ -68,8 +65,7 @@ impl Record {
/// ///
/// Consider to use [`Record::insert`] instead /// Consider to use [`Record::insert`] instead
pub fn push(&mut self, col: impl Into<String>, val: Value) { pub fn push(&mut self, col: impl Into<String>, val: Value) {
self.cols.push(col.into()); self.inner.push((col.into(), val));
self.vals.push(val);
} }
/// Insert into the record, replacing preexisting value if found. /// Insert into the record, replacing preexisting value if found.
@ -79,9 +75,7 @@ impl Record {
where where
K: AsRef<str> + Into<String>, K: AsRef<str> + Into<String>,
{ {
if let Some(idx) = self.index_of(&col) { if let Some(curr_val) = self.get_mut(&col) {
// Can panic if vals.len() < cols.len()
let curr_val = &mut self.vals[idx];
Some(std::mem::replace(curr_val, val)) Some(std::mem::replace(curr_val, val))
} else { } else {
self.push(col, val); self.push(col, val);
@ -98,15 +92,19 @@ impl Record {
} }
pub fn get(&self, col: impl AsRef<str>) -> Option<&Value> { pub fn get(&self, col: impl AsRef<str>) -> Option<&Value> {
self.index_of(col).and_then(|idx| self.vals.get(idx)) self.inner
.iter()
.find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None })
} }
pub fn get_mut(&mut self, col: impl AsRef<str>) -> Option<&mut Value> { pub fn get_mut(&mut self, col: impl AsRef<str>) -> Option<&mut Value> {
self.index_of(col).and_then(|idx| self.vals.get_mut(idx)) self.inner
.iter_mut()
.find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None })
} }
pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> { pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> {
Some((self.cols.get(idx)?, self.vals.get(idx)?)) self.inner.get(idx).map(|(col, val): &(_, _)| (col, val))
} }
/// Remove single value by key /// Remove single value by key
@ -116,8 +114,8 @@ impl Record {
/// Note: makes strong assumption that keys are unique /// Note: makes strong assumption that keys are unique
pub fn remove(&mut self, col: impl AsRef<str>) -> Option<Value> { pub fn remove(&mut self, col: impl AsRef<str>) -> Option<Value> {
let idx = self.index_of(col)?; let idx = self.index_of(col)?;
self.cols.remove(idx); let (_, val) = self.inner.remove(idx);
Some(self.vals.remove(idx)) Some(val)
} }
/// Remove elements in-place that do not satisfy `keep` /// Remove elements in-place that do not satisfy `keep`
@ -185,33 +183,7 @@ impl Record {
where where
F: FnMut(&str, &mut Value) -> bool, F: FnMut(&str, &mut Value) -> bool,
{ {
// `Vec::retain` is able to optimize memcopies internally. self.inner.retain_mut(|(col, val)| keep(col, val));
// For maximum benefit, `retain` is used on `vals`,
// as `Value` is a larger struct than `String`.
//
// To do a simultaneous retain on the `cols`, three portions of it are tracked:
// [..retained, ..dropped, ..unvisited]
// number of elements keep so far, start of ..dropped and length of ..retained
let mut retained = 0;
// current index of element being checked, start of ..unvisited
let mut idx = 0;
self.vals.retain_mut(|val| {
if keep(&self.cols[idx], val) {
// skip swaps for first consecutive run of kept elements
if idx != retained {
self.cols.swap(idx, retained);
}
retained += 1;
idx += 1;
true
} else {
idx += 1;
false
}
});
self.cols.truncate(retained);
} }
/// Truncate record to the first `len` elements. /// Truncate record to the first `len` elements.
@ -235,25 +207,24 @@ impl Record {
/// assert_eq!(rec.len(), 0); /// assert_eq!(rec.len(), 0);
/// ``` /// ```
pub fn truncate(&mut self, len: usize) { pub fn truncate(&mut self, len: usize) {
self.cols.truncate(len); self.inner.truncate(len);
self.vals.truncate(len);
} }
pub fn columns(&self) -> Columns { pub fn columns(&self) -> Columns {
Columns { Columns {
iter: self.cols.iter(), iter: self.inner.iter(),
} }
} }
pub fn values(&self) -> Values { pub fn values(&self) -> Values {
Values { Values {
iter: self.vals.iter(), iter: self.inner.iter(),
} }
} }
pub fn into_values(self) -> IntoValues { pub fn into_values(self) -> IntoValues {
IntoValues { IntoValues {
iter: self.vals.into_iter(), iter: self.inner.into_iter(),
} }
} }
@ -282,19 +253,18 @@ impl Record {
where where
R: RangeBounds<usize> + Clone, R: RangeBounds<usize> + Clone,
{ {
debug_assert_eq!(self.cols.len(), self.vals.len());
Drain { Drain {
keys: self.cols.drain(range.clone()), iter: self.inner.drain(range),
values: self.vals.drain(range),
} }
} }
} }
impl FromIterator<(String, Value)> for Record { impl FromIterator<(String, Value)> for Record {
fn from_iter<T: IntoIterator<Item = (String, Value)>>(iter: T) -> Self { fn from_iter<T: IntoIterator<Item = (String, Value)>>(iter: T) -> Self {
let (cols, vals) = iter.into_iter().unzip();
// TODO: should this check for duplicate keys/columns? // TODO: should this check for duplicate keys/columns?
Self { cols, vals } Self {
inner: iter.into_iter().collect(),
}
} }
} }
@ -308,7 +278,7 @@ impl Extend<(String, Value)> for Record {
} }
pub struct IntoIter { pub struct IntoIter {
iter: std::iter::Zip<std::vec::IntoIter<String>, std::vec::IntoIter<Value>>, iter: std::vec::IntoIter<(String, Value)>,
} }
impl Iterator for IntoIter { impl Iterator for IntoIter {
@ -338,26 +308,26 @@ impl IntoIterator for Record {
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
IntoIter { IntoIter {
iter: self.cols.into_iter().zip(self.vals), iter: self.inner.into_iter(),
} }
} }
} }
pub struct Iter<'a> { pub struct Iter<'a> {
iter: std::iter::Zip<std::slice::Iter<'a, String>, std::slice::Iter<'a, Value>>, iter: std::slice::Iter<'a, (String, Value)>,
} }
impl<'a> Iterator for Iter<'a> { impl<'a> Iterator for Iter<'a> {
type Item = (&'a String, &'a Value); type Item = (&'a String, &'a Value);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iter.next() self.iter.next().map(|(col, val): &(_, _)| (col, val))
} }
} }
impl<'a> DoubleEndedIterator for Iter<'a> { impl<'a> DoubleEndedIterator for Iter<'a> {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back() self.iter.next_back().map(|(col, val): &(_, _)| (col, val))
} }
} }
@ -374,26 +344,26 @@ impl<'a> IntoIterator for &'a Record {
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
Iter { Iter {
iter: self.cols.iter().zip(&self.vals), iter: self.inner.iter(),
} }
} }
} }
pub struct IterMut<'a> { pub struct IterMut<'a> {
iter: std::iter::Zip<std::slice::Iter<'a, String>, std::slice::IterMut<'a, Value>>, iter: std::slice::IterMut<'a, (String, Value)>,
} }
impl<'a> Iterator for IterMut<'a> { impl<'a> Iterator for IterMut<'a> {
type Item = (&'a String, &'a mut Value); type Item = (&'a String, &'a mut Value);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iter.next() self.iter.next().map(|(col, val)| (&*col, val))
} }
} }
impl<'a> DoubleEndedIterator for IterMut<'a> { impl<'a> DoubleEndedIterator for IterMut<'a> {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back() self.iter.next_back().map(|(col, val)| (&*col, val))
} }
} }
@ -410,20 +380,20 @@ impl<'a> IntoIterator for &'a mut Record {
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
IterMut { IterMut {
iter: self.cols.iter().zip(&mut self.vals), iter: self.inner.iter_mut(),
} }
} }
} }
pub struct Columns<'a> { pub struct Columns<'a> {
iter: std::slice::Iter<'a, String>, iter: std::slice::Iter<'a, (String, Value)>,
} }
impl<'a> Iterator for Columns<'a> { impl<'a> Iterator for Columns<'a> {
type Item = &'a String; type Item = &'a String;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iter.next() self.iter.next().map(|(col, _)| col)
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {
@ -433,7 +403,7 @@ impl<'a> Iterator for Columns<'a> {
impl<'a> DoubleEndedIterator for Columns<'a> { impl<'a> DoubleEndedIterator for Columns<'a> {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back() self.iter.next_back().map(|(col, _)| col)
} }
} }
@ -444,14 +414,14 @@ impl<'a> ExactSizeIterator for Columns<'a> {
} }
pub struct Values<'a> { pub struct Values<'a> {
iter: std::slice::Iter<'a, Value>, iter: std::slice::Iter<'a, (String, Value)>,
} }
impl<'a> Iterator for Values<'a> { impl<'a> Iterator for Values<'a> {
type Item = &'a Value; type Item = &'a Value;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iter.next() self.iter.next().map(|(_, val)| val)
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {
@ -461,7 +431,7 @@ impl<'a> Iterator for Values<'a> {
impl<'a> DoubleEndedIterator for Values<'a> { impl<'a> DoubleEndedIterator for Values<'a> {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back() self.iter.next_back().map(|(_, val)| val)
} }
} }
@ -472,14 +442,14 @@ impl<'a> ExactSizeIterator for Values<'a> {
} }
pub struct IntoValues { pub struct IntoValues {
iter: std::vec::IntoIter<Value>, iter: std::vec::IntoIter<(String, Value)>,
} }
impl Iterator for IntoValues { impl Iterator for IntoValues {
type Item = Value; type Item = Value;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.iter.next() self.iter.next().map(|(_, val)| val)
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {
@ -489,7 +459,7 @@ impl Iterator for IntoValues {
impl DoubleEndedIterator for IntoValues { impl DoubleEndedIterator for IntoValues {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back() self.iter.next_back().map(|(_, val)| val)
} }
} }
@ -500,31 +470,30 @@ impl ExactSizeIterator for IntoValues {
} }
pub struct Drain<'a> { pub struct Drain<'a> {
keys: std::vec::Drain<'a, String>, iter: std::vec::Drain<'a, (String, Value)>,
values: std::vec::Drain<'a, Value>,
} }
impl Iterator for Drain<'_> { impl Iterator for Drain<'_> {
type Item = (String, Value); type Item = (String, Value);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
Some((self.keys.next()?, self.values.next()?)) self.iter.next()
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {
self.keys.size_hint() self.iter.size_hint()
} }
} }
impl DoubleEndedIterator for Drain<'_> { impl DoubleEndedIterator for Drain<'_> {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
Some((self.keys.next_back()?, self.values.next_back()?)) self.iter.next_back()
} }
} }
impl ExactSizeIterator for Drain<'_> { impl ExactSizeIterator for Drain<'_> {
fn len(&self) -> usize { fn len(&self) -> usize {
self.keys.len() self.iter.len()
} }
} }