mirror of
https://github.com/nushell/nushell.git
synced 2024-11-22 08:23:24 +01:00
Refactor Record
to use a single backing Vec
(#12326)
# Description This shrinks `Record`'s size in half and and allows you to include it in `Value` without growing the size. Changing the `Record` internals may have slightly different performance characteristics as the cache locality changes on lookups (if you directly need the value, it should be closer, but in other cases may blow up the cache line budget) Also different perf characteristics on creation expected. `Record::from_raw_cols_vals` now probably worse. ## Benchmarking Comparison with the main branch (boxed Record) revealed no significant change to the creation but an improvement when accessing larger N. The fact that this was more pronounced for nested access (still cloning before nushell/nushell#12325) leads to the conclusion that this may still be dominated by the smaller clone necessary for a 24-byte `Record` over the previous 48 bytes. # User-Facing Changes Reduced memory usage
This commit is contained in:
parent
3b8258ae57
commit
0cf7de598b
@ -6,8 +6,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||||
pub struct Record {
|
pub struct Record {
|
||||||
cols: Vec<String>,
|
inner: Vec<(String, Value)>,
|
||||||
vals: Vec<Value>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Record {
|
impl Record {
|
||||||
@ -17,8 +16,7 @@ impl Record {
|
|||||||
|
|
||||||
pub fn with_capacity(capacity: usize) -> Self {
|
pub fn with_capacity(capacity: usize) -> Self {
|
||||||
Self {
|
Self {
|
||||||
cols: Vec::with_capacity(capacity),
|
inner: Vec::with_capacity(capacity),
|
||||||
vals: Vec::with_capacity(capacity),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,7 +33,8 @@ impl Record {
|
|||||||
creation_site_span: Span,
|
creation_site_span: Span,
|
||||||
) -> Result<Self, ShellError> {
|
) -> Result<Self, ShellError> {
|
||||||
if cols.len() == vals.len() {
|
if cols.len() == vals.len() {
|
||||||
Ok(Self { cols, vals })
|
let inner = cols.into_iter().zip(vals).collect();
|
||||||
|
Ok(Self { inner })
|
||||||
} else {
|
} else {
|
||||||
Err(ShellError::RecordColsValsMismatch {
|
Err(ShellError::RecordColsValsMismatch {
|
||||||
bad_value: input_span,
|
bad_value: input_span,
|
||||||
@ -53,13 +52,11 @@ impl Record {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
debug_assert_eq!(self.cols.len(), self.vals.len());
|
self.inner.is_empty()
|
||||||
self.cols.is_empty()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
debug_assert_eq!(self.cols.len(), self.vals.len());
|
self.inner.len()
|
||||||
self.cols.len()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Naive push to the end of the datastructure.
|
/// Naive push to the end of the datastructure.
|
||||||
@ -68,8 +65,7 @@ impl Record {
|
|||||||
///
|
///
|
||||||
/// Consider to use [`Record::insert`] instead
|
/// Consider to use [`Record::insert`] instead
|
||||||
pub fn push(&mut self, col: impl Into<String>, val: Value) {
|
pub fn push(&mut self, col: impl Into<String>, val: Value) {
|
||||||
self.cols.push(col.into());
|
self.inner.push((col.into(), val));
|
||||||
self.vals.push(val);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Insert into the record, replacing preexisting value if found.
|
/// Insert into the record, replacing preexisting value if found.
|
||||||
@ -79,9 +75,7 @@ impl Record {
|
|||||||
where
|
where
|
||||||
K: AsRef<str> + Into<String>,
|
K: AsRef<str> + Into<String>,
|
||||||
{
|
{
|
||||||
if let Some(idx) = self.index_of(&col) {
|
if let Some(curr_val) = self.get_mut(&col) {
|
||||||
// Can panic if vals.len() < cols.len()
|
|
||||||
let curr_val = &mut self.vals[idx];
|
|
||||||
Some(std::mem::replace(curr_val, val))
|
Some(std::mem::replace(curr_val, val))
|
||||||
} else {
|
} else {
|
||||||
self.push(col, val);
|
self.push(col, val);
|
||||||
@ -98,15 +92,19 @@ impl Record {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&self, col: impl AsRef<str>) -> Option<&Value> {
|
pub fn get(&self, col: impl AsRef<str>) -> Option<&Value> {
|
||||||
self.index_of(col).and_then(|idx| self.vals.get(idx))
|
self.inner
|
||||||
|
.iter()
|
||||||
|
.find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_mut(&mut self, col: impl AsRef<str>) -> Option<&mut Value> {
|
pub fn get_mut(&mut self, col: impl AsRef<str>) -> Option<&mut Value> {
|
||||||
self.index_of(col).and_then(|idx| self.vals.get_mut(idx))
|
self.inner
|
||||||
|
.iter_mut()
|
||||||
|
.find_map(|(k, v)| if k == col.as_ref() { Some(v) } else { None })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> {
|
pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> {
|
||||||
Some((self.cols.get(idx)?, self.vals.get(idx)?))
|
self.inner.get(idx).map(|(col, val): &(_, _)| (col, val))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove single value by key
|
/// Remove single value by key
|
||||||
@ -116,8 +114,8 @@ impl Record {
|
|||||||
/// Note: makes strong assumption that keys are unique
|
/// Note: makes strong assumption that keys are unique
|
||||||
pub fn remove(&mut self, col: impl AsRef<str>) -> Option<Value> {
|
pub fn remove(&mut self, col: impl AsRef<str>) -> Option<Value> {
|
||||||
let idx = self.index_of(col)?;
|
let idx = self.index_of(col)?;
|
||||||
self.cols.remove(idx);
|
let (_, val) = self.inner.remove(idx);
|
||||||
Some(self.vals.remove(idx))
|
Some(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove elements in-place that do not satisfy `keep`
|
/// Remove elements in-place that do not satisfy `keep`
|
||||||
@ -185,33 +183,7 @@ impl Record {
|
|||||||
where
|
where
|
||||||
F: FnMut(&str, &mut Value) -> bool,
|
F: FnMut(&str, &mut Value) -> bool,
|
||||||
{
|
{
|
||||||
// `Vec::retain` is able to optimize memcopies internally.
|
self.inner.retain_mut(|(col, val)| keep(col, val));
|
||||||
// For maximum benefit, `retain` is used on `vals`,
|
|
||||||
// as `Value` is a larger struct than `String`.
|
|
||||||
//
|
|
||||||
// To do a simultaneous retain on the `cols`, three portions of it are tracked:
|
|
||||||
// [..retained, ..dropped, ..unvisited]
|
|
||||||
|
|
||||||
// number of elements keep so far, start of ..dropped and length of ..retained
|
|
||||||
let mut retained = 0;
|
|
||||||
// current index of element being checked, start of ..unvisited
|
|
||||||
let mut idx = 0;
|
|
||||||
|
|
||||||
self.vals.retain_mut(|val| {
|
|
||||||
if keep(&self.cols[idx], val) {
|
|
||||||
// skip swaps for first consecutive run of kept elements
|
|
||||||
if idx != retained {
|
|
||||||
self.cols.swap(idx, retained);
|
|
||||||
}
|
|
||||||
retained += 1;
|
|
||||||
idx += 1;
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
idx += 1;
|
|
||||||
false
|
|
||||||
}
|
|
||||||
});
|
|
||||||
self.cols.truncate(retained);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Truncate record to the first `len` elements.
|
/// Truncate record to the first `len` elements.
|
||||||
@ -235,25 +207,24 @@ impl Record {
|
|||||||
/// assert_eq!(rec.len(), 0);
|
/// assert_eq!(rec.len(), 0);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn truncate(&mut self, len: usize) {
|
pub fn truncate(&mut self, len: usize) {
|
||||||
self.cols.truncate(len);
|
self.inner.truncate(len);
|
||||||
self.vals.truncate(len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn columns(&self) -> Columns {
|
pub fn columns(&self) -> Columns {
|
||||||
Columns {
|
Columns {
|
||||||
iter: self.cols.iter(),
|
iter: self.inner.iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn values(&self) -> Values {
|
pub fn values(&self) -> Values {
|
||||||
Values {
|
Values {
|
||||||
iter: self.vals.iter(),
|
iter: self.inner.iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn into_values(self) -> IntoValues {
|
pub fn into_values(self) -> IntoValues {
|
||||||
IntoValues {
|
IntoValues {
|
||||||
iter: self.vals.into_iter(),
|
iter: self.inner.into_iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -282,19 +253,18 @@ impl Record {
|
|||||||
where
|
where
|
||||||
R: RangeBounds<usize> + Clone,
|
R: RangeBounds<usize> + Clone,
|
||||||
{
|
{
|
||||||
debug_assert_eq!(self.cols.len(), self.vals.len());
|
|
||||||
Drain {
|
Drain {
|
||||||
keys: self.cols.drain(range.clone()),
|
iter: self.inner.drain(range),
|
||||||
values: self.vals.drain(range),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromIterator<(String, Value)> for Record {
|
impl FromIterator<(String, Value)> for Record {
|
||||||
fn from_iter<T: IntoIterator<Item = (String, Value)>>(iter: T) -> Self {
|
fn from_iter<T: IntoIterator<Item = (String, Value)>>(iter: T) -> Self {
|
||||||
let (cols, vals) = iter.into_iter().unzip();
|
|
||||||
// TODO: should this check for duplicate keys/columns?
|
// TODO: should this check for duplicate keys/columns?
|
||||||
Self { cols, vals }
|
Self {
|
||||||
|
inner: iter.into_iter().collect(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,7 +278,7 @@ impl Extend<(String, Value)> for Record {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct IntoIter {
|
pub struct IntoIter {
|
||||||
iter: std::iter::Zip<std::vec::IntoIter<String>, std::vec::IntoIter<Value>>,
|
iter: std::vec::IntoIter<(String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for IntoIter {
|
impl Iterator for IntoIter {
|
||||||
@ -338,26 +308,26 @@ impl IntoIterator for Record {
|
|||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
IntoIter {
|
IntoIter {
|
||||||
iter: self.cols.into_iter().zip(self.vals),
|
iter: self.inner.into_iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Iter<'a> {
|
pub struct Iter<'a> {
|
||||||
iter: std::iter::Zip<std::slice::Iter<'a, String>, std::slice::Iter<'a, Value>>,
|
iter: std::slice::Iter<'a, (String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for Iter<'a> {
|
impl<'a> Iterator for Iter<'a> {
|
||||||
type Item = (&'a String, &'a Value);
|
type Item = (&'a String, &'a Value);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next()
|
self.iter.next().map(|(col, val): &(_, _)| (col, val))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DoubleEndedIterator for Iter<'a> {
|
impl<'a> DoubleEndedIterator for Iter<'a> {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next_back()
|
self.iter.next_back().map(|(col, val): &(_, _)| (col, val))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -374,26 +344,26 @@ impl<'a> IntoIterator for &'a Record {
|
|||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
Iter {
|
Iter {
|
||||||
iter: self.cols.iter().zip(&self.vals),
|
iter: self.inner.iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IterMut<'a> {
|
pub struct IterMut<'a> {
|
||||||
iter: std::iter::Zip<std::slice::Iter<'a, String>, std::slice::IterMut<'a, Value>>,
|
iter: std::slice::IterMut<'a, (String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for IterMut<'a> {
|
impl<'a> Iterator for IterMut<'a> {
|
||||||
type Item = (&'a String, &'a mut Value);
|
type Item = (&'a String, &'a mut Value);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next()
|
self.iter.next().map(|(col, val)| (&*col, val))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DoubleEndedIterator for IterMut<'a> {
|
impl<'a> DoubleEndedIterator for IterMut<'a> {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next_back()
|
self.iter.next_back().map(|(col, val)| (&*col, val))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -410,20 +380,20 @@ impl<'a> IntoIterator for &'a mut Record {
|
|||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
IterMut {
|
IterMut {
|
||||||
iter: self.cols.iter().zip(&mut self.vals),
|
iter: self.inner.iter_mut(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Columns<'a> {
|
pub struct Columns<'a> {
|
||||||
iter: std::slice::Iter<'a, String>,
|
iter: std::slice::Iter<'a, (String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for Columns<'a> {
|
impl<'a> Iterator for Columns<'a> {
|
||||||
type Item = &'a String;
|
type Item = &'a String;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next()
|
self.iter.next().map(|(col, _)| col)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
@ -433,7 +403,7 @@ impl<'a> Iterator for Columns<'a> {
|
|||||||
|
|
||||||
impl<'a> DoubleEndedIterator for Columns<'a> {
|
impl<'a> DoubleEndedIterator for Columns<'a> {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next_back()
|
self.iter.next_back().map(|(col, _)| col)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -444,14 +414,14 @@ impl<'a> ExactSizeIterator for Columns<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct Values<'a> {
|
pub struct Values<'a> {
|
||||||
iter: std::slice::Iter<'a, Value>,
|
iter: std::slice::Iter<'a, (String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for Values<'a> {
|
impl<'a> Iterator for Values<'a> {
|
||||||
type Item = &'a Value;
|
type Item = &'a Value;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next()
|
self.iter.next().map(|(_, val)| val)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
@ -461,7 +431,7 @@ impl<'a> Iterator for Values<'a> {
|
|||||||
|
|
||||||
impl<'a> DoubleEndedIterator for Values<'a> {
|
impl<'a> DoubleEndedIterator for Values<'a> {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next_back()
|
self.iter.next_back().map(|(_, val)| val)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -472,14 +442,14 @@ impl<'a> ExactSizeIterator for Values<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct IntoValues {
|
pub struct IntoValues {
|
||||||
iter: std::vec::IntoIter<Value>,
|
iter: std::vec::IntoIter<(String, Value)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for IntoValues {
|
impl Iterator for IntoValues {
|
||||||
type Item = Value;
|
type Item = Value;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next()
|
self.iter.next().map(|(_, val)| val)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
@ -489,7 +459,7 @@ impl Iterator for IntoValues {
|
|||||||
|
|
||||||
impl DoubleEndedIterator for IntoValues {
|
impl DoubleEndedIterator for IntoValues {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
self.iter.next_back()
|
self.iter.next_back().map(|(_, val)| val)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -500,31 +470,30 @@ impl ExactSizeIterator for IntoValues {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct Drain<'a> {
|
pub struct Drain<'a> {
|
||||||
keys: std::vec::Drain<'a, String>,
|
iter: std::vec::Drain<'a, (String, Value)>,
|
||||||
values: std::vec::Drain<'a, Value>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for Drain<'_> {
|
impl Iterator for Drain<'_> {
|
||||||
type Item = (String, Value);
|
type Item = (String, Value);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
Some((self.keys.next()?, self.values.next()?))
|
self.iter.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
self.keys.size_hint()
|
self.iter.size_hint()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DoubleEndedIterator for Drain<'_> {
|
impl DoubleEndedIterator for Drain<'_> {
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
Some((self.keys.next_back()?, self.values.next_back()?))
|
self.iter.next_back()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ExactSizeIterator for Drain<'_> {
|
impl ExactSizeIterator for Drain<'_> {
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
self.keys.len()
|
self.iter.len()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user