Add common map-like API to nu_protocol::Record (#10841)

# Description

> Our `Record` looks like a map, quacks like a map, so let's treat it
with the API for a map

Implement common methods found on e.g. `std::collections::HashMap` or
the insertion-ordered [indexmap](https://docs.rs/indexmap).

This allows contributors to not have to worry about how to get to the
relevant items and not mess up the assumptions of a Nushell record.

## Record assumptions
- `cols` and `vals` are of equal length
- for all practical purposes, keys/columns should be unique

## End goal

The end goal of the upcoming series of PR's is to allow us to make
`cols` and `vals` private.
Then it would be possible to exchange the backing datastructure to best
fit the expected workload.
This could be statically (by finding the best balance) or dynamically by
using an `enum` of potential representations.

## Parts
- Add validating explicit part constructor
`Record::from_raw_cols_vals()`
- Add `Record.columns()` iterator
- Add `Record.values()` iterator
- Add consuming `Record.into_values()` iterator
- Add `Record.contains()` helper
- Add `Record.insert()` that respects existing keys
- Add key-based `.get()`/`.get_mut()` to `Record`
- Add `Record.get_index()` for index-based access
- Implement `Extend` for `Record` naively
- Use checked constructor in `record!` macro
- Add `Record.index_of()` to get index by key

# User-Facing Changes
None directly

# Developer facing changes
You don't have to roll your own record handling and can use a familiar
API

# Tests + Formatting
No explicit unit tests yet. Wouldn't be too tricky to validate core
properties directly.
Will be exercised by the following PRs using the new
methods/traits/iterators.
This commit is contained in:
Stefan Holderbach 2023-10-28 15:18:41 +02:00 committed by GitHub
parent 4b301710d3
commit c87bac04c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -20,6 +20,16 @@ impl Record {
} }
} }
// Constructor that checks that `cols` and `vals` are of the same length.
//
// For perf reasons does not validate the rest of the record assumptions.
// - unique keys
pub fn from_raw_cols_vals(cols: Vec<String>, vals: Vec<Value>) -> Self {
assert_eq!(cols.len(), vals.len());
Self { cols, vals }
}
pub fn iter(&self) -> Iter { pub fn iter(&self) -> Iter {
self.into_iter() self.into_iter()
} }
@ -36,10 +46,71 @@ impl Record {
usize::min(self.cols.len(), self.vals.len()) usize::min(self.cols.len(), self.vals.len())
} }
/// Naive push to the end of the datastructure.
///
/// May duplicate data!
///
/// Consider to use [`Record::insert`] instead
pub fn push(&mut self, col: impl Into<String>, val: Value) { pub fn push(&mut self, col: impl Into<String>, val: Value) {
self.cols.push(col.into()); self.cols.push(col.into());
self.vals.push(val); self.vals.push(val);
} }
/// Insert into the record, replacing preexisting value if found.
///
/// Returns `Some(previous_value)` if found. Else `None`
pub fn insert<K>(&mut self, col: K, val: Value) -> Option<Value>
where
K: AsRef<str> + Into<String>,
{
if let Some(idx) = self.index_of(&col) {
// Can panic if vals.len() < cols.len()
let curr_val = &mut self.vals[idx];
Some(std::mem::replace(curr_val, val))
} else {
self.cols.push(col.into());
self.vals.push(val);
None
}
}
pub fn contains(&self, col: impl AsRef<str>) -> bool {
self.cols.iter().any(|k| k == col.as_ref())
}
pub fn index_of(&self, col: impl AsRef<str>) -> Option<usize> {
self.columns().position(|k| k == col.as_ref())
}
pub fn get(&self, col: impl AsRef<str>) -> Option<&Value> {
self.index_of(col).and_then(|idx| self.vals.get(idx))
}
pub fn get_mut(&mut self, col: impl AsRef<str>) -> Option<&mut Value> {
self.index_of(col).and_then(|idx| self.vals.get_mut(idx))
}
pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> {
Some((self.cols.get(idx)?, self.vals.get(idx)?))
}
pub fn columns(&self) -> Columns {
Columns {
iter: self.cols.iter(),
}
}
pub fn values(&self) -> Values {
Values {
iter: self.vals.iter(),
}
}
pub fn into_values(self) -> IntoValues {
IntoValues {
iter: self.vals.into_iter(),
}
}
} }
impl FromIterator<(String, Value)> for Record { impl FromIterator<(String, Value)> for Record {
@ -49,6 +120,16 @@ impl FromIterator<(String, Value)> for Record {
} }
} }
impl Extend<(String, Value)> for Record {
fn extend<T: IntoIterator<Item = (String, Value)>>(&mut self, iter: T) {
for (k, v) in iter {
// TODO: should this .insert with a check?
self.cols.push(k);
self.vals.push(v);
}
}
}
pub type IntoIter = std::iter::Zip<std::vec::IntoIter<String>, std::vec::IntoIter<Value>>; pub type IntoIter = std::iter::Zip<std::vec::IntoIter<String>, std::vec::IntoIter<Value>>;
impl IntoIterator for Record { impl IntoIterator for Record {
@ -85,13 +166,97 @@ impl<'a> IntoIterator for &'a mut Record {
} }
} }
pub struct Columns<'a> {
iter: std::slice::Iter<'a, String>,
}
impl<'a> Iterator for Columns<'a> {
type Item = &'a String;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<'a> DoubleEndedIterator for Columns<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back()
}
}
impl<'a> ExactSizeIterator for Columns<'a> {
fn len(&self) -> usize {
self.iter.len()
}
}
pub struct Values<'a> {
iter: std::slice::Iter<'a, Value>,
}
impl<'a> Iterator for Values<'a> {
type Item = &'a Value;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<'a> DoubleEndedIterator for Values<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back()
}
}
impl<'a> ExactSizeIterator for Values<'a> {
fn len(&self) -> usize {
self.iter.len()
}
}
pub struct IntoValues {
iter: std::vec::IntoIter<Value>,
}
impl Iterator for IntoValues {
type Item = Value;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl DoubleEndedIterator for IntoValues {
fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back()
}
}
impl ExactSizeIterator for IntoValues {
fn len(&self) -> usize {
self.iter.len()
}
}
#[macro_export] #[macro_export]
macro_rules! record { macro_rules! record {
{$($col:expr => $val:expr),+ $(,)?} => { {$($col:expr => $val:expr),+ $(,)?} => {
$crate::Record { $crate::Record::from_raw_cols_vals (
cols: vec![$($col.into(),)+], vec![$($col.into(),)+],
vals: vec![$($val,)+] vec![$($val,)+]
} )
}; };
{} => { {} => {
$crate::Record::new() $crate::Record::new()