forked from extern/nushell
Refactor flatten
command (#11017)
# Description Refactors the `flatten` command to remove a bunch of cloning. This was down by passing ownership of the `Value` to `flat_value`, removing the lifetime on `TableInside`, and using `Vec<Record>` in `FlattenedRows` instead of a pair of `Vec` of columns and values. For the quick benchmark below, it seems to be twice as fast now: ```nushell let data = ls crates | where type == dir | each { ls $'($in.name)/**/*' } timeit { for x in 0..1000 { $data | flatten } } ``` This took 550ms on v0.86.0 and only 230ms on this PR. But considering that ```nushell timeit { for x in 0..1000 { $data } } ``` takes 200ms on both versions, then the difference for `flatten` itself is really 250ms vs 30ms -- 8x faster.
This commit is contained in:
parent
fe92051bb3
commit
523d0bca16
@ -126,157 +126,126 @@ fn flatten(
|
|||||||
call: &Call,
|
call: &Call,
|
||||||
input: PipelineData,
|
input: PipelineData,
|
||||||
) -> Result<PipelineData, ShellError> {
|
) -> Result<PipelineData, ShellError> {
|
||||||
let tag = call.head;
|
|
||||||
let columns: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
|
let columns: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
|
||||||
let metadata = input.metadata();
|
let metadata = input.metadata();
|
||||||
let flatten_all = call.has_flag("all");
|
let flatten_all = call.has_flag("all");
|
||||||
|
|
||||||
input
|
input
|
||||||
.flat_map(
|
.flat_map(
|
||||||
move |item| flat_value(&columns, &item, tag, flatten_all),
|
move |item| flat_value(&columns, item, flatten_all),
|
||||||
engine_state.ctrlc.clone(),
|
engine_state.ctrlc.clone(),
|
||||||
)
|
)
|
||||||
.map(|x| x.set_metadata(metadata))
|
.map(|x| x.set_metadata(metadata))
|
||||||
}
|
}
|
||||||
|
|
||||||
enum TableInside<'a> {
|
enum TableInside {
|
||||||
// handle for a column which contains a single list(but not list of records)
|
// handle for a column which contains a single list(but not list of records)
|
||||||
// it contains (column, span, values in the column, column index).
|
// it contains (column, span, values in the column, column index).
|
||||||
Entries(&'a str, &'a Span, Vec<&'a Value>, usize),
|
Entries(String, Vec<Value>, usize),
|
||||||
// handle for a column which contains a table, we can flatten the inner column to outer level
|
// handle for a column which contains a table, we can flatten the inner column to outer level
|
||||||
// `columns` means that for the given row, it contains `len(columns)` nested rows, and each nested row contains a list of column name.
|
// `records` is the nested/inner table to flatten to the outer level
|
||||||
// Likely, `values` means that for the given row, it contains `len(values)` nested rows, and each nested row contains a list of values.
|
|
||||||
//
|
|
||||||
// `parent_column_name` is handled for conflicting column name, the nested table may contains columns which has the same name
|
// `parent_column_name` is handled for conflicting column name, the nested table may contains columns which has the same name
|
||||||
// to outer level, for that case, the output column name should be f"{parent_column_name}_{inner_column_name}".
|
// to outer level, for that case, the output column name should be f"{parent_column_name}_{inner_column_name}".
|
||||||
// `parent_column_index` is the column index in original table.
|
// `parent_column_index` is the column index in original table.
|
||||||
FlattenedRows {
|
FlattenedRows {
|
||||||
columns: Vec<Vec<String>>,
|
records: Vec<Record>,
|
||||||
_span: &'a Span,
|
parent_column_name: String,
|
||||||
values: Vec<Vec<Value>>,
|
|
||||||
parent_column_name: &'a str,
|
|
||||||
parent_column_index: usize,
|
parent_column_index: usize,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flat_value(columns: &[CellPath], item: &Value, name_tag: Span, all: bool) -> Vec<Value> {
|
fn flat_value(columns: &[CellPath], item: Value, all: bool) -> Vec<Value> {
|
||||||
let tag = item.span();
|
let tag = item.span();
|
||||||
|
|
||||||
if item.as_record().is_ok() {
|
match item {
|
||||||
|
Value::Record { val, .. } => {
|
||||||
let mut out = IndexMap::<String, Value>::new();
|
let mut out = IndexMap::<String, Value>::new();
|
||||||
let mut inner_table = None;
|
let mut inner_table = None;
|
||||||
|
|
||||||
let record = match item {
|
for (column_index, (column, value)) in val.into_iter().enumerate() {
|
||||||
Value::Record { val, .. } => val,
|
let column_requested = columns.iter().find(|c| c.into_string() == column);
|
||||||
// Propagate errors by explicitly matching them before the final case.
|
|
||||||
Value::Error { .. } => return vec![item.clone()],
|
|
||||||
other => {
|
|
||||||
return vec![Value::error(
|
|
||||||
ShellError::OnlySupportsThisInputType {
|
|
||||||
exp_input_type: "record".into(),
|
|
||||||
wrong_type: other.get_type().to_string(),
|
|
||||||
dst_span: name_tag,
|
|
||||||
src_span: other.span(),
|
|
||||||
},
|
|
||||||
name_tag,
|
|
||||||
)];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let s = item.span();
|
|
||||||
|
|
||||||
for (column_index, (column, value)) in record.iter().enumerate() {
|
|
||||||
let column_requested = columns.iter().find(|c| c.into_string() == *column);
|
|
||||||
let need_flatten = { columns.is_empty() || column_requested.is_some() };
|
let need_flatten = { columns.is_empty() || column_requested.is_some() };
|
||||||
let span = value.span();
|
let span = value.span();
|
||||||
|
|
||||||
match value {
|
match value {
|
||||||
Value::Record { val, .. } => {
|
Value::Record { val, .. } => {
|
||||||
if need_flatten {
|
if need_flatten {
|
||||||
val.iter().for_each(|(col, val)| {
|
for (col, val) in val {
|
||||||
if out.contains_key(col) {
|
if out.contains_key(&col) {
|
||||||
out.insert(format!("{column}_{col}"), val.clone());
|
out.insert(format!("{column}_{col}"), val);
|
||||||
} else {
|
} else {
|
||||||
out.insert(col.to_string(), val.clone());
|
out.insert(col, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if out.contains_key(&column) {
|
||||||
|
out.insert(format!("{column}_{column}"), Value::record(val, span));
|
||||||
|
} else {
|
||||||
|
out.insert(column, Value::record(val, span));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::List { vals, .. } => {
|
||||||
|
if need_flatten && inner_table.is_some() {
|
||||||
|
return vec![Value::error(
|
||||||
|
ShellError::UnsupportedInput {
|
||||||
|
msg: "can only flatten one inner list at a time. tried flattening more than one column with inner lists... but is flattened already".into(),
|
||||||
|
input: "value originates from here".into(),
|
||||||
|
msg_span: tag,
|
||||||
|
input_span: span
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)];
|
||||||
|
}
|
||||||
|
|
||||||
|
if all && vals.iter().all(|f| f.as_record().is_ok()) {
|
||||||
|
// it's a table (a list of record, we can flatten inner record)
|
||||||
|
if need_flatten {
|
||||||
|
let records = vals
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|v| {
|
||||||
|
if let Value::Record { val, .. } = v {
|
||||||
|
Some(val)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
} else if out.contains_key(column) {
|
.collect();
|
||||||
out.insert(format!("{column}_{column}"), value.clone());
|
|
||||||
} else {
|
|
||||||
out.insert(column.to_string(), value.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Value::List { vals, .. } if all && vals.iter().all(|f| f.as_record().is_ok()) => {
|
|
||||||
if need_flatten && inner_table.is_some() {
|
|
||||||
return vec![Value::error( ShellError::UnsupportedInput { msg: "can only flatten one inner list at a time. tried flattening more than one column with inner lists... but is flattened already".to_string(), input: "value originates from here".into(), msg_span: s, input_span: span }, span)
|
|
||||||
];
|
|
||||||
}
|
|
||||||
// it's a table (a list of record, we can flatten inner record)
|
|
||||||
let mut records = vec![];
|
|
||||||
|
|
||||||
for v in vals {
|
|
||||||
if let Ok(r) = v.as_record() {
|
|
||||||
records.push(r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if need_flatten {
|
|
||||||
let cols = records.iter().map(|r| r.cols.clone());
|
|
||||||
let vals = records.iter().map(|r| r.vals.clone());
|
|
||||||
|
|
||||||
inner_table = Some(TableInside::FlattenedRows {
|
inner_table = Some(TableInside::FlattenedRows {
|
||||||
columns: cols.collect(),
|
records,
|
||||||
_span: &s,
|
|
||||||
values: vals.collect(),
|
|
||||||
parent_column_name: column,
|
parent_column_name: column,
|
||||||
parent_column_index: column_index,
|
parent_column_index: column_index,
|
||||||
});
|
});
|
||||||
} else if out.contains_key(column) {
|
} else if out.contains_key(&column) {
|
||||||
out.insert(format!("{column}_{column}"), value.clone());
|
out.insert(format!("{column}_{column}"), Value::list(vals, span));
|
||||||
} else {
|
} else {
|
||||||
out.insert(column.to_string(), value.clone());
|
out.insert(column, Value::list(vals, span));
|
||||||
}
|
}
|
||||||
}
|
} else if !columns.is_empty() {
|
||||||
Value::List { vals: values, .. } => {
|
let cell_path =
|
||||||
if need_flatten && inner_table.is_some() {
|
column_requested.and_then(|x| match x.members.first() {
|
||||||
return vec![Value::error( ShellError::UnsupportedInput { msg: "can only flatten one inner list at a time. tried flattening more than one column with inner lists... but is flattened already".to_string(), input: "value originates from here".into(), msg_span: s, input_span: span }, span)
|
Some(PathMember::String { val, .. }) => Some(val),
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
if !columns.is_empty() {
|
|
||||||
let cell_path = column_requested.and_then(|x| match x.members.first() {
|
|
||||||
Some(PathMember::String { val, span: _, .. }) => Some(val),
|
|
||||||
_ => None,
|
_ => None,
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Some(r) = cell_path {
|
if let Some(r) = cell_path {
|
||||||
inner_table = Some(TableInside::Entries(
|
inner_table =
|
||||||
r,
|
Some(TableInside::Entries(r.clone(), vals, column_index));
|
||||||
&s,
|
|
||||||
values.iter().collect::<Vec<_>>(),
|
|
||||||
column_index,
|
|
||||||
));
|
|
||||||
} else {
|
} else {
|
||||||
out.insert(column.to_string(), value.clone());
|
out.insert(column, Value::list(vals, span));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
inner_table = Some(TableInside::Entries(
|
inner_table = Some(TableInside::Entries(column, vals, column_index));
|
||||||
column,
|
|
||||||
&s,
|
|
||||||
values.iter().collect::<Vec<_>>(),
|
|
||||||
column_index,
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
out.insert(column.to_string(), value.clone());
|
out.insert(column, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut expanded = vec![];
|
let mut expanded = vec![];
|
||||||
match inner_table {
|
match inner_table {
|
||||||
Some(TableInside::Entries(column, _, entries, parent_column_index)) => {
|
Some(TableInside::Entries(column, entries, parent_column_index)) => {
|
||||||
for entry in entries {
|
for entry in entries {
|
||||||
let base = out.clone();
|
let base = out.clone();
|
||||||
let mut record = Record::new();
|
let mut record = Record::new();
|
||||||
@ -285,37 +254,38 @@ fn flat_value(columns: &[CellPath], item: &Value, name_tag: Span, all: bool) ->
|
|||||||
// meet the flattened column, push them to result record first
|
// meet the flattened column, push them to result record first
|
||||||
// this can avoid output column order changed.
|
// this can avoid output column order changed.
|
||||||
if index == parent_column_index {
|
if index == parent_column_index {
|
||||||
record.push(column, entry.clone());
|
record.push(column.clone(), entry.clone());
|
||||||
}
|
}
|
||||||
record.push(col, val);
|
record.push(col, val);
|
||||||
index += 1;
|
index += 1;
|
||||||
}
|
}
|
||||||
// the flattened column may be the last column in the original table.
|
// the flattened column may be the last column in the original table.
|
||||||
if index == parent_column_index {
|
if index == parent_column_index {
|
||||||
record.push(column, entry.clone());
|
record.push(column.clone(), entry);
|
||||||
}
|
}
|
||||||
expanded.push(Value::record(record, tag));
|
expanded.push(Value::record(record, tag));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(TableInside::FlattenedRows {
|
Some(TableInside::FlattenedRows {
|
||||||
columns,
|
records,
|
||||||
_span,
|
|
||||||
values,
|
|
||||||
parent_column_name,
|
parent_column_name,
|
||||||
parent_column_index,
|
parent_column_index,
|
||||||
}) => {
|
}) => {
|
||||||
for (inner_cols, inner_vals) in columns.into_iter().zip(values) {
|
for inner_record in records {
|
||||||
let base = out.clone();
|
let base = out.clone();
|
||||||
let mut record = Record::new();
|
let mut record = Record::new();
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
|
|
||||||
for (base_col, base_val) in base.into_iter() {
|
for (base_col, base_val) in base {
|
||||||
// meet the flattened column, push them to result record first
|
// meet the flattened column, push them to result record first
|
||||||
// this can avoid output column order changed.
|
// this can avoid output column order changed.
|
||||||
if index == parent_column_index {
|
if index == parent_column_index {
|
||||||
for (col, val) in inner_cols.iter().zip(inner_vals.iter()) {
|
for (col, val) in &inner_record {
|
||||||
if record.contains(col) {
|
if record.contains(col) {
|
||||||
record.push(format!("{parent_column_name}_{col}"), val.clone());
|
record.push(
|
||||||
|
format!("{parent_column_name}_{col}"),
|
||||||
|
val.clone(),
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
record.push(col, val.clone());
|
record.push(col, val.clone());
|
||||||
};
|
};
|
||||||
@ -328,11 +298,11 @@ fn flat_value(columns: &[CellPath], item: &Value, name_tag: Span, all: bool) ->
|
|||||||
|
|
||||||
// the flattened column may be the last column in the original table.
|
// the flattened column may be the last column in the original table.
|
||||||
if index == parent_column_index {
|
if index == parent_column_index {
|
||||||
for (col, val) in inner_cols.iter().zip(inner_vals.iter()) {
|
for (col, val) in inner_record {
|
||||||
if record.contains(col) {
|
if record.contains(&col) {
|
||||||
record.push(format!("{parent_column_name}_{col}"), val.clone());
|
record.push(format!("{parent_column_name}_{col}"), val);
|
||||||
} else {
|
} else {
|
||||||
record.push(col, val.clone());
|
record.push(col, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -344,14 +314,9 @@ fn flat_value(columns: &[CellPath], item: &Value, name_tag: Span, all: bool) ->
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
expanded
|
expanded
|
||||||
} else if item.as_list().is_ok() {
|
|
||||||
if let Value::List { vals, .. } = item {
|
|
||||||
vals.to_vec()
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
}
|
}
|
||||||
} else {
|
Value::List { vals, .. } => vals,
|
||||||
vec![item.clone()]
|
item => vec![item],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user