mirror of
https://github.com/nushell/nushell.git
synced 2024-11-28 19:33:47 +01:00
Improve select
row perf for large N (#10355)
# Description While reviewing #10350 I noticed that a `HashSet<usize>` was used to deduplicate the incoming rows, which are then sorted after cloning to a separate `Vec`. This sounds like a candidate for a `BTreeSet` which guarantees the ordering. In the process I removed some dead code. - Use `BTreeSet` instead of `HashSet` - Remove dead `skip` logic - Use `BTreeSet` directly in `NthIterator` - Consume `BTreeSet` through `Peekable<IntoIter>`
This commit is contained in:
parent
ef7ade59f3
commit
1bb953877e
@ -5,7 +5,7 @@ use nu_protocol::{
|
|||||||
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
|
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
|
||||||
PipelineIterator, Record, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
PipelineIterator, Record, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||||
};
|
};
|
||||||
use std::collections::HashSet;
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Select;
|
pub struct Select;
|
||||||
@ -197,7 +197,7 @@ fn select(
|
|||||||
columns: Vec<CellPath>,
|
columns: Vec<CellPath>,
|
||||||
input: PipelineData,
|
input: PipelineData,
|
||||||
) -> Result<PipelineData, ShellError> {
|
) -> Result<PipelineData, ShellError> {
|
||||||
let mut unique_rows: HashSet<usize> = HashSet::new();
|
let mut unique_rows: BTreeSet<usize> = BTreeSet::new();
|
||||||
|
|
||||||
let mut new_columns = vec![];
|
let mut new_columns = vec![];
|
||||||
|
|
||||||
@ -224,18 +224,15 @@ fn select(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
let columns = new_columns;
|
let columns = new_columns;
|
||||||
let mut unique_rows: Vec<usize> = unique_rows.into_iter().collect();
|
|
||||||
|
|
||||||
let input = if !unique_rows.is_empty() {
|
let input = if !unique_rows.is_empty() {
|
||||||
unique_rows.sort_unstable();
|
|
||||||
// let skip = call.has_flag("skip");
|
// let skip = call.has_flag("skip");
|
||||||
let metadata = input.metadata();
|
let metadata = input.metadata();
|
||||||
let pipeline_iter: PipelineIterator = input.into_iter();
|
let pipeline_iter: PipelineIterator = input.into_iter();
|
||||||
|
|
||||||
NthIterator {
|
NthIterator {
|
||||||
input: pipeline_iter,
|
input: pipeline_iter,
|
||||||
rows: unique_rows,
|
rows: unique_rows.into_iter().peekable(),
|
||||||
skip: false,
|
|
||||||
current: 0,
|
current: 0,
|
||||||
}
|
}
|
||||||
.into_pipeline_data(engine_state.ctrlc.clone())
|
.into_pipeline_data(engine_state.ctrlc.clone())
|
||||||
@ -336,8 +333,7 @@ fn select(
|
|||||||
|
|
||||||
struct NthIterator {
|
struct NthIterator {
|
||||||
input: PipelineIterator,
|
input: PipelineIterator,
|
||||||
rows: Vec<usize>,
|
rows: std::iter::Peekable<std::collections::btree_set::IntoIter<usize>>,
|
||||||
skip: bool,
|
|
||||||
current: usize,
|
current: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -346,32 +342,18 @@ impl Iterator for NthIterator {
|
|||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
loop {
|
loop {
|
||||||
if !self.skip {
|
if let Some(row) = self.rows.peek() {
|
||||||
if let Some(row) = self.rows.first() {
|
|
||||||
if self.current == *row {
|
|
||||||
self.rows.remove(0);
|
|
||||||
self.current += 1;
|
|
||||||
return self.input.next();
|
|
||||||
} else {
|
|
||||||
self.current += 1;
|
|
||||||
let _ = self.input.next();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
} else if let Some(row) = self.rows.first() {
|
|
||||||
if self.current == *row {
|
if self.current == *row {
|
||||||
self.rows.remove(0);
|
self.rows.next();
|
||||||
|
self.current += 1;
|
||||||
|
return self.input.next();
|
||||||
|
} else {
|
||||||
self.current += 1;
|
self.current += 1;
|
||||||
let _ = self.input.next();
|
let _ = self.input.next();
|
||||||
continue;
|
continue;
|
||||||
} else {
|
|
||||||
self.current += 1;
|
|
||||||
return self.input.next();
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return self.input.next();
|
return None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user