forked from extern/nushell
Improve select
row perf for large N (#10355)
# Description While reviewing #10350 I noticed that a `HashSet<usize>` was used to deduplicate the incoming rows, which are then sorted after cloning to a separate `Vec`. This sounds like a candidate for a `BTreeSet` which guarantees the ordering. In the process I removed some dead code. - Use `BTreeSet` instead of `HashSet` - Remove dead `skip` logic - Use `BTreeSet` directly in `NthIterator` - Consume `BTreeSet` through `Peekable<IntoIter>`
This commit is contained in:
parent
ef7ade59f3
commit
1bb953877e
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
|
||||
PipelineIterator, Record, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use std::collections::HashSet;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Select;
|
||||
@ -197,7 +197,7 @@ fn select(
|
||||
columns: Vec<CellPath>,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mut unique_rows: HashSet<usize> = HashSet::new();
|
||||
let mut unique_rows: BTreeSet<usize> = BTreeSet::new();
|
||||
|
||||
let mut new_columns = vec![];
|
||||
|
||||
@ -224,18 +224,15 @@ fn select(
|
||||
};
|
||||
}
|
||||
let columns = new_columns;
|
||||
let mut unique_rows: Vec<usize> = unique_rows.into_iter().collect();
|
||||
|
||||
let input = if !unique_rows.is_empty() {
|
||||
unique_rows.sort_unstable();
|
||||
// let skip = call.has_flag("skip");
|
||||
let metadata = input.metadata();
|
||||
let pipeline_iter: PipelineIterator = input.into_iter();
|
||||
|
||||
NthIterator {
|
||||
input: pipeline_iter,
|
||||
rows: unique_rows,
|
||||
skip: false,
|
||||
rows: unique_rows.into_iter().peekable(),
|
||||
current: 0,
|
||||
}
|
||||
.into_pipeline_data(engine_state.ctrlc.clone())
|
||||
@ -336,8 +333,7 @@ fn select(
|
||||
|
||||
struct NthIterator {
|
||||
input: PipelineIterator,
|
||||
rows: Vec<usize>,
|
||||
skip: bool,
|
||||
rows: std::iter::Peekable<std::collections::btree_set::IntoIter<usize>>,
|
||||
current: usize,
|
||||
}
|
||||
|
||||
@ -346,32 +342,18 @@ impl Iterator for NthIterator {
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if !self.skip {
|
||||
if let Some(row) = self.rows.first() {
|
||||
if self.current == *row {
|
||||
self.rows.remove(0);
|
||||
self.current += 1;
|
||||
return self.input.next();
|
||||
} else {
|
||||
self.current += 1;
|
||||
let _ = self.input.next();
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
} else if let Some(row) = self.rows.first() {
|
||||
if let Some(row) = self.rows.peek() {
|
||||
if self.current == *row {
|
||||
self.rows.remove(0);
|
||||
self.rows.next();
|
||||
self.current += 1;
|
||||
return self.input.next();
|
||||
} else {
|
||||
self.current += 1;
|
||||
let _ = self.input.next();
|
||||
continue;
|
||||
} else {
|
||||
self.current += 1;
|
||||
return self.input.next();
|
||||
}
|
||||
} else {
|
||||
return self.input.next();
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user