mirror of
https://github.com/nushell/nushell.git
synced 2025-04-15 16:58:19 +02:00
Refactor parse
command (#12791)
# Description
- Switches the `excess` field in the `ParseStreamer` and `ParseStreamerExternal` types from a `Vec` to a `VecDeque`
- Removes unnecessary clones of the arguments passed to `stream_helper`
- Other simplifications and loop restructuring
- Merges the `ParseStreamer` and `ParseStreamerExternal` types into a common `ParseIter`
- `parse` now streams for list values
This commit is contained in:
parent
e462b6cd99
commit
3b26c08dab
@ -1,9 +1,9 @@
|
|||||||
use fancy_regex::Regex;
|
use fancy_regex::{Captures, Regex};
|
||||||
use nu_engine::command_prelude::*;
|
use nu_engine::command_prelude::*;
|
||||||
use nu_protocol::{ListStream, ValueIterator};
|
use nu_protocol::ListStream;
|
||||||
use std::sync::{
|
use std::{
|
||||||
atomic::{AtomicBool, Ordering},
|
collections::VecDeque,
|
||||||
Arc,
|
sync::{atomic::AtomicBool, Arc},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -119,7 +119,6 @@ fn operate(
|
|||||||
let head = call.head;
|
let head = call.head;
|
||||||
let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
|
let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
|
||||||
let regex: bool = call.has_flag(engine_state, stack, "regex")?;
|
let regex: bool = call.has_flag(engine_state, stack, "regex")?;
|
||||||
let ctrlc = engine_state.ctrlc.clone();
|
|
||||||
|
|
||||||
let pattern_item = pattern.item;
|
let pattern_item = pattern.item;
|
||||||
let pattern_span = pattern.span;
|
let pattern_span = pattern.span;
|
||||||
@ -130,7 +129,7 @@ fn operate(
|
|||||||
build_regex(&pattern_item, pattern_span)?
|
build_regex(&pattern_item, pattern_span)?
|
||||||
};
|
};
|
||||||
|
|
||||||
let regex_pattern = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
|
let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
|
||||||
error: "Error with regular expression".into(),
|
error: "Error with regular expression".into(),
|
||||||
msg: e.to_string(),
|
msg: e.to_string(),
|
||||||
span: Some(pattern_span),
|
span: Some(pattern_span),
|
||||||
@ -138,92 +137,108 @@ fn operate(
|
|||||||
inner: vec![],
|
inner: vec![],
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let columns = column_names(&regex_pattern);
|
let columns = regex
|
||||||
|
.capture_names()
|
||||||
|
.skip(1)
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, name)| {
|
||||||
|
name.map(String::from)
|
||||||
|
.unwrap_or_else(|| format!("capture{i}"))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let ctrlc = engine_state.ctrlc.clone();
|
||||||
|
|
||||||
match input {
|
match input {
|
||||||
PipelineData::Empty => Ok(PipelineData::Empty),
|
PipelineData::Empty => Ok(PipelineData::Empty),
|
||||||
PipelineData::Value(..) => {
|
PipelineData::Value(value, ..) => match value {
|
||||||
let mut parsed: Vec<Value> = Vec::new();
|
Value::String { val, .. } => {
|
||||||
|
let captures = regex
|
||||||
|
.captures_iter(&val)
|
||||||
|
.map(|captures| captures_to_value(captures, &columns, head))
|
||||||
|
.collect::<Result<_, _>>()?;
|
||||||
|
|
||||||
for v in input {
|
Ok(Value::list(captures, head).into_pipeline_data())
|
||||||
let v_span = v.span();
|
|
||||||
match v.coerce_into_string() {
|
|
||||||
Ok(s) => {
|
|
||||||
let results = regex_pattern.captures_iter(&s);
|
|
||||||
|
|
||||||
for c in results {
|
|
||||||
let captures = match c {
|
|
||||||
Ok(c) => c,
|
|
||||||
Err(e) => {
|
|
||||||
return Err(ShellError::GenericError {
|
|
||||||
error: "Error with regular expression captures".into(),
|
|
||||||
msg: e.to_string(),
|
|
||||||
span: None,
|
|
||||||
help: None,
|
|
||||||
inner: vec![],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let record = columns
|
|
||||||
.iter()
|
|
||||||
.zip(captures.iter().skip(1))
|
|
||||||
.map(|(column_name, cap)| {
|
|
||||||
let cap_string = cap.map(|v| v.as_str()).unwrap_or("");
|
|
||||||
(column_name.clone(), Value::string(cap_string, v_span))
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
parsed.push(Value::record(record, head));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
return Err(ShellError::PipelineMismatch {
|
|
||||||
exp_input_type: "string".into(),
|
|
||||||
dst_span: head,
|
|
||||||
src_span: v_span,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Value::List { vals, .. } => {
|
||||||
|
let iter = vals.into_iter().map(move |val| {
|
||||||
|
let span = val.span();
|
||||||
|
val.into_string().map_err(|_| ShellError::PipelineMismatch {
|
||||||
|
exp_input_type: "string".into(),
|
||||||
|
dst_span: head,
|
||||||
|
src_span: span,
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
Ok(ListStream::new(parsed.into_iter(), head, ctrlc).into())
|
let iter = ParseIter {
|
||||||
}
|
captures: VecDeque::new(),
|
||||||
|
regex,
|
||||||
|
columns,
|
||||||
|
iter,
|
||||||
|
span: head,
|
||||||
|
ctrlc,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(ListStream::new(iter, head, None).into())
|
||||||
|
}
|
||||||
|
value => Err(ShellError::PipelineMismatch {
|
||||||
|
exp_input_type: "string".into(),
|
||||||
|
dst_span: head,
|
||||||
|
src_span: value.span(),
|
||||||
|
}),
|
||||||
|
},
|
||||||
PipelineData::ListStream(stream, ..) => Ok(stream
|
PipelineData::ListStream(stream, ..) => Ok(stream
|
||||||
.modify(|stream| ParseStreamer {
|
.modify(|stream| {
|
||||||
span: head,
|
let iter = stream.map(move |val| {
|
||||||
excess: Vec::new(),
|
let span = val.span();
|
||||||
regex: regex_pattern,
|
val.into_string().map_err(|_| ShellError::PipelineMismatch {
|
||||||
columns,
|
exp_input_type: "string".into(),
|
||||||
stream,
|
dst_span: head,
|
||||||
ctrlc,
|
src_span: span,
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
ParseIter {
|
||||||
|
captures: VecDeque::new(),
|
||||||
|
regex,
|
||||||
|
columns,
|
||||||
|
iter,
|
||||||
|
span: head,
|
||||||
|
ctrlc,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.into()),
|
.into()),
|
||||||
|
|
||||||
PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty),
|
PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty),
|
||||||
|
|
||||||
PipelineData::ExternalStream {
|
PipelineData::ExternalStream {
|
||||||
stdout: Some(stream),
|
stdout: Some(stream),
|
||||||
..
|
..
|
||||||
} => Ok(ListStream::new(
|
} => {
|
||||||
ParseStreamerExternal {
|
// Collect all `stream` chunks into a single `chunk` to be able to deal with matches that
|
||||||
span: head,
|
// extend across chunk boundaries.
|
||||||
excess: Vec::new(),
|
// This is a stop-gap solution until the `regex` crate supports streaming or an alternative
|
||||||
regex: regex_pattern,
|
// solution is found.
|
||||||
|
// See https://github.com/nushell/nushell/issues/9795
|
||||||
|
let str = stream.into_string()?.item;
|
||||||
|
|
||||||
|
// let iter = stream.lines();
|
||||||
|
|
||||||
|
let iter = ParseIter {
|
||||||
|
captures: VecDeque::new(),
|
||||||
|
regex,
|
||||||
columns,
|
columns,
|
||||||
stream: stream.stream,
|
iter: std::iter::once(Ok(str)),
|
||||||
},
|
span: head,
|
||||||
head,
|
ctrlc,
|
||||||
ctrlc,
|
};
|
||||||
)
|
|
||||||
.into()),
|
Ok(ListStream::new(iter, head, None).into())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
|
fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
|
||||||
let mut output = "(?s)\\A".to_string();
|
let mut output = "(?s)\\A".to_string();
|
||||||
|
|
||||||
//let mut loop_input = input;
|
|
||||||
let mut loop_input = input.chars().peekable();
|
let mut loop_input = input.chars().peekable();
|
||||||
loop {
|
loop {
|
||||||
let mut before = String::new();
|
let mut before = String::new();
|
||||||
@ -274,172 +289,73 @@ fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
|
|||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn column_names(regex: &Regex) -> Vec<String> {
|
struct ParseIter<I: Iterator<Item = Result<String, ShellError>>> {
|
||||||
regex
|
captures: VecDeque<Value>,
|
||||||
.capture_names()
|
|
||||||
.enumerate()
|
|
||||||
.skip(1)
|
|
||||||
.map(|(i, name)| {
|
|
||||||
name.map(String::from)
|
|
||||||
.unwrap_or_else(|| format!("capture{}", i - 1))
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ParseStreamer {
|
|
||||||
span: Span,
|
|
||||||
excess: Vec<Value>,
|
|
||||||
regex: Regex,
|
regex: Regex,
|
||||||
columns: Vec<String>,
|
columns: Vec<String>,
|
||||||
stream: ValueIterator,
|
iter: I,
|
||||||
|
span: Span,
|
||||||
ctrlc: Option<Arc<AtomicBool>>,
|
ctrlc: Option<Arc<AtomicBool>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for ParseStreamer {
|
impl<I: Iterator<Item = Result<String, ShellError>>> ParseIter<I> {
|
||||||
type Item = Value;
|
fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> {
|
||||||
fn next(&mut self) -> Option<Value> {
|
for captures in self.regex.captures_iter(str) {
|
||||||
if !self.excess.is_empty() {
|
self.captures
|
||||||
return Some(self.excess.remove(0));
|
.push_back(captures_to_value(captures, &self.columns, self.span)?);
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: Iterator<Item = Result<String, ShellError>>> Iterator for ParseIter<I> {
|
||||||
|
type Item = Value;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Value> {
|
||||||
loop {
|
loop {
|
||||||
if let Some(ctrlc) = &self.ctrlc {
|
if nu_utils::ctrl_c::was_pressed(&self.ctrlc) {
|
||||||
if ctrlc.load(Ordering::SeqCst) {
|
return None;
|
||||||
break None;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let v = self.stream.next()?;
|
if let Some(val) = self.captures.pop_front() {
|
||||||
let span = v.span();
|
return Some(val);
|
||||||
|
}
|
||||||
|
|
||||||
let Ok(s) = v.coerce_into_string() else {
|
let result = self
|
||||||
return Some(Value::error(
|
.iter
|
||||||
ShellError::PipelineMismatch {
|
.next()?
|
||||||
exp_input_type: "string".into(),
|
.and_then(|str| self.populate_captures(&str));
|
||||||
dst_span: self.span,
|
|
||||||
src_span: span,
|
|
||||||
},
|
|
||||||
span,
|
|
||||||
));
|
|
||||||
};
|
|
||||||
|
|
||||||
let parsed = stream_helper(
|
if let Err(err) = result {
|
||||||
self.regex.clone(),
|
return Some(Value::error(err, self.span));
|
||||||
span,
|
}
|
||||||
s,
|
|
||||||
self.columns.clone(),
|
|
||||||
&mut self.excess,
|
|
||||||
);
|
|
||||||
|
|
||||||
if parsed.is_none() {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
return parsed;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ParseStreamerExternal {
|
fn captures_to_value(
|
||||||
|
captures: Result<Captures, fancy_regex::Error>,
|
||||||
|
columns: &[String],
|
||||||
span: Span,
|
span: Span,
|
||||||
excess: Vec<Value>,
|
) -> Result<Value, ShellError> {
|
||||||
regex: Regex,
|
let captures = captures.map_err(|err| ShellError::GenericError {
|
||||||
columns: Vec<String>,
|
error: "Error with regular expression captures".into(),
|
||||||
stream: Box<dyn Iterator<Item = Result<Vec<u8>, ShellError>> + Send + 'static>,
|
msg: err.to_string(),
|
||||||
}
|
span: Some(span),
|
||||||
|
help: None,
|
||||||
|
inner: vec![],
|
||||||
|
})?;
|
||||||
|
|
||||||
impl Iterator for ParseStreamerExternal {
|
let record = columns
|
||||||
type Item = Value;
|
.iter()
|
||||||
fn next(&mut self) -> Option<Value> {
|
.zip(captures.iter().skip(1))
|
||||||
if !self.excess.is_empty() {
|
.map(|(column, match_)| {
|
||||||
return Some(self.excess.remove(0));
|
let match_str = match_.map(|m| m.as_str()).unwrap_or("");
|
||||||
}
|
(column.clone(), Value::string(match_str, span))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
let mut chunk = self.stream.next();
|
Ok(Value::record(record, span))
|
||||||
|
|
||||||
// Collect all `stream` chunks into a single `chunk` to be able to deal with matches that
|
|
||||||
// extend across chunk boundaries.
|
|
||||||
// This is a stop-gap solution until the `regex` crate supports streaming or an alternative
|
|
||||||
// solution is found.
|
|
||||||
// See https://github.com/nushell/nushell/issues/9795
|
|
||||||
while let Some(Ok(chunks)) = &mut chunk {
|
|
||||||
match self.stream.next() {
|
|
||||||
Some(Ok(mut next_chunk)) => chunks.append(&mut next_chunk),
|
|
||||||
error @ Some(Err(_)) => chunk = error,
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let chunk = match chunk {
|
|
||||||
Some(Ok(chunk)) => chunk,
|
|
||||||
Some(Err(err)) => return Some(Value::error(err, self.span)),
|
|
||||||
_ => return None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let Ok(chunk) = String::from_utf8(chunk) else {
|
|
||||||
return Some(Value::error(
|
|
||||||
ShellError::PipelineMismatch {
|
|
||||||
exp_input_type: "string".into(),
|
|
||||||
dst_span: self.span,
|
|
||||||
src_span: self.span,
|
|
||||||
},
|
|
||||||
self.span,
|
|
||||||
));
|
|
||||||
};
|
|
||||||
|
|
||||||
stream_helper(
|
|
||||||
self.regex.clone(),
|
|
||||||
self.span,
|
|
||||||
chunk,
|
|
||||||
self.columns.clone(),
|
|
||||||
&mut self.excess,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn stream_helper(
|
|
||||||
regex: Regex,
|
|
||||||
span: Span,
|
|
||||||
s: String,
|
|
||||||
columns: Vec<String>,
|
|
||||||
excess: &mut Vec<Value>,
|
|
||||||
) -> Option<Value> {
|
|
||||||
let results = regex.captures_iter(&s);
|
|
||||||
|
|
||||||
for c in results {
|
|
||||||
let captures = match c {
|
|
||||||
Ok(c) => c,
|
|
||||||
Err(e) => {
|
|
||||||
return Some(Value::error(
|
|
||||||
ShellError::GenericError {
|
|
||||||
error: "Error with regular expression captures".into(),
|
|
||||||
msg: e.to_string(),
|
|
||||||
span: Some(span),
|
|
||||||
help: Some(e.to_string()),
|
|
||||||
inner: vec![],
|
|
||||||
},
|
|
||||||
span,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let record = columns
|
|
||||||
.iter()
|
|
||||||
.zip(captures.iter().skip(1))
|
|
||||||
.map(|(column_name, cap)| {
|
|
||||||
let cap_string = cap.map(|v| v.as_str()).unwrap_or("");
|
|
||||||
(column_name.clone(), Value::string(cap_string, span))
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
excess.push(Value::record(record, span));
|
|
||||||
}
|
|
||||||
|
|
||||||
if !excess.is_empty() {
|
|
||||||
Some(excess.remove(0))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
Loading…
Reference in New Issue
Block a user