forked from extern/nushell
parse: collect external stream chunks before matching (#9950)
<!--
if this PR closes one or more issues, you can automatically link the PR with
them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g.
- this PR should close #xxxx
- fixes #xxxx
-->

# Description

This PR implements the workaround discussed in #9795, i.e. having `parse` collect an external stream before operating on it with a regex.

- Should close #9795

# User-Facing Changes

<!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. -->

- `parse` will give the correct output for external streams
- increased memory and time overhead due to collecting the entire stream (no short-circuiting)

# Tests + Formatting

<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect -A clippy::result_large_err` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu  # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

- formatting is checked
- clippy is happy
- no tests that weren't already broken fail
- added test case

# After Submitting

<!--
If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
parent
570175f95d
commit
56ed1eb807
@ -394,32 +394,48 @@ impl Iterator for ParseStreamerExternal {
|
|||||||
return Some(self.excess.remove(0));
|
return Some(self.excess.remove(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
let v = self.stream.next();
|
let mut chunk = self.stream.next();
|
||||||
|
|
||||||
if let Some(Ok(v)) = v {
|
// Collect all `stream` chunks into a single `chunk` to be able to deal with matches that
|
||||||
match String::from_utf8(v) {
|
// extend across chunk boundaries.
|
||||||
Ok(s) => stream_helper(
|
// This is a stop-gap solution until the `regex` crate supports streaming or an alternative
|
||||||
self.regex.clone(),
|
// solution is found.
|
||||||
self.span,
|
// See https://github.com/nushell/nushell/issues/9795
|
||||||
s,
|
while let Some(Ok(chunks)) = &mut chunk {
|
||||||
self.columns.clone(),
|
match self.stream.next() {
|
||||||
&mut self.excess,
|
Some(Ok(mut next_chunk)) => chunks.append(&mut next_chunk),
|
||||||
),
|
error @ Some(Err(_)) => chunk = error,
|
||||||
Err(_) => Some(Value::Error {
|
None => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk = match chunk {
|
||||||
|
Some(Ok(chunk)) => chunk,
|
||||||
|
Some(Err(err)) => {
|
||||||
|
return Some(Value::Error {
|
||||||
|
error: Box::new(err),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let Ok(chunk) = String::from_utf8(chunk) else {
|
||||||
|
return Some(Value::Error {
|
||||||
error: Box::new(ShellError::PipelineMismatch {
|
error: Box::new(ShellError::PipelineMismatch {
|
||||||
exp_input_type: "string".into(),
|
exp_input_type: "string".into(),
|
||||||
dst_span: self.span,
|
dst_span: self.span,
|
||||||
src_span: self.span,
|
src_span: self.span,
|
||||||
}),
|
}),
|
||||||
}),
|
|
||||||
}
|
|
||||||
} else if let Some(Err(err)) = v {
|
|
||||||
Some(Value::Error {
|
|
||||||
error: Box::new(err),
|
|
||||||
})
|
})
|
||||||
} else {
|
};
|
||||||
None
|
|
||||||
}
|
stream_helper(
|
||||||
|
self.regex.clone(),
|
||||||
|
self.span,
|
||||||
|
chunk,
|
||||||
|
self.columns.clone(),
|
||||||
|
&mut self.excess,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,4 +211,19 @@ mod regex {
|
|||||||
|
|
||||||
assert_eq!(actual.out, "2");
|
assert_eq!(actual.out, "2");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_handles_external_stream_chunking() {
|
||||||
|
Playground::setup("parse_test_streaming_1", |dirs, _sandbox| {
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
"abcdefghijklmnopqrstuvwxyz" * 1000 | save --force data.txt;
|
||||||
|
open data.txt | parse --regex "(abcdefghijklmnopqrstuvwxyz)" | length
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "1000");
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user