forked from extern/nushell
Handle mixed LF+CRLF in lines
(#7316)
This closes #4989. Previously `lines` was unable to handle text input that contained both CRLF line breaks _and_ LF line breaks.

### Before:

![image](https://user-images.githubusercontent.com/26268125/205207685-b25da9e1-19fa-4abb-8ab2-0dd216c63fc0.png)

### After:

![image](https://user-images.githubusercontent.com/26268125/205207808-9f687242-a8c2-4b79-a12c-38b0583d8d52.png)
This commit is contained in:
parent
3ac36879e0
commit
ee5a387300
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2638,6 +2638,7 @@ dependencies = [
|
|||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
"rayon",
|
"rayon",
|
||||||
"reedline",
|
"reedline",
|
||||||
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"roxmltree",
|
"roxmltree",
|
||||||
"rstest",
|
"rstest",
|
||||||
|
@ -68,6 +68,7 @@ powierza-coefficient = "1.0.1"
|
|||||||
quick-xml = "0.25"
|
quick-xml = "0.25"
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
rayon = "1.5.1"
|
rayon = "1.5.1"
|
||||||
|
regex = "1.6.0"
|
||||||
reqwest = {version = "0.11", features = ["blocking", "json"] }
|
reqwest = {version = "0.11", features = ["blocking", "json"] }
|
||||||
roxmltree = "0.16.0"
|
roxmltree = "0.16.0"
|
||||||
rust-embed = "6.3.0"
|
rust-embed = "6.3.0"
|
||||||
|
@ -4,6 +4,10 @@ use nu_protocol::{
|
|||||||
Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
|
Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
|
||||||
Signature, Span, Type, Value,
|
Signature, Span, Type, Value,
|
||||||
};
|
};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
// regex can be replaced with fancy-regex once it supports `split()`
|
||||||
|
// https://github.com/fancy-regex/fancy-regex/issues/104
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Lines;
|
pub struct Lines;
|
||||||
@ -34,16 +38,18 @@ impl Command for Lines {
|
|||||||
let head = call.head;
|
let head = call.head;
|
||||||
let ctrlc = engine_state.ctrlc.clone();
|
let ctrlc = engine_state.ctrlc.clone();
|
||||||
let skip_empty = call.has_flag("skip-empty");
|
let skip_empty = call.has_flag("skip-empty");
|
||||||
|
|
||||||
|
// match \r\n or \n
|
||||||
|
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||||
match input {
|
match input {
|
||||||
#[allow(clippy::needless_collect)]
|
#[allow(clippy::needless_collect)]
|
||||||
// Collect is needed because the string may not live long enough for
|
// Collect is needed because the string may not live long enough for
|
||||||
// the Rc structure to continue using it. If split could take ownership
|
// the Rc structure to continue using it. If split could take ownership
|
||||||
// of the split values, then this wouldn't be needed
|
// of the split values, then this wouldn't be needed
|
||||||
PipelineData::Value(Value::String { val, span }, ..) => {
|
PipelineData::Value(Value::String { val, span }, ..) => {
|
||||||
let split_char = if val.contains("\r\n") { "\r\n" } else { "\n" };
|
let mut lines = LINE_BREAK_REGEX
|
||||||
|
.split(&val)
|
||||||
let mut lines = val
|
|
||||||
.split(split_char)
|
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect::<Vec<String>>();
|
.collect::<Vec<String>>();
|
||||||
|
|
||||||
@ -66,18 +72,12 @@ impl Command for Lines {
|
|||||||
Ok(iter.into_pipeline_data(engine_state.ctrlc.clone()))
|
Ok(iter.into_pipeline_data(engine_state.ctrlc.clone()))
|
||||||
}
|
}
|
||||||
PipelineData::ListStream(stream, ..) => {
|
PipelineData::ListStream(stream, ..) => {
|
||||||
let mut split_char = "\n";
|
|
||||||
|
|
||||||
let iter = stream
|
let iter = stream
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(move |value| {
|
.filter_map(move |value| {
|
||||||
if let Value::String { val, span } = value {
|
if let Value::String { val, span } = value {
|
||||||
if split_char != "\r\n" && val.contains("\r\n") {
|
let mut lines = LINE_BREAK_REGEX
|
||||||
split_char = "\r\n";
|
.split(&val)
|
||||||
}
|
|
||||||
|
|
||||||
let mut lines = val
|
|
||||||
.split(split_char)
|
|
||||||
.filter_map(|s| {
|
.filter_map(|s| {
|
||||||
if skip_empty && s.trim().is_empty() {
|
if skip_empty && s.trim().is_empty() {
|
||||||
None
|
None
|
||||||
@ -153,6 +153,9 @@ impl Iterator for RawStreamLinesAdapter {
|
|||||||
type Item = Result<Value, ShellError>;
|
type Item = Result<Value, ShellError>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
if !self.queue.is_empty() {
|
if !self.queue.is_empty() {
|
||||||
let s = self.queue.remove(0usize);
|
let s = self.queue.remove(0usize);
|
||||||
@ -188,11 +191,8 @@ impl Iterator for RawStreamLinesAdapter {
|
|||||||
Value::String { val, span } => {
|
Value::String { val, span } => {
|
||||||
self.span = span;
|
self.span = span;
|
||||||
|
|
||||||
let split_char =
|
let mut lines = LINE_BREAK_REGEX
|
||||||
if val.contains("\r\n") { "\r\n" } else { "\n" };
|
.split(&val)
|
||||||
|
|
||||||
let mut lines = val
|
|
||||||
.split(split_char)
|
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
@ -48,3 +48,16 @@ fn lines_multi_value_split() {
|
|||||||
|
|
||||||
assert_eq!(actual.out, "6");
|
assert_eq!(actual.out, "6");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// test whether this handles CRLF and LF in the same input
|
||||||
|
#[test]
|
||||||
|
fn lines_mixed_line_endings() {
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: "tests/fixtures/formats", pipeline(
|
||||||
|
r#"
|
||||||
|
"foo\nbar\r\nquux" | lines | length
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "3");
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user