From 1060ba220670261c3104b2873eff0ffe6b87b61f Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Mon, 11 Nov 2019 15:07:02 +0100 Subject: [PATCH] Fixes --headerless functionality for from-ssv. Squashed commit of the following: commit fc59d47a2291461d84e0587fc0fe63af0dc26f9f Author: Thomas Hartmann Date: Tue Nov 12 15:39:38 2019 +0100 Fixes inconsistencies in output. commit da4084e9fdd983557b101207b381e333a443e551 Author: Thomas Hartmann Date: Tue Nov 12 13:04:10 2019 +0100 remove unused enum. commit 7f6a105879c8746786b99fb19bb9f0860c41796a Author: Thomas Hartmann Date: Tue Nov 12 12:58:41 2019 +0100 Starts refactoring from_ssv. commit b70ddd169ef0c900e03fb590cb171cc7181528db Author: Thomas Hartmann Date: Tue Nov 12 11:34:06 2019 +0100 Fixes --headerless for non-aligned columns. commit 6332778dd26de8d07be77b291124115141479892 Author: Thomas Hartmann Date: Tue Nov 12 10:27:35 2019 +0100 Fixes from-ssv headerless aligned-columns logic. commit 747d8c812e06349b4a15b8c130721881d86fff98 Author: Thomas Hartmann Date: Mon Nov 11 23:53:59 2019 +0100 fixes unit tests for ssv. commit c77cb451623b37a7a9742c791a4fc38cad053d3d Author: Thomas Hartmann Date: Mon Nov 11 22:49:21 2019 +0100 it compiles! one broken test. commit 08a05964f56cf92507c255057d0aaf2b6dbb6f45 Author: Thomas Hartmann Date: Mon Nov 11 18:52:54 2019 +0100 Backed into a corner. Help. commit c95ab683025a8007b8a6f8e1659f021a002df584 Author: Thomas Hartmann Date: Mon Nov 11 17:30:54 2019 +0100 broken but on the way --- src/commands/from_ssv.rs | 280 +++++++++++++++++++++++++++++---------- tests/filters_test.rs | 16 ++- 2 files changed, 226 insertions(+), 70 deletions(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 090bab508..37bba215f 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -45,6 +45,149 @@ impl WholeStreamCommand for FromSSV { } } +enum HeaderOptions<'a> { + WithHeaders(&'a str), + WithoutHeaders, +} + +fn parse_aligned_columns<'a>( + lines: impl Iterator, + headers: HeaderOptions, + separator: &str, +) -> Vec> { + fn construct<'a>( + lines: impl Iterator, + headers: Vec<(String, usize)>, + ) -> Vec> { + lines + .map(|l| { + headers + .iter() + .enumerate() + .map(|(i, (header_name, start_position))| { + let val = match headers.get(i + 1) { + Some((_, end)) => { + if *end < l.len() { + l.get(*start_position..*end) + } else { + l.get(*start_position..) + } + } + None => l.get(*start_position..), + } + .unwrap_or("") + .trim() + .into(); + (header_name.clone(), val) + }) + .collect() + }) + .collect() + } + + let find_indices = |line: &str| { + let values = line + .split(&separator) + .map(str::trim) + .filter(|s| !s.is_empty()); + values + .fold( + (0, vec![]), + |(current_pos, mut indices), value| match line[current_pos..].find(value) { + None => (current_pos, indices), + Some(index) => { + let absolute_index = current_pos + index; + indices.push(absolute_index); + (absolute_index + value.len(), indices) + } + }, + ) + .1 + }; + + let parse_with_headers = |lines, headers_raw: &str| { + let indices = find_indices(headers_raw); + let headers = headers_raw + .split(&separator) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(String::from) + .zip(indices); + + let columns = headers.collect::>(); + + construct(lines, columns) + }; + + let parse_without_headers = |ls: Vec<&str>| { + let mut indices = ls + .iter() + .flat_map(|s| find_indices(*s)) + .collect::>(); + + indices.sort(); + indices.dedup(); + + let headers: Vec<(String, usize)> = indices + .iter() + .enumerate() + .map(|(i, position)| (format!("Column{}", i + 1), *position)) + .collect(); + + construct(ls.iter().map(|s| s.to_owned()), headers) + }; + + match headers { + HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), + HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), + } +} + +fn parse_separated_columns<'a>( + lines: impl Iterator, + headers: HeaderOptions, + separator: &str, +) -> Vec> { + fn collect<'a>( + headers: Vec, + rows: impl Iterator, + separator: &str, + ) -> Vec> { + rows.map(|r| { + headers + .iter() + .zip(r.split(separator).map(str::trim).filter(|s| !s.is_empty())) + .map(|(a, b)| (a.to_owned(), b.to_owned())) + .collect() + }) + .collect() + } + + let parse_with_headers = |lines, headers_raw: &str| { + let headers = headers_raw + .split(&separator) + .map(str::trim) + .map(|s| s.to_owned()) + .filter(|s| !s.is_empty()) + .collect(); + collect(headers, lines, separator) + }; + + let parse_without_headers = |ls: Vec<&str>| { + let num_columns = ls.iter().map(|r| r.len()).max().unwrap_or(0); + + let headers = (1..=num_columns) + .map(|i| format!("Column{}", i)) + .collect::>(); + collect(headers, ls.iter().map(|s| s.as_ref()), separator) + }; + + match headers { + HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), + HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), + } +} + fn string_to_table( s: &str, headerless: bool, @@ -54,76 +197,23 @@ fn string_to_table( let mut lines = s.lines().filter(|l| !l.trim().is_empty()); let separator = " ".repeat(std::cmp::max(split_at, 1)); - if aligned_columns { - let headers_raw = lines.next()?; - - let headers = headers_raw - .trim() - .split(&separator) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| (headers_raw.find(s).unwrap(), s.to_owned())); - - let columns = if headerless { - headers - .enumerate() - .map(|(header_no, (string_index, _))| { - (string_index, format!("Column{}", header_no + 1)) - }) - .collect::>() - } else { - headers.collect::>() - }; - - Some( - lines - .map(|l| { - columns - .iter() - .enumerate() - .filter_map(|(i, (start, col))| { - (match columns.get(i + 1) { - Some((end, _)) => l.get(*start..*end), - None => l.get(*start..), - }) - .and_then(|s| Some((col.clone(), String::from(s.trim())))) - }) - .collect() - }) - .collect(), - ) + let (ls, header_options) = if headerless { + (lines, HeaderOptions::WithoutHeaders) } else { - let headers = lines - .next()? - .split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .map(|s| s.to_owned()) - .collect::>(); + let headers = lines.next()?; + (lines, HeaderOptions::WithHeaders(headers)) + }; - let header_row = if headerless { - (1..=headers.len()) - .map(|i| format!("Column{}", i)) - .collect::>() - } else { - headers - }; + let f = if aligned_columns { + parse_aligned_columns + } else { + parse_separated_columns + }; - Some( - lines - .map(|l| { - header_row - .iter() - .zip( - l.split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()), - ) - .map(|(a, b)| (String::from(a), String::from(b))) - .collect() - }) - .collect(), - ) + let parsed = f(ls, header_options, &separator); + match parsed.len() { + 0 => None, + _ => Some(parsed), } } @@ -250,7 +340,7 @@ mod tests { } #[test] - fn it_ignores_headers_when_headerless() { + fn it_uses_first_row_as_data_when_headerless() { let input = r#" a b 1 2 @@ -260,6 +350,7 @@ mod tests { assert_eq!( result, Some(vec![ + vec![owned("Column1", "a"), owned("Column2", "b")], vec![owned("Column1", "1"), owned("Column2", "2")], vec![owned("Column1", "3"), owned("Column2", "4")] ]) @@ -357,4 +448,57 @@ mod tests { ],] ) } + + #[test] + fn it_handles_empty_values_when_headerless_and_aligned_columns() { + let input = r#" + a multi-word value b d + 1 3-3 4 + last + "#; + + let result = string_to_table(input, true, true, 2).unwrap(); + assert_eq!( + result, + vec![ + vec![ + owned("Column1", "a multi-word value"), + owned("Column2", "b"), + owned("Column3", ""), + owned("Column4", "d"), + owned("Column5", "") + ], + vec![ + owned("Column1", "1"), + owned("Column2", ""), + owned("Column3", "3-3"), + owned("Column4", "4"), + owned("Column5", "") + ], + vec![ + owned("Column1", ""), + owned("Column2", ""), + owned("Column3", ""), + owned("Column4", ""), + owned("Column5", "last") + ], + ] + ) + } + + #[test] + fn input_is_parsed_correctly_if_either_option_works() { + let input = r#" + docker-registry docker-registry=default docker-registry=default 172.30.78.158 5000/TCP + kubernetes component=apiserver,provider=kubernetes 172.30.0.2 443/TCP + kubernetes-ro component=apiserver,provider=kubernetes 172.30.0.1 80/TCP + "#; + + let aligned_columns_headerless = string_to_table(input, true, true, 2).unwrap(); + let separator_headerless = string_to_table(input, true, false, 2).unwrap(); + let aligned_columns_with_headers = string_to_table(input, false, true, 2).unwrap(); + let separator_with_headers = string_to_table(input, false, false, 2).unwrap(); + assert_eq!(aligned_columns_headerless, separator_headerless); + assert_eq!(aligned_columns_with_headers, separator_with_headers); + } } diff --git a/tests/filters_test.rs b/tests/filters_test.rs index 9ccb4ab71..e18f20be6 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -450,7 +450,18 @@ fn converts_from_ssv_text_treating_first_line_as_data_with_flag() { "#, )]); - let actual = nu!( + let aligned_columns = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open oc_get_svc.txt + | from-ssv --headerless --aligned-columns + | first + | get Column1 + | echo $it + "# + )); + + let separator_based = nu!( cwd: dirs.test(), h::pipeline( r#" open oc_get_svc.txt @@ -461,7 +472,8 @@ fn converts_from_ssv_text_treating_first_line_as_data_with_flag() { "# )); - assert_eq!(actual, "docker-registry"); + assert_eq!(aligned_columns, separator_based); + assert_eq!(separator_based, "docker-registry"); }) }