use crate::commands::WholeStreamCommand; use crate::prelude::*; use nu_errors::ShellError; use nu_protocol::{ Primitive, ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, }; use nu_source::Tagged; pub struct FromSSV; #[derive(Deserialize)] pub struct FromSSVArgs { headerless: bool, #[serde(rename(deserialize = "aligned-columns"))] aligned_columns: bool, #[serde(rename(deserialize = "minimum-spaces"))] minimum_spaces: Option>, } const STRING_REPRESENTATION: &str = "from-ssv"; const DEFAULT_MINIMUM_SPACES: usize = 2; impl WholeStreamCommand for FromSSV { fn name(&self) -> &str { STRING_REPRESENTATION } fn signature(&self) -> Signature { Signature::build(STRING_REPRESENTATION) .switch("headerless", "don't treat the first row as column names") .switch("aligned-columns", "assume columns are aligned") .named( "minimum-spaces", SyntaxShape::Int, "the mininum spaces to separate columns", ) } fn usage(&self) -> &str { "Parse text as space-separated values and create a table. The default minimum number of spaces counted as a separator is 2." } fn run( &self, args: CommandArgs, registry: &CommandRegistry, ) -> Result { args.process(registry, from_ssv)?.run() } } enum HeaderOptions<'a> { WithHeaders(&'a str), WithoutHeaders, } fn parse_aligned_columns<'a>( lines: impl Iterator, headers: HeaderOptions, separator: &str, ) -> Vec> { fn construct<'a>( lines: impl Iterator, headers: Vec<(String, usize)>, ) -> Vec> { lines .map(|l| { headers .iter() .enumerate() .map(|(i, (header_name, start_position))| { let val = match headers.get(i + 1) { Some((_, end)) => { if *end < l.len() { l.get(*start_position..*end) } else { l.get(*start_position..) } } None => l.get(*start_position..), } .unwrap_or("") .trim() .into(); (header_name.clone(), val) }) .collect() }) .collect() } let find_indices = |line: &str| { let values = line .split(&separator) .map(str::trim) .filter(|s| !s.is_empty()); values .fold( (0, vec![]), |(current_pos, mut indices), value| match line[current_pos..].find(value) { None => (current_pos, indices), Some(index) => { let absolute_index = current_pos + index; indices.push(absolute_index); (absolute_index + value.len(), indices) } }, ) .1 }; let parse_with_headers = |lines, headers_raw: &str| { let indices = find_indices(headers_raw); let headers = headers_raw .split(&separator) .map(str::trim) .filter(|s| !s.is_empty()) .map(String::from) .zip(indices); let columns = headers.collect::>(); construct(lines, columns) }; let parse_without_headers = |ls: Vec<&str>| { let mut indices = ls .iter() .flat_map(|s| find_indices(*s)) .collect::>(); indices.sort(); indices.dedup(); let headers: Vec<(String, usize)> = indices .iter() .enumerate() .map(|(i, position)| (format!("Column{}", i + 1), *position)) .collect(); construct(ls.iter().map(|s| s.to_owned()), headers) }; match headers { HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), } } fn parse_separated_columns<'a>( lines: impl Iterator, headers: HeaderOptions, separator: &str, ) -> Vec> { fn collect<'a>( headers: Vec, rows: impl Iterator, separator: &str, ) -> Vec> { rows.map(|r| { headers .iter() .zip(r.split(separator).map(str::trim).filter(|s| !s.is_empty())) .map(|(a, b)| (a.to_owned(), b.to_owned())) .collect() }) .collect() } let parse_with_headers = |lines, headers_raw: &str| { let headers = headers_raw .split(&separator) .map(str::trim) .map(|s| s.to_owned()) .filter(|s| !s.is_empty()) .collect(); collect(headers, lines, separator) }; let parse_without_headers = |ls: Vec<&str>| { let num_columns = ls.iter().map(|r| r.len()).max().unwrap_or(0); let headers = (1..=num_columns) .map(|i| format!("Column{}", i)) .collect::>(); collect(headers, ls.iter().map(|s| s.as_ref()), separator) }; match headers { HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw), HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()), } } fn string_to_table( s: &str, headerless: bool, aligned_columns: bool, split_at: usize, ) -> Option>> { let mut lines = s.lines().filter(|l| !l.trim().is_empty()); let separator = " ".repeat(std::cmp::max(split_at, 1)); let (ls, header_options) = if headerless { (lines, HeaderOptions::WithoutHeaders) } else { let headers = lines.next()?; (lines, HeaderOptions::WithHeaders(headers)) }; let f = if aligned_columns { parse_aligned_columns } else { parse_separated_columns }; let parsed = f(ls, header_options, &separator); match parsed.len() { 0 => None, _ => Some(parsed), } } fn from_ssv_string_to_value( s: &str, headerless: bool, aligned_columns: bool, split_at: usize, tag: impl Into, ) -> Option { let tag = tag.into(); let rows = string_to_table(s, headerless, aligned_columns, split_at)? .iter() .map(|row| { let mut tagged_dict = TaggedDictBuilder::new(&tag); for (col, entry) in row { tagged_dict.insert_value( col, UntaggedValue::Primitive(Primitive::String(String::from(entry))) .into_value(&tag), ) } tagged_dict.into_value() }) .collect(); Some(UntaggedValue::Table(rows).into_value(&tag)) } fn from_ssv( FromSSVArgs { headerless, aligned_columns, minimum_spaces, }: FromSSVArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let stream = async_stream! { let values: Vec = input.values.collect().await; let mut concat_string = String::new(); let mut latest_tag: Option = None; let split_at = match minimum_spaces { Some(number) => number.item, None => DEFAULT_MINIMUM_SPACES }; for value in values { let value_tag = value.tag.clone(); latest_tag = Some(value_tag.clone()); if let Ok(s) = value.as_string() { concat_string.push_str(&s); } else { yield Err(ShellError::labeled_error_with_secondary ( "Expected a string from pipeline", "requires string input", &name, "value originates from here", &value_tag )) } } match from_ssv_string_to_value(&concat_string, headerless, aligned_columns, split_at, name.clone()) { Some(x) => match x { Value { value: UntaggedValue::Table(list), ..} => { for l in list { yield ReturnSuccess::value(l) } } x => yield ReturnSuccess::value(x) }, None => if let Some(tag) = latest_tag { yield Err(ShellError::labeled_error_with_secondary( "Could not parse as SSV", "input cannot be parsed ssv", &name, "value originates from here", &tag, )) }, } }; Ok(stream.to_output_stream()) } #[cfg(test)] mod tests { use super::*; fn owned(x: &str, y: &str) -> (String, String) { (String::from(x), String::from(y)) } #[test] fn it_trims_empty_and_whitespace_only_lines() { let input = r#" a b 1 2 3 4 "#; let result = string_to_table(input, false, true, 1); assert_eq!( result, Some(vec![ vec![owned("a", "1"), owned("b", "2")], vec![owned("a", "3"), owned("b", "4")] ]) ); } #[test] fn it_deals_with_single_column_input() { let input = r#" a 1 2 "#; let result = string_to_table(input, false, true, 1); assert_eq!( result, Some(vec![vec![owned("a", "1")], vec![owned("a", "2")]]) ); } #[test] fn it_uses_first_row_as_data_when_headerless() { let input = r#" a b 1 2 3 4 "#; let result = string_to_table(input, true, true, 1); assert_eq!( result, Some(vec![ vec![owned("Column1", "a"), owned("Column2", "b")], vec![owned("Column1", "1"), owned("Column2", "2")], vec![owned("Column1", "3"), owned("Column2", "4")] ]) ); } #[test] fn it_returns_none_given_an_empty_string() { let input = ""; let result = string_to_table(input, true, true, 1); assert!(result.is_none()); } #[test] fn it_allows_a_predefined_number_of_spaces() { let input = r#" column a column b entry 1 entry number 2 3 four "#; let result = string_to_table(input, false, true, 3); assert_eq!( result, Some(vec![ vec![ owned("column a", "entry 1"), owned("column b", "entry number 2") ], vec![owned("column a", "3"), owned("column b", "four")] ]) ); } #[test] fn it_trims_remaining_separator_space() { let input = r#" colA colB colC val1 val2 val3 "#; let trimmed = |s: &str| s.trim() == s; let result = string_to_table(input, false, true, 2).unwrap(); assert!(result .iter() .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b)))) } #[test] fn it_keeps_empty_columns() { let input = r#" colA col B col C val2 val3 val4 val 5 val 6 val7 val8 "#; let result = string_to_table(input, false, true, 2).unwrap(); assert_eq!( result, vec![ vec![ owned("colA", ""), owned("col B", "val2"), owned("col C", "val3") ], vec![ owned("colA", "val4"), owned("col B", "val 5"), owned("col C", "val 6") ], vec![ owned("colA", "val7"), owned("col B", ""), owned("col C", "val8") ], ] ) } #[test] fn it_uses_the_full_final_column() { let input = r#" colA col B val1 val2 trailing value that should be included "#; let result = string_to_table(input, false, true, 2).unwrap(); assert_eq!( result, vec![vec![ owned("colA", "val1"), owned("col B", "val2 trailing value that should be included"), ],] ) } #[test] fn it_handles_empty_values_when_headerless_and_aligned_columns() { let input = r#" a multi-word value b d 1 3-3 4 last "#; let result = string_to_table(input, true, true, 2).unwrap(); assert_eq!( result, vec![ vec![ owned("Column1", "a multi-word value"), owned("Column2", "b"), owned("Column3", ""), owned("Column4", "d"), owned("Column5", "") ], vec![ owned("Column1", "1"), owned("Column2", ""), owned("Column3", "3-3"), owned("Column4", "4"), owned("Column5", "") ], vec![ owned("Column1", ""), owned("Column2", ""), owned("Column3", ""), owned("Column4", ""), owned("Column5", "last") ], ] ) } #[test] fn input_is_parsed_correctly_if_either_option_works() { let input = r#" docker-registry docker-registry=default docker-registry=default 172.30.78.158 5000/TCP kubernetes component=apiserver,provider=kubernetes 172.30.0.2 443/TCP kubernetes-ro component=apiserver,provider=kubernetes 172.30.0.1 80/TCP "#; let aligned_columns_headerless = string_to_table(input, true, true, 2).unwrap(); let separator_headerless = string_to_table(input, true, false, 2).unwrap(); let aligned_columns_with_headers = string_to_table(input, false, true, 2).unwrap(); let separator_with_headers = string_to_table(input, false, false, 2).unwrap(); assert_eq!(aligned_columns_headerless, separator_headerless); assert_eq!(aligned_columns_with_headers, separator_with_headers); } }