nushell/src/commands/from_ssv.rs

314 lines
8.5 KiB
Rust
Raw Normal View History

2019-10-13 21:15:30 +02:00
use crate::commands::WholeStreamCommand;
use crate::data::{Primitive, TaggedDictBuilder, Value};
use crate::prelude::*;
/// Command type for `from-ssv`; its behaviour is provided by the
/// `WholeStreamCommand` implementation in this file.
pub struct FromSSV;
#[derive(Deserialize)]
pub struct FromSSVArgs {
headerless: bool,
2019-10-15 22:05:32 +02:00
#[serde(rename(deserialize = "minimum-spaces"))]
minimum_spaces: Option<Tagged<usize>>,
2019-10-13 21:15:30 +02:00
}
/// Command name returned by `FromSSV::name` and used to build its signature.
const STRING_REPRESENTATION: &str = "from-ssv";
/// Separator width used when the `minimum-spaces` argument is not supplied.
const DEFAULT_MINIMUM_SPACES: usize = 2;
2019-10-13 21:15:30 +02:00
impl WholeStreamCommand for FromSSV {
fn name(&self) -> &str {
STRING_REPRESENTATION
}
fn signature(&self) -> Signature {
Signature::build(STRING_REPRESENTATION)
.switch("headerless")
2019-10-15 22:05:32 +02:00
.named("minimum-spaces", SyntaxShape::Int)
2019-10-13 21:15:30 +02:00
}
fn usage(&self) -> &str {
2019-10-15 23:20:06 +02:00
"Parse text as space-separated values and create a table. The default minimum number of spaces counted as a separator is 2."
2019-10-13 21:15:30 +02:00
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, from_ssv)?.run()
}
}
/// Clamps `idx` to `s.len()` and moves it back to the nearest UTF-8 character
/// boundary, so the result is always a valid slicing position for `s`.
fn snap_to_char_boundary(s: &str, idx: usize) -> usize {
    let mut idx = idx.min(s.len());
    // Index 0 is always a character boundary, so this loop terminates.
    while !s.is_char_boundary(idx) {
        idx -= 1;
    }
    idx
}

/// Parses column-aligned, space-separated text into rows of
/// `(column name, cell)` pairs.
///
/// Lines that are empty or whitespace-only are skipped. The first remaining
/// line is the header line: it is split on runs of `split_at` spaces (at
/// least one) and the byte offset at which each header starts defines where
/// that column begins in every following line. A cell spans from its column's
/// start to the next column's start; the last column's cell ends at the first
/// separator, so trailing extra values are dropped. Cells are trimmed.
///
/// When `headerless` is true the header line is still consumed to determine
/// the column positions, but the columns are named `Column1`, `Column2`, ...
///
/// Every row contains every column: a line that ends before a column starts
/// yields an empty cell for that column.
///
/// Returns `None` when `s` contains no non-blank line.
fn string_to_table(
    s: &str,
    headerless: bool,
    split_at: usize,
) -> Option<Vec<Vec<(String, String)>>> {
    let mut lines = s.lines().filter(|l| !l.trim().is_empty());
    let separator = " ".repeat(split_at.max(1));
    let headers_raw = lines.next()?;

    // Locate each header by scanning forward from the end of the previous
    // one. Searching from the start of the line instead would return the
    // wrong offset whenever a header also occurs inside an earlier header.
    let mut columns: Vec<(usize, String)> = Vec::new();
    let mut cursor = 0;
    let header_names = headers_raw
        .trim()
        .split(separator.as_str())
        .map(str::trim)
        .filter(|h| !h.is_empty());
    for header in header_names {
        // Each piece is a substring of `headers_raw` at or after `cursor`,
        // so the search cannot fail; `?` merely avoids a panic path.
        let start = cursor + headers_raw.get(cursor..)?.find(header)?;
        cursor = start + header.len();
        let name = if headerless {
            format!("Column{}", columns.len() + 1)
        } else {
            header.to_owned()
        };
        columns.push((start, name));
    }

    let rows = lines
        .map(|line| {
            columns
                .iter()
                .enumerate()
                .map(|(i, (start, name))| {
                    // Offsets come from the header line; clamp them to this
                    // line so short or non-ASCII lines still produce a cell.
                    let from = snap_to_char_boundary(line, *start);
                    let cell = match columns.get(i + 1) {
                        Some((next_start, _)) => {
                            let to = snap_to_char_boundary(line, *next_start);
                            line.get(from..to).unwrap_or("")
                        }
                        // Last column: stop at the first separator so that
                        // trailing values are ignored.
                        None => line
                            .get(from..)
                            .and_then(|rest| rest.split(separator.as_str()).next())
                            .unwrap_or(""),
                    };
                    (name.clone(), cell.trim().to_owned())
                })
                .collect()
        })
        .collect();

    Some(rows)
}
2019-10-13 22:50:45 +02:00
fn from_ssv_string_to_value(
s: &str,
headerless: bool,
split_at: usize,
2019-10-13 22:50:45 +02:00
tag: impl Into<Tag>,
) -> Option<Tagged<Value>> {
let tag = tag.into();
let rows = string_to_table(s, headerless, split_at)?
.iter()
.map(|row| {
let mut tagged_dict = TaggedDictBuilder::new(&tag);
for (col, entry) in row {
tagged_dict.insert_tagged(
col,
Value::Primitive(Primitive::String(String::from(entry))).tagged(&tag),
2019-10-13 22:50:45 +02:00
)
}
tagged_dict.into_tagged_value()
2019-10-13 22:50:45 +02:00
})
.collect();
Some(Value::Table(rows).tagged(&tag))
2019-10-13 22:50:45 +02:00
}
2019-10-13 21:15:30 +02:00
fn from_ssv(
2019-10-15 22:05:32 +02:00
FromSSVArgs {
headerless,
minimum_spaces,
}: FromSSVArgs,
2019-10-13 21:15:30 +02:00
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
2019-10-13 22:50:45 +02:00
let stream = async_stream! {
let values: Vec<Tagged<Value>> = input.values.collect().await;
let mut concat_string = String::new();
let mut latest_tag: Option<Tag> = None;
2019-10-15 22:05:32 +02:00
let split_at = match minimum_spaces {
Some(number) => number.item,
None => DEFAULT_MINIMUM_SPACES
};
2019-10-13 22:50:45 +02:00
for value in values {
let value_tag = value.tag();
latest_tag = Some(value_tag.clone());
2019-10-13 22:50:45 +02:00
match value.item {
Value::Primitive(Primitive::String(s)) => {
concat_string.push_str(&s);
}
_ => yield Err(ShellError::labeled_error_with_secondary (
"Expected a string from pipeline",
"requires string input",
&name,
2019-10-13 22:50:45 +02:00
"value originates from here",
&value_tag
2019-10-13 22:50:45 +02:00
)),
}
}
match from_ssv_string_to_value(&concat_string, headerless, split_at, name.clone()) {
Some(x) => match x {
2019-10-13 22:50:45 +02:00
Tagged { item: Value::Table(list), ..} => {
for l in list { yield ReturnSuccess::value(l) }
}
x => yield ReturnSuccess::value(x)
},
None => if let Some(tag) = latest_tag {
2019-10-13 22:50:45 +02:00
yield Err(ShellError::labeled_error_with_secondary(
"Could not parse as SSV",
"input cannot be parsed ssv",
&name,
2019-10-13 22:50:45 +02:00
"value originates from here",
&tag,
2019-10-13 22:50:45 +02:00
))
},
2019-10-13 22:50:45 +02:00
}
};
Ok(stream.to_output_stream())
2019-10-13 21:15:30 +02:00
}
// Unit tests for `string_to_table`. The fixtures are column-aligned: a cell
// belongs to a column when it starts at the same offset as that column's
// header, so the spacing inside the raw strings is significant.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(column, cell)` pair from string slices.
    fn owned(x: &str, y: &str) -> (String, String) {
        (String::from(x), String::from(y))
    }

    #[test]
    fn it_trims_empty_and_whitespace_only_lines() {
        let input = r#"

            a b

            1 2

            3 4
        "#;
        let result = string_to_table(input, false, 1);
        assert_eq!(
            result,
            Some(vec![
                vec![owned("a", "1"), owned("b", "2")],
                vec![owned("a", "3"), owned("b", "4")]
            ])
        );
    }

    #[test]
    fn it_deals_with_single_column_input() {
        let input = r#"
            a
            1
            2
        "#;
        let result = string_to_table(input, false, 1);
        assert_eq!(
            result,
            Some(vec![vec![owned("a", "1")], vec![owned("a", "2")]])
        );
    }

    #[test]
    fn it_ignores_headers_when_headerless() {
        let input = r#"
            a b
            1 2
            3 4
        "#;
        let result = string_to_table(input, true, 1);
        assert_eq!(
            result,
            Some(vec![
                vec![owned("Column1", "1"), owned("Column2", "2")],
                vec![owned("Column1", "3"), owned("Column2", "4")]
            ])
        );
    }

    #[test]
    fn it_returns_none_given_an_empty_string() {
        let input = "";
        let result = string_to_table(input, true, 1);
        assert!(result.is_none());
    }

    #[test]
    fn it_allows_a_predefined_number_of_spaces() {
        let input = r#"
            column a   column b
            entry 1    entry number 2
            3          four
        "#;
        let result = string_to_table(input, false, 3);
        assert_eq!(
            result,
            Some(vec![
                vec![
                    owned("column a", "entry 1"),
                    owned("column b", "entry number 2")
                ],
                vec![owned("column a", "3"), owned("column b", "four")]
            ])
        );
    }

    #[test]
    fn it_trims_remaining_separator_space() {
        let input = r#"
            colA   colB     colC
            val1   val2     val3
        "#;
        let trimmed = |s: &str| s.trim() == s;
        let result = string_to_table(input, false, 2).unwrap();
        assert!(result
            .iter()
            .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b))));
    }

    #[test]
    fn it_keeps_empty_columns() {
        let input = r#"
            colA   col B     col C
                   val2      val3
            val4   val 5     val 6
            val7             val8
        "#;
        let result = string_to_table(input, false, 2).unwrap();
        assert_eq!(
            result,
            vec![
                vec![
                    owned("colA", ""),
                    owned("col B", "val2"),
                    owned("col C", "val3")
                ],
                vec![
                    owned("colA", "val4"),
                    owned("col B", "val 5"),
                    owned("col C", "val 6")
                ],
                vec![
                    owned("colA", "val7"),
                    owned("col B", ""),
                    owned("col C", "val8")
                ],
            ]
        );
    }

    #[test]
    fn it_drops_trailing_values() {
        let input = r#"
            colA   col B
            val1   val2   trailing value that should be ignored
        "#;
        let result = string_to_table(input, false, 2).unwrap();
        assert_eq!(
            result,
            vec![vec![owned("colA", "val1"), owned("col B", "val2"),],]
        );
    }
}