forked from extern/nushell
Merge pull request #524 from androbtech/tsv-support
[from/to]tsv support.
This commit is contained in:
commit
6638fe4ab3
@ -180,6 +180,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
||||
whole_stream_command(ToCSV),
|
||||
whole_stream_command(ToJSON),
|
||||
whole_stream_command(ToTOML),
|
||||
whole_stream_command(ToTSV),
|
||||
whole_stream_command(ToYAML),
|
||||
whole_stream_command(SortBy),
|
||||
whole_stream_command(Tags),
|
||||
@ -188,6 +189,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
||||
whole_stream_command(FromArray),
|
||||
whole_stream_command(FromArray),
|
||||
whole_stream_command(FromCSV),
|
||||
whole_stream_command(FromTSV),
|
||||
whole_stream_command(FromINI),
|
||||
whole_stream_command(FromBSON),
|
||||
whole_stream_command(FromJSON),
|
||||
|
@ -20,6 +20,7 @@ crate mod from_csv;
|
||||
crate mod from_ini;
|
||||
crate mod from_json;
|
||||
crate mod from_toml;
|
||||
crate mod from_tsv;
|
||||
crate mod from_xml;
|
||||
crate mod from_yaml;
|
||||
crate mod get;
|
||||
@ -52,6 +53,7 @@ crate mod to_bson;
|
||||
crate mod to_csv;
|
||||
crate mod to_json;
|
||||
crate mod to_toml;
|
||||
crate mod to_tsv;
|
||||
crate mod to_yaml;
|
||||
crate mod trim;
|
||||
crate mod version;
|
||||
@ -78,6 +80,7 @@ crate use from_csv::FromCSV;
|
||||
crate use from_ini::FromINI;
|
||||
crate use from_json::FromJSON;
|
||||
crate use from_toml::FromTOML;
|
||||
crate use from_tsv::FromTSV;
|
||||
crate use from_xml::FromXML;
|
||||
crate use from_yaml::FromYAML;
|
||||
crate use get::Get;
|
||||
@ -109,6 +112,7 @@ crate use to_bson::ToBSON;
|
||||
crate use to_csv::ToCSV;
|
||||
crate use to_json::ToJSON;
|
||||
crate use to_toml::ToTOML;
|
||||
crate use to_tsv::ToTSV;
|
||||
crate use to_yaml::ToYAML;
|
||||
crate use trim::Trim;
|
||||
crate use version::Version;
|
||||
|
135
src/commands/from_tsv.rs
Normal file
135
src/commands/from_tsv.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::object::{Primitive, TaggedDictBuilder, Value};
|
||||
use crate::prelude::*;
|
||||
use csv::ReaderBuilder;
|
||||
|
||||
/// Unit type that carries the `WholeStreamCommand` implementation for `from-tsv`.
pub struct FromTSV;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FromTSVArgs {
|
||||
headerless: bool,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for FromTSV {
|
||||
fn name(&self) -> &str {
|
||||
"from-tsv"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("from-tsv").switch("headerless")
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, from_tsv)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_tsv_string_to_value(
|
||||
s: String,
|
||||
headerless: bool,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, csv::Error> {
|
||||
let mut reader = ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.delimiter(b'\t')
|
||||
.from_reader(s.as_bytes());
|
||||
let tag = tag.into();
|
||||
|
||||
let mut fields: VecDeque<String> = VecDeque::new();
|
||||
let mut iter = reader.records();
|
||||
let mut rows = vec![];
|
||||
|
||||
if let Some(result) = iter.next() {
|
||||
let line = result?;
|
||||
|
||||
for (idx, item) in line.iter().enumerate() {
|
||||
if headerless {
|
||||
fields.push_back(format!("Column{}", idx + 1));
|
||||
} else {
|
||||
fields.push_back(item.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
if let Some(row_values) = iter.next() {
|
||||
let row_values = row_values?;
|
||||
|
||||
let mut row = TaggedDictBuilder::new(tag);
|
||||
|
||||
for (idx, entry) in row_values.iter().enumerate() {
|
||||
row.insert_tagged(
|
||||
fields.get(idx).unwrap(),
|
||||
Value::Primitive(Primitive::String(String::from(entry))).tagged(tag),
|
||||
);
|
||||
}
|
||||
|
||||
rows.push(row.into_tagged_value());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Tagged::from_item(Value::List(rows), tag))
|
||||
}
|
||||
|
||||
fn from_tsv(
|
||||
FromTSVArgs {
|
||||
headerless: skip_headers,
|
||||
}: FromTSVArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let name_span = name;
|
||||
|
||||
let stream = async_stream_block! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
let mut concat_string = String::new();
|
||||
let mut latest_tag: Option<Tag> = None;
|
||||
|
||||
for value in values {
|
||||
let value_tag = value.tag();
|
||||
latest_tag = Some(value_tag);
|
||||
match value.item {
|
||||
Value::Primitive(Primitive::String(s)) => {
|
||||
concat_string.push_str(&s);
|
||||
concat_string.push_str("\n");
|
||||
}
|
||||
_ => yield Err(ShellError::labeled_error_with_secondary(
|
||||
"Expected a string from pipeline",
|
||||
"requires string input",
|
||||
name_span,
|
||||
"value originates from here",
|
||||
value_tag.span,
|
||||
)),
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
match from_tsv_string_to_value(concat_string, skip_headers, name_span) {
|
||||
Ok(x) => match x {
|
||||
Tagged { item: Value::List(list), .. } => {
|
||||
for l in list {
|
||||
yield ReturnSuccess::value(l);
|
||||
}
|
||||
}
|
||||
x => yield ReturnSuccess::value(x),
|
||||
},
|
||||
Err(_) => if let Some(last_tag) = latest_tag {
|
||||
yield Err(ShellError::labeled_error_with_secondary(
|
||||
"Could not parse as TSV",
|
||||
"input cannot be parsed as TSV",
|
||||
name_span,
|
||||
"value originates from here",
|
||||
last_tag.span,
|
||||
))
|
||||
} ,
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
@ -437,6 +437,16 @@ pub fn parse_string_as_value(
|
||||
)
|
||||
})
|
||||
}
|
||||
Some(ref x) if x == "tsv" => {
|
||||
crate::commands::from_tsv::from_tsv_string_to_value(contents, false, contents_tag)
|
||||
.map_err(move |_| {
|
||||
ShellError::labeled_error(
|
||||
"Could not open as TSV",
|
||||
"could not open as TSV",
|
||||
name_span,
|
||||
)
|
||||
})
|
||||
}
|
||||
Some(ref x) if x == "toml" => {
|
||||
crate::commands::from_toml::from_toml_string_to_value(contents, contents_tag).map_err(
|
||||
move |_| {
|
||||
|
@ -1,4 +1,5 @@
|
||||
use crate::commands::to_csv::{to_string as to_csv_to_string, value_to_csv_value};
|
||||
use crate::commands::to_tsv::{to_string as to_tsv_to_string, value_to_tsv_value};
|
||||
use crate::commands::to_json::value_to_json_value;
|
||||
use crate::commands::to_toml::value_to_toml_value;
|
||||
use crate::commands::to_yaml::value_to_yaml_value;
|
||||
@ -166,6 +167,14 @@ fn to_string_for(
|
||||
}
|
||||
to_csv_to_string(&value_to_csv_value(&input[0]))?
|
||||
}
|
||||
Some(x) if x == "tsv" => {
|
||||
if input.len() != 1 {
|
||||
return Err(ShellError::string(
|
||||
"saving to tsv requires a single object (or use --raw)",
|
||||
));
|
||||
}
|
||||
to_tsv_to_string(&value_to_tsv_value(&input[0]))?
|
||||
}
|
||||
Some(x) if x == "toml" => {
|
||||
if input.len() != 1 {
|
||||
return Err(ShellError::string(
|
||||
|
108
src/commands/to_tsv.rs
Normal file
108
src/commands/to_tsv.rs
Normal file
@ -0,0 +1,108 @@
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::object::{Primitive, Value};
|
||||
use crate::prelude::*;
|
||||
use csv::WriterBuilder;
|
||||
|
||||
/// Unit type that carries the `WholeStreamCommand` implementation for `to-tsv`.
pub struct ToTSV;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ToTSVArgs {
|
||||
headerless: bool,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for ToTSV {
|
||||
fn name(&self) -> &str {
|
||||
"to-tsv"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("to-tsv").switch("headerless")
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, to_tsv)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn value_to_tsv_value(v: &Value) -> Value {
|
||||
match v {
|
||||
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
|
||||
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
|
||||
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
|
||||
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
|
||||
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
|
||||
Value::Object(o) => Value::Object(o.clone()),
|
||||
Value::List(l) => Value::List(l.clone()),
|
||||
Value::Block(_) => Value::Primitive(Primitive::Nothing),
|
||||
_ => Value::Primitive(Primitive::Nothing),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_string_helper(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
|
||||
match v {
|
||||
Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
|
||||
Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", *b as u64)),
|
||||
Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?),
|
||||
Value::List(_) => return Ok(String::from("[list list]")),
|
||||
Value::Object(_) => return Ok(String::from("[object]")),
|
||||
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
|
||||
_ => return Err("Bad input".into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(v: &Value) -> Result<String, Box<dyn std::error::Error>> {
|
||||
match v {
|
||||
Value::Object(o) => {
|
||||
let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]);
|
||||
let mut fields: VecDeque<String> = VecDeque::new();
|
||||
let mut values: VecDeque<String> = VecDeque::new();
|
||||
|
||||
for (k, v) in o.entries.iter() {
|
||||
fields.push_back(k.clone());
|
||||
values.push_back(to_string_helper(&v)?);
|
||||
}
|
||||
|
||||
wtr.write_record(fields).expect("can not write.");
|
||||
wtr.write_record(values).expect("can not write.");
|
||||
|
||||
return Ok(String::from_utf8(wtr.into_inner()?)?);
|
||||
}
|
||||
_ => return to_string_helper(&v),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_tsv(
|
||||
ToTSVArgs { headerless }: ToTSVArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let name_span = name;
|
||||
let out = input;
|
||||
|
||||
Ok(out
|
||||
.values
|
||||
.map(move |a| match to_string(&value_to_tsv_value(&a.item)) {
|
||||
Ok(x) => {
|
||||
let converted = if headerless {
|
||||
x.lines().skip(1).collect()
|
||||
} else {
|
||||
x
|
||||
};
|
||||
|
||||
ReturnSuccess::value(
|
||||
Value::Primitive(Primitive::String(converted)).simple_spanned(name_span),
|
||||
)
|
||||
}
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Expected an object with TSV-compatible structure from pipeline",
|
||||
"requires TSV-compatible input",
|
||||
name_span,
|
||||
format!("{} originates from here", a.item.type_name()),
|
||||
a.span(),
|
||||
)),
|
||||
})
|
||||
.to_output_stream())
|
||||
}
|
@ -250,6 +250,10 @@ mod tests {
|
||||
loc: fixtures().join("caco3_plastics.csv"),
|
||||
at: 0
|
||||
},
|
||||
Res {
|
||||
loc: fixtures().join("caco3_plastics.tsv"),
|
||||
at: 0
|
||||
},
|
||||
Res {
|
||||
loc: fixtures().join("cargo_sample.toml"),
|
||||
at: 0
|
||||
|
@ -68,6 +68,21 @@ fn open_can_parse_toml() {
|
||||
assert_eq!(actual, "2018");
|
||||
}
|
||||
|
||||
// Opens the `caco3_plastics.tsv` fixture, keeps the first row and checks that
// its `origin` column is "SPAIN".
#[test]
|
||||
fn open_can_parse_tsv() {
|
||||
let actual = nu!(
|
||||
cwd: "tests/fixtures/formats", h::pipeline(
|
||||
r#"
|
||||
open caco3_plastics.tsv
|
||||
| first 1
|
||||
| get origin
|
||||
| echo $it
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual, "SPAIN")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_can_parse_json() {
|
||||
let actual = nu!(
|
||||
|
@ -237,6 +237,136 @@ fn converts_structured_table_to_json_text() {
|
||||
})
|
||||
}
|
||||
|
||||
/// Round-trips the `caco3_plastics.tsv` fixture through `to-tsv` and
/// `from-tsv`, then checks that the first row's `origin` is still "SPAIN".
#[test]
fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table() {
    let origin = nu!(
        cwd: "tests/fixtures/formats",
        "open caco3_plastics.tsv | to-tsv | from-tsv | first 1 | get origin | echo $it"
    );

    assert_eq!(origin, "SPAIN");
}
|
||||
|
||||
// Splits the sample file into columns, keeps the last row, converts it with
// `to-tsv`, and checks that the output line selected by `nth 1` contains
// "Colombia".
#[test]
|
||||
fn converts_structured_table_to_tsv_text() {
|
||||
Playground::setup("filter_to_tsv_test_1", |dirs, sandbox| {
|
||||
sandbox
|
||||
.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"tsv_text_sample.txt",
|
||||
r#"
|
||||
importer shipper tariff_item name origin
|
||||
Plasticos Rival Reverte 2509000000 Calcium carbonate Spain
|
||||
Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia
|
||||
"#
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), h::pipeline(
|
||||
r#"
|
||||
open tsv_text_sample.txt
|
||||
| lines
|
||||
| split-column "\t" a b c d origin
|
||||
| last 1
|
||||
| to-tsv
|
||||
| lines
|
||||
| nth 1
|
||||
| echo "$it"
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.contains("Colombia"));
|
||||
})
|
||||
}
|
||||
|
||||
// Splits the sample file into columns, keeps the last row, converts it with
// `to-tsv --headerless`, and checks that the output contains "Colombia".
#[test]
|
||||
fn converts_structured_table_to_tsv_text_skipping_headers_after_conversion() {
|
||||
Playground::setup("filter_to_tsv_test_2", |dirs, sandbox| {
|
||||
sandbox
|
||||
.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"tsv_text_sample.txt",
|
||||
r#"
|
||||
importer shipper tariff_item name origin
|
||||
Plasticos Rival Reverte 2509000000 Calcium carbonate Spain
|
||||
Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia
|
||||
"#
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), h::pipeline(
|
||||
r#"
|
||||
open tsv_text_sample.txt
|
||||
| lines
|
||||
| split-column "\t" a b c d origin
|
||||
| last 1
|
||||
| to-tsv --headerless
|
||||
| echo "$it"
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.contains("Colombia"));
|
||||
})
|
||||
}
|
||||
|
||||
// Parses the sample file with `from-tsv`, converts the `rusty_luck` column to
// integers and checks that it sums to 3.
#[test]
|
||||
fn converts_from_tsv_text_to_structured_table() {
|
||||
Playground::setup("filter_from_tsv_test_1", |dirs, sandbox| {
|
||||
sandbox
|
||||
.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_amigos.txt",
|
||||
r#"
|
||||
first Name Last Name rusty_luck
|
||||
Andrés Robalino 1
|
||||
Jonathan Turner 1
|
||||
Yehuda Katz 1
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), h::pipeline(
|
||||
r#"
|
||||
open los_tres_amigos.txt
|
||||
| from-tsv
|
||||
| get rusty_luck
|
||||
| str --to-int
|
||||
| sum
|
||||
| echo $it
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual, "3");
|
||||
})
|
||||
}
|
||||
|
||||
// Parses the sample file with `from-tsv --headerless`, converts `Column3` to
// integers and checks that it sums to 3. The fixture still contains a header
// line; the expected total only accounts for the three data rows.
#[test]
|
||||
fn converts_from_tsv_text_skipping_headers_to_structured_table() {
|
||||
Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| {
|
||||
sandbox
|
||||
.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_amigos.txt",
|
||||
r#"
|
||||
first Name Last Name rusty_luck
|
||||
Andrés Robalino 1
|
||||
Jonathan Turner 1
|
||||
Yehuda Katz 1
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), h::pipeline(
|
||||
r#"
|
||||
open los_tres_amigos.txt
|
||||
| from-tsv --headerless
|
||||
| get Column3
|
||||
| str --to-int
|
||||
| sum
|
||||
| echo $it
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual, "3");
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_convert_json_text_to_bson_and_back_into_table() {
|
||||
let actual = nu!(
|
||||
@ -333,10 +463,10 @@ fn can_sum() {
|
||||
fn can_filter_by_unit_size_comparison() {
|
||||
let actual = nu!(
|
||||
cwd: "tests/fixtures/formats",
|
||||
"ls | where size > 1kb | sort-by size | get name | skip 1 | trim | echo $it"
|
||||
"ls | where size > 1kb | sort-by size | get name | first 1 | trim | echo $it"
|
||||
);
|
||||
|
||||
assert_eq!(actual, "caco3_plastics.csv");
|
||||
assert_eq!(actual, "cargo_sample.toml");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
10
tests/fixtures/formats/caco3_plastics.tsv
vendored
Normal file
10
tests/fixtures/formats/caco3_plastics.tsv
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
importer shipper tariff_item name origin shipped_at arrived_at net_weight fob_price cif_price cif_per_net_weight
|
||||
PLASTICOS RIVAL CIA LTDA S A REVERTE 2509000000 CARBONATO DE CALCIO TIPO CALCIPORE 160 T AL SPAIN 18/03/2016 17/04/2016 81,000.00 14,417.58 18,252.34 0.23
|
||||
MEXICHEM ECUADOR S.A. OMYA ANDINA S A 2836500000 CARBONATO COLOMBIA 07/07/2016 10/07/2016 26,000.00 7,072.00 8,127.18 0.31
|
||||
PLASTIAZUAY SA SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 27/07/2016 09/08/2016 81,000.00 8,100.00 11,474.55 0.14
|
||||
PLASTICOS RIVAL CIA LTDA AND ENDUSTRIYEL HAMMADDELER DIS TCARET LTD.STI. 2836500000 CALCIUM CARBONATE ANADOLU ANDCARB CT-1 TURKEY 04/10/2016 11/11/2016 100,000.00 17,500.00 22,533.75 0.23
|
||||
QUIMICA COMERCIAL QUIMICIAL CIA. LTDA. SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 24/06/2016 12/07/2016 27,000.00 3,258.90 5,585.00 0.21
|
||||
PICA PLASTICOS INDUSTRIALES C.A. OMYA ANDINA S.A 3824909999 CARBONATO DE CALCIO COLOMBIA 01/01/1900 18/01/2016 66,500.00 12,635.00 18,670.52 0.28
|
||||
PLASTIQUIM S.A. OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYA CARB 1T CG BBS 1000 COLOMBIA 01/01/1900 25/10/2016 33,000.00 6,270.00 9,999.00 0.30
|
||||
QUIMICOS ANDINOS QUIMANDI S.A. SIBELCO COLOMBIA SAS 3824909999 CARBONATO DE CALCIO RECUBIERTO COLOMBIA 01/11/2016 03/11/2016 52,000.00 8,944.00 13,039.05 0.25
|
||||
TIGRE ECUADOR S.A. ECUATIGRE OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO COLOMBIA 01/01/1900 28/10/2016 66,000.00 11,748.00 18,216.00 0.28
|
|
Loading…
Reference in New Issue
Block a user