combine functions behind to/from-c/tsv commands

fixes #969, admittedly without a --delimiter alias

moves from_structured_data.rs to from_delimited_data.rs to better
identify its scope and adds to_delimited_data.rs. Now csv and tsv both
use the same code, tsv passes in a fixed '\t' argument where csv passes
in the value of --separator
This commit is contained in:
David Mason 2019-11-19 15:13:10 +00:00
parent bff50c6987
commit b3c021899c
11 changed files with 365 additions and 394 deletions

View File

@ -92,6 +92,10 @@ error: Expected a string from pipeline
━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━
```
The string '\t' can be used to separate on tabs. Note that this is the same as using the from-tsv command.
Newlines '\n' are not acceptable separators.
Note that separators are currently provided as strings and need to be wrapped in quotes.
```shell

View File

@ -78,3 +78,37 @@ PLASTIQUIM S.A.,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO
QUIMICOS ANDINOS QUIMANDI S.A.,SIBELCO COLOMBIA SAS,3824909999,CARBONATO DE CALCIO RECUBIERTO,COLOMBIA,01/11/2016,03/11/2016,"52,000.00","8,944.00","13,039.05",0.25
TIGRE ECUADOR S.A. ECUATIGRE,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO,COLOMBIA,01/01/1900,28/10/2016,"66,000.00","11,748.00","18,216.00",0.28
```
To use a character other than ',' to separate records, use `--separator` :
```shell
> shells
━━━┯━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━
# │ │ name │ path
───┼───┼────────────┼────────────────────────
0 │ X │ filesystem │ /home/shaurya
1 │ │ filesystem │ /home/shaurya/Pictures
2 │ │ filesystem │ /home/shaurya/Desktop
━━━┷━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━
> shells | to-csv --separator ';'
;name,path
X;filesystem;/home/shaurya
;filesystem;/home/shaurya/Pictures
;filesystem;/home/shaurya/Desktop
```
The string '\t' can be used to separate on tabs. Note that this is the same as using the to-tsv command.
Newlines '\n' are not acceptable separators.
Note that separators are currently provided as strings and need to be wrapped in quotes.
It is also considered an error to use a separator greater than one char :
```shell
> open pets.txt | from-csv --separator '123'
error: Expected a single separator char from --separator
- shell:1:37
1 | open pets.txt | from-csv --separator '123'
| ^^^^^ requires a single character string input
```

View File

@ -1,7 +1,8 @@
#[macro_use]
pub(crate) mod macros;
mod from_structured_data;
mod from_delimited_data;
mod to_delimited_data;
pub(crate) mod append;
pub(crate) mod args;

View File

@ -1,4 +1,4 @@
use crate::commands::from_structured_data::from_structured_data;
use crate::commands::from_delimited_data::from_delimited_data;
use crate::commands::WholeStreamCommand;
use crate::data::{Primitive, Value};
use crate::prelude::*;
@ -52,6 +52,9 @@ fn from_csv(
tag,
..
}) => {
if s == r"\t" {
'\t'
} else {
let vec_s: Vec<char> = s.chars().collect();
if vec_s.len() != 1 {
return Err(ShellError::labeled_error(
@ -62,8 +65,9 @@ fn from_csv(
};
vec_s[0]
}
}
_ => ',',
};
from_structured_data(headerless, sep, "CSV", runnable_context)
from_delimited_data(headerless, sep, "CSV", runnable_context)
}

View File

@ -2,7 +2,7 @@ use crate::data::{Primitive, TaggedDictBuilder, Value};
use crate::prelude::*;
use csv::ReaderBuilder;
fn from_stuctured_string_to_value(
fn from_delimited_string_to_value(
s: String,
headerless: bool,
separator: char,
@ -37,7 +37,7 @@ fn from_stuctured_string_to_value(
Ok(Value::Table(rows).tagged(&tag))
}
pub fn from_structured_data(
pub fn from_delimited_data(
headerless: bool,
sep: char,
format_name: &'static str,
@ -70,7 +70,7 @@ pub fn from_structured_data(
}
}
match from_stuctured_string_to_value(concat_string, headerless, sep, name_tag.clone()) {
match from_delimited_string_to_value(concat_string, headerless, sep, name_tag.clone()) {
Ok(x) => match x {
Tagged { item: Value::Table(list), .. } => {
for l in list {

View File

@ -1,4 +1,4 @@
use crate::commands::from_structured_data::from_structured_data;
use crate::commands::from_delimited_data::from_delimited_data;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
@ -36,5 +36,5 @@ fn from_tsv(
FromTSVArgs { headerless }: FromTSVArgs,
runnable_context: RunnableContext,
) -> Result<OutputStream, ShellError> {
from_structured_data(headerless, '\t', "TSV", runnable_context)
from_delimited_data(headerless, '\t', "TSV", runnable_context)
}

View File

@ -1,13 +1,14 @@
use crate::commands::to_delimited_data::to_delimited_data;
use crate::commands::WholeStreamCommand;
use crate::data::{Primitive, Value};
use crate::prelude::*;
use csv::WriterBuilder;
pub struct ToCSV;
#[derive(Deserialize)]
pub struct ToCSVArgs {
headerless: bool,
separator: Option<Tagged<Value>>,
}
impl WholeStreamCommand for ToCSV {
@ -35,170 +36,34 @@ impl WholeStreamCommand for ToCSV {
}
}
pub fn value_to_csv_value(v: &Tagged<Value>) -> Tagged<Value> {
match &v.item {
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())),
Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())),
Value::Primitive(Primitive::Path(x)) => Value::Primitive(Primitive::Path(x.clone())),
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
Value::Row(o) => Value::Row(o.clone()),
Value::Table(l) => Value::Table(l.clone()),
Value::Block(_) => Value::Primitive(Primitive::Nothing),
_ => Value::Primitive(Primitive::Nothing),
}
.tagged(v.tag.clone())
}
fn to_string_helper(v: &Tagged<Value>) -> Result<String, ShellError> {
match &v.item {
Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", b)),
Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Decimal(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Int(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Path(_)) => Ok(v.as_string()?),
Value::Table(_) => return Ok(String::from("[Table]")),
Value::Row(_) => return Ok(String::from("[Row]")),
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
_ => {
return Err(ShellError::labeled_error(
"Unexpected value",
"",
v.tag.clone(),
))
}
}
}
fn merge_descriptors(values: &[Tagged<Value>]) -> Vec<String> {
let mut ret = vec![];
for value in values {
for desc in value.data_descriptors() {
if !ret.contains(&desc) {
ret.push(desc);
}
}
}
ret
}
pub fn to_string(tagged_value: &Tagged<Value>) -> Result<String, ShellError> {
let v = &tagged_value.item;
match v {
Value::Row(o) => {
let mut wtr = WriterBuilder::new().from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
let mut values: VecDeque<String> = VecDeque::new();
for (k, v) in o.entries.iter() {
fields.push_back(k.clone());
values.push_back(to_string_helper(&v)?);
}
wtr.write_record(fields).expect("can not write.");
wtr.write_record(values).expect("can not write.");
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
Value::Table(list) => {
let mut wtr = WriterBuilder::new().from_writer(vec![]);
let merged_descriptors = merge_descriptors(&list);
wtr.write_record(&merged_descriptors)
.expect("can not write.");
for l in list {
let mut row = vec![];
for desc in &merged_descriptors {
match l.item.get_data_by_key(&desc) {
Some(s) => {
row.push(to_string_helper(s)?);
}
None => {
row.push(String::new());
}
}
}
wtr.write_record(&row).expect("can not write");
}
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
_ => return to_string_helper(tagged_value),
}
}
fn to_csv(
ToCSVArgs { headerless }: ToCSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
ToCSVArgs {
separator,
headerless,
}: ToCSVArgs,
runnable_context: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_tag = name;
let stream = async_stream! {
let input: Vec<Tagged<Value>> = input.values.collect().await;
let to_process_input = if input.len() > 1 {
let tag = input[0].tag.clone();
vec![Tagged { item: Value::Table(input), tag } ]
} else if input.len() == 1 {
input
let sep = match separator {
Some(Tagged {
item: Value::Primitive(Primitive::String(s)),
tag,
..
}) => {
if s == r"\t" {
'\t'
} else {
vec![]
let vec_s: Vec<char> = s.chars().collect();
if vec_s.len() != 1 {
return Err(ShellError::labeled_error(
"Expected a single separator char from --separator",
"requires a single character string input",
tag,
));
};
for value in to_process_input {
match to_string(&value_to_csv_value(&value)) {
Ok(x) => {
let converted = if headerless {
x.lines().skip(1).collect()
} else {
x
vec_s[0]
}
}
_ => ',',
};
yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag))
}
_ => {
yield Err(ShellError::labeled_error_with_secondary(
"Expected a table with CSV-compatible structure.tag() from pipeline",
"requires CSV-compatible input",
&name_tag,
"originates from here".to_string(),
value.tag(),
))
}
}
}
};
Ok(stream.to_output_stream())
to_delimited_data(headerless, sep, "CSV", runnable_context)
}

View File

@ -0,0 +1,188 @@
use crate::data::{Primitive, Value};
use crate::prelude::*;
use csv::WriterBuilder;
fn from_value_to_delimited_string(
tagged_value: &Tagged<Value>,
separator: char,
) -> Result<String, ShellError> {
let v = &tagged_value.item;
match v {
Value::Row(o) => {
let mut wtr = WriterBuilder::new()
.delimiter(separator as u8)
.from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
let mut values: VecDeque<String> = VecDeque::new();
for (k, v) in o.entries.iter() {
fields.push_back(k.clone());
values.push_back(to_string_tagged_value(&v)?);
}
wtr.write_record(fields).expect("can not write.");
wtr.write_record(values).expect("can not write.");
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
Value::Table(list) => {
let mut wtr = WriterBuilder::new()
.delimiter(separator as u8)
.from_writer(vec![]);
let merged_descriptors = merge_descriptors(&list);
wtr.write_record(&merged_descriptors)
.expect("can not write.");
for l in list {
let mut row = vec![];
for desc in &merged_descriptors {
match l.item.get_data_by_key(&desc) {
Some(s) => {
row.push(to_string_tagged_value(s)?);
}
None => {
row.push(String::new());
}
}
}
wtr.write_record(&row).expect("can not write");
}
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
_ => return to_string_tagged_value(tagged_value),
}
}
// NOTE: could this be useful more widely and implemented on Tagged<Value> ?
pub fn clone_tagged_value(v: &Tagged<Value>) -> Tagged<Value> {
match &v.item {
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())),
Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())),
Value::Primitive(Primitive::Path(x)) => Value::Primitive(Primitive::Path(x.clone())),
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
Value::Row(o) => Value::Row(o.clone()),
Value::Table(l) => Value::Table(l.clone()),
Value::Block(_) => Value::Primitive(Primitive::Nothing),
_ => Value::Primitive(Primitive::Nothing),
}
.tagged(v.tag.clone())
}
// NOTE: could this be useful more widely and implemented on Tagged<Value> ?
fn to_string_tagged_value(v: &Tagged<Value>) -> Result<String, ShellError> {
match &v.item {
Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
Value::Primitive(Primitive::Bytes(b)) => {
let tmp = format!("{}", b);
Ok(tmp)
}
Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Decimal(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Int(_)) => Ok(v.as_string()?),
Value::Primitive(Primitive::Path(_)) => Ok(v.as_string()?),
Value::Table(_) => return Ok(String::from("[Table]")),
Value::Row(_) => return Ok(String::from("[Row]")),
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
_ => {
return Err(ShellError::labeled_error(
"Unexpected value",
"",
v.tag.clone(),
))
}
}
}
fn merge_descriptors(values: &[Tagged<Value>]) -> Vec<String> {
let mut ret = vec![];
for value in values {
for desc in value.data_descriptors() {
if !ret.contains(&desc) {
ret.push(desc);
}
}
}
ret
}
pub fn to_delimited_data(
headerless: bool,
sep: char,
format_name: &'static str,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_tag = name;
let stream = async_stream! {
let input: Vec<Tagged<Value>> = input.values.collect().await;
let to_process_input = if input.len() > 1 {
let tag = input[0].tag.clone();
vec![Tagged { item: Value::Table(input), tag } ]
} else if input.len() == 1 {
input
} else {
vec![]
};
for value in to_process_input {
match from_value_to_delimited_string(&clone_tagged_value(&value), sep) {
Ok(x) => {
let converted = if headerless {
x.lines().skip(1).collect()
} else {
x
};
yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag))
}
_ => {
let expected = format!("Expected a table with {}-compatible structure.tag() from pipeline", format_name);
let requires = format!("requires {}-compatible input", format_name);
yield Err(ShellError::labeled_error_with_secondary(
expected,
requires,
&name_tag,
"originates from here".to_string(),
value.tag(),
))
}
}
}
};
Ok(stream.to_output_stream())
}

View File

@ -1,7 +1,6 @@
use crate::commands::to_delimited_data::to_delimited_data;
use crate::commands::WholeStreamCommand;
use crate::data::{Primitive, Value};
use crate::prelude::*;
use csv::WriterBuilder;
pub struct ToTSV;
@ -35,172 +34,9 @@ impl WholeStreamCommand for ToTSV {
}
}
pub fn value_to_tsv_value(tagged_value: &Tagged<Value>) -> Tagged<Value> {
let v = &tagged_value.item;
match v {
Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())),
Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing),
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())),
Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())),
Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())),
Value::Primitive(Primitive::Path(x)) => Value::Primitive(Primitive::Path(x.clone())),
Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())),
Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())),
Value::Row(o) => Value::Row(o.clone()),
Value::Table(l) => Value::Table(l.clone()),
Value::Block(_) => Value::Primitive(Primitive::Nothing),
_ => Value::Primitive(Primitive::Nothing),
}
.tagged(&tagged_value.tag)
}
fn to_string_helper(tagged_value: &Tagged<Value>) -> Result<String, ShellError> {
let v = &tagged_value.item;
match v {
Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", b)),
Value::Primitive(Primitive::Boolean(_)) => Ok(tagged_value.as_string()?),
Value::Primitive(Primitive::Decimal(_)) => Ok(tagged_value.as_string()?),
Value::Primitive(Primitive::Int(_)) => Ok(tagged_value.as_string()?),
Value::Primitive(Primitive::Path(_)) => Ok(tagged_value.as_string()?),
Value::Table(_) => return Ok(String::from("[table]")),
Value::Row(_) => return Ok(String::from("[row]")),
Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()),
_ => {
return Err(ShellError::labeled_error(
"Unexpected value",
"original value",
&tagged_value.tag,
))
}
}
}
fn merge_descriptors(values: &[Tagged<Value>]) -> Vec<String> {
let mut ret = vec![];
for value in values {
for desc in value.data_descriptors() {
if !ret.contains(&desc) {
ret.push(desc);
}
}
}
ret
}
pub fn to_string(tagged_value: &Tagged<Value>) -> Result<String, ShellError> {
let v = &tagged_value.item;
match v {
Value::Row(o) => {
let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]);
let mut fields: VecDeque<String> = VecDeque::new();
let mut values: VecDeque<String> = VecDeque::new();
for (k, v) in o.entries.iter() {
fields.push_back(k.clone());
values.push_back(to_string_helper(&v)?);
}
wtr.write_record(fields).expect("can not write.");
wtr.write_record(values).expect("can not write.");
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
Value::Table(list) => {
let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]);
let merged_descriptors = merge_descriptors(&list);
wtr.write_record(&merged_descriptors)
.expect("can not write.");
for l in list {
let mut row = vec![];
for desc in &merged_descriptors {
match l.item.get_data_by_key(&desc) {
Some(s) => {
row.push(to_string_helper(s)?);
}
None => {
row.push(String::new());
}
}
}
wtr.write_record(&row).expect("can not write");
}
return Ok(String::from_utf8(wtr.into_inner().map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?)
.map_err(|_| {
ShellError::labeled_error(
"Could not convert record",
"original value",
&tagged_value.tag,
)
})?);
}
_ => return to_string_helper(tagged_value),
}
}
fn to_tsv(
ToTSVArgs { headerless }: ToTSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
runnable_context: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_tag = name;
let stream = async_stream! {
let input: Vec<Tagged<Value>> = input.values.collect().await;
let to_process_input = if input.len() > 1 {
let tag = input[0].tag.clone();
vec![Tagged { item: Value::Table(input), tag } ]
} else if input.len() == 1 {
input
} else {
vec![]
};
for value in to_process_input {
match to_string(&value_to_tsv_value(&value)) {
Ok(x) => {
let converted = if headerless {
x.lines().skip(1).collect()
} else {
x
};
yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag))
}
_ => {
yield Err(ShellError::labeled_error_with_secondary(
"Expected a table with TSV-compatible structure.tag() from pipeline",
"requires TSV-compatible input",
&name_tag,
"originates from here".to_string(),
value.tag(),
))
}
}
}
};
Ok(stream.to_output_stream())
to_delimited_data(headerless, '\t', "TSV", runnable_context)
}

View File

@ -129,6 +129,35 @@ fn converts_from_csv_text_with_separator_to_structured_table() {
})
}
#[test]
fn converts_from_csv_text_with_tab_separator_to_structured_table() {
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino 1
Jonathan Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open los_tres_caballeros.txt
| from-csv --separator '\t'
| get rusty_luck
| str --to-int
| sum
| echo $it
"#
));
assert_eq!(actual, "3");
})
}
#[test]
fn converts_from_csv_text_skipping_headers_to_structured_table() {
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
@ -267,6 +296,16 @@ fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table() {
assert_eq!(actual, "SPAIN");
}
#[test]
fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table_using_csv_separator() {
let actual = nu!(
cwd: "tests/fixtures/formats",
r"open caco3_plastics.tsv | to-tsv | from-csv --separator '\t' | first 1 | get origin | echo $it"
);
assert_eq!(actual, "SPAIN");
}
#[test]
fn converts_structured_table_to_tsv_text() {
Playground::setup("filter_to_tsv_test_1", |dirs, sandbox| {