activates optional trim in 'from csv' and 'from tsv' (#5326)

This commit is contained in:
Xavier Gillard 2022-04-25 19:54:14 +02:00 committed by GitHub
parent 5c2bc73d7b
commit 190f379ff3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 11 deletions

View File

@ -1,4 +1,4 @@
use super::delimited::from_delimited_data;
use super::delimited::{from_delimited_data, trim_from_str};
use nu_engine::CallExt;
use nu_protocol::ast::Call;
@ -26,6 +26,12 @@ impl Command for FromCsv {
"don't treat the first row as column names",
Some('n'),
)
.named(
"trim",
SyntaxShape::String,
"drop leading and trailing whitespaces around headers names and/or field values",
Some('t'),
)
.category(Category::Formats)
}
@ -65,6 +71,21 @@ impl Command for FromCsv {
example: "open data.txt | from csv --separator ';'",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names and field values",
example: "open data.txt | from csv --trim all",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names",
example: "open data.txt | from csv --trim headers",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around field values",
example: "open data.txt | from csv --trim fields",
result: None,
},
]
}
}
@ -79,6 +100,7 @@ fn from_csv(
let noheaders = call.has_flag("noheaders");
let separator: Option<Value> = call.get_flag(engine_state, stack, "separator")?;
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let config = engine_state.get_config();
let sep = match separator {
@ -99,7 +121,9 @@ fn from_csv(
_ => ',',
};
from_delimited_data(noheaders, sep, input, name, config)
let trim = trim_from_str(trim)?;
from_delimited_data(noheaders, sep, trim, input, name, config)
}
#[cfg(test)]

View File

@ -1,15 +1,17 @@
use csv::ReaderBuilder;
use csv::{ReaderBuilder, Trim};
use nu_protocol::{Config, IntoPipelineData, PipelineData, ShellError, Span, Value};
fn from_delimited_string_to_value(
s: String,
noheaders: bool,
separator: char,
trim: Trim,
span: Span,
) -> Result<Value, csv::Error> {
let mut reader = ReaderBuilder::new()
.has_headers(!noheaders)
.delimiter(separator as u8)
.trim(trim)
.from_reader(s.as_bytes());
let headers = if noheaders {
@ -48,6 +50,7 @@ fn from_delimited_string_to_value(
pub fn from_delimited_data(
noheaders: bool,
sep: char,
trim: Trim,
input: PipelineData,
name: Span,
config: &Config,
@ -55,8 +58,25 @@ pub fn from_delimited_data(
let concat_string = input.collect_string("", config)?;
Ok(
from_delimited_string_to_value(concat_string, noheaders, sep, name)
from_delimited_string_to_value(concat_string, noheaders, sep, trim, name)
.map_err(|x| ShellError::DelimiterError(x.to_string(), name))?
.into_pipeline_data(),
)
}
/// Translates the optional value of the `trim` flag into a csv [`Trim`] mode.
///
/// The accepted strings are `"all"`, `"headers"`, `"fields"` and `"none"`,
/// which map onto the `Trim` variant of the same name.
///
/// When `trim` is `None`, or holds anything other than a `Value::String`,
/// the result is `Trim::None`.
///
/// # Errors
///
/// Returns `ShellError::UnsupportedInput`, carrying the span of the offending
/// value, when the string is not one of the four accepted keywords.
pub fn trim_from_str(trim: Option<Value>) -> Result<Trim, ShellError> {
    // Guard clause: only a string value can select a trimming mode.
    let (keyword, span) = match trim {
        Some(Value::String { val, span }) => (val, span),
        _ => return Ok(Trim::None),
    };

    let mode = match keyword.as_str() {
        "all" => Trim::All,
        "headers" => Trim::Headers,
        "fields" => Trim::Fields,
        "none" => Trim::None,
        _ => {
            return Err(ShellError::UnsupportedInput(
                "the only possible values for trim are 'all', 'headers', 'fields' and 'none'"
                    .into(),
                span,
            ))
        }
    };

    Ok(mode)
}

View File

@ -1,8 +1,9 @@
use super::delimited::from_delimited_data;
use super::delimited::{from_delimited_data, trim_from_str};
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{Category, Config, Example, PipelineData, ShellError, Signature};
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value};
#[derive(Clone)]
pub struct FromTsv;
@ -19,6 +20,12 @@ impl Command for FromTsv {
"don't treat the first row as column names",
Some('n'),
)
.named(
"trim",
SyntaxShape::String,
"drop leading and trailing whitespaces around headers names and/or field values",
Some('t'),
)
.category(Category::Formats)
}
@ -29,12 +36,11 @@ impl Command for FromTsv {
fn run(
&self,
engine_state: &EngineState,
_stack: &mut Stack,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
let config = engine_state.get_config();
from_tsv(call, input, config)
from_tsv(engine_state, stack, call, input)
}
fn examples(&self) -> Vec<Example> {
@ -49,16 +55,45 @@ impl Command for FromTsv {
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv -n"#,
result: None,
},
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim all"#,
result: None,
},
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces in the header names",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim headers"#,
result: None,
},
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces in the field values",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim fields"#,
result: None,
},
]
}
}
fn from_tsv(call: &Call, input: PipelineData, config: &Config) -> Result<PipelineData, ShellError> {
fn from_tsv(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let name = call.head;
let noheaders = call.has_flag("noheaders");
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let trim = trim_from_str(trim)?;
from_delimited_data(noheaders, '\t', input, name, config)
from_delimited_data(
noheaders,
'\t',
trim,
input,
name,
engine_state.get_config(),
)
}
#[cfg(test)]