activates optional trim in 'from csv' and 'from tsv' (#5326)

This commit is contained in:
Xavier Gillard 2022-04-25 19:54:14 +02:00 committed by GitHub
parent 5c2bc73d7b
commit 190f379ff3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 11 deletions

View File

@ -1,4 +1,4 @@
use super::delimited::from_delimited_data; use super::delimited::{from_delimited_data, trim_from_str};
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::ast::Call; use nu_protocol::ast::Call;
@ -26,6 +26,12 @@ impl Command for FromCsv {
"don't treat the first row as column names", "don't treat the first row as column names",
Some('n'), Some('n'),
) )
.named(
"trim",
SyntaxShape::String,
"drop leading and trailing whitespaces around headers names and/or field values",
Some('t'),
)
.category(Category::Formats) .category(Category::Formats)
} }
@ -65,6 +71,21 @@ impl Command for FromCsv {
example: "open data.txt | from csv --separator ';'", example: "open data.txt | from csv --separator ';'",
result: None, result: None,
}, },
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names and field values",
example: "open data.txt | from csv --trim all",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names",
example: "open data.txt | from csv --trim headers",
result: None,
},
Example {
description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around field values",
example: "open data.txt | from csv --trim fields",
result: None,
},
] ]
} }
} }
@ -79,6 +100,7 @@ fn from_csv(
let noheaders = call.has_flag("noheaders"); let noheaders = call.has_flag("noheaders");
let separator: Option<Value> = call.get_flag(engine_state, stack, "separator")?; let separator: Option<Value> = call.get_flag(engine_state, stack, "separator")?;
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let config = engine_state.get_config(); let config = engine_state.get_config();
let sep = match separator { let sep = match separator {
@ -99,7 +121,9 @@ fn from_csv(
_ => ',', _ => ',',
}; };
from_delimited_data(noheaders, sep, input, name, config) let trim = trim_from_str(trim)?;
from_delimited_data(noheaders, sep, trim, input, name, config)
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,15 +1,17 @@
use csv::ReaderBuilder; use csv::{ReaderBuilder, Trim};
use nu_protocol::{Config, IntoPipelineData, PipelineData, ShellError, Span, Value}; use nu_protocol::{Config, IntoPipelineData, PipelineData, ShellError, Span, Value};
fn from_delimited_string_to_value( fn from_delimited_string_to_value(
s: String, s: String,
noheaders: bool, noheaders: bool,
separator: char, separator: char,
trim: Trim,
span: Span, span: Span,
) -> Result<Value, csv::Error> { ) -> Result<Value, csv::Error> {
let mut reader = ReaderBuilder::new() let mut reader = ReaderBuilder::new()
.has_headers(!noheaders) .has_headers(!noheaders)
.delimiter(separator as u8) .delimiter(separator as u8)
.trim(trim)
.from_reader(s.as_bytes()); .from_reader(s.as_bytes());
let headers = if noheaders { let headers = if noheaders {
@ -48,6 +50,7 @@ fn from_delimited_string_to_value(
pub fn from_delimited_data( pub fn from_delimited_data(
noheaders: bool, noheaders: bool,
sep: char, sep: char,
trim: Trim,
input: PipelineData, input: PipelineData,
name: Span, name: Span,
config: &Config, config: &Config,
@ -55,8 +58,25 @@ pub fn from_delimited_data(
let concat_string = input.collect_string("", config)?; let concat_string = input.collect_string("", config)?;
Ok( Ok(
from_delimited_string_to_value(concat_string, noheaders, sep, name) from_delimited_string_to_value(concat_string, noheaders, sep, trim, name)
.map_err(|x| ShellError::DelimiterError(x.to_string(), name))? .map_err(|x| ShellError::DelimiterError(x.to_string(), name))?
.into_pipeline_data(), .into_pipeline_data(),
) )
} }
/// Translates the optional value of the `--trim` flag into a csv [`Trim`] mode.
///
/// Recognised strings are `"all"`, `"headers"`, `"fields"` and `"none"`, which
/// map to the `Trim` variant of the same name. When no value is given, or the
/// value is not a string, trimming is left disabled (`Trim::None`).
///
/// # Errors
///
/// Returns `ShellError::UnsupportedInput`, carrying the span of the string
/// value, when the string is not one of the four recognised modes.
pub fn trim_from_str(trim: Option<Value>) -> Result<Trim, ShellError> {
    // An absent flag and a non-string value are both treated as "no trimming".
    let (mode, span) = match trim {
        Some(Value::String { val, span }) => (val, span),
        _ => return Ok(Trim::None),
    };

    let selected = match mode.as_str() {
        "all" => Trim::All,
        "headers" => Trim::Headers,
        "fields" => Trim::Fields,
        "none" => Trim::None,
        _ => {
            // Report the problem at the location of the offending value.
            return Err(ShellError::UnsupportedInput(
                "the only possible values for trim are 'all', 'headers', 'fields' and 'none'"
                    .into(),
                span,
            ));
        }
    };

    Ok(selected)
}

View File

@ -1,8 +1,9 @@
use super::delimited::from_delimited_data; use super::delimited::{from_delimited_data, trim_from_str};
use nu_engine::CallExt;
use nu_protocol::ast::Call; use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{Category, Config, Example, PipelineData, ShellError, Signature}; use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value};
#[derive(Clone)] #[derive(Clone)]
pub struct FromTsv; pub struct FromTsv;
@ -19,6 +20,12 @@ impl Command for FromTsv {
"don't treat the first row as column names", "don't treat the first row as column names",
Some('n'), Some('n'),
) )
.named(
"trim",
SyntaxShape::String,
"drop leading and trailing whitespaces around headers names and/or field values",
Some('t'),
)
.category(Category::Formats) .category(Category::Formats)
} }
@ -29,12 +36,11 @@ impl Command for FromTsv {
fn run( fn run(
&self, &self,
engine_state: &EngineState, engine_state: &EngineState,
_stack: &mut Stack, stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> { ) -> Result<nu_protocol::PipelineData, ShellError> {
let config = engine_state.get_config(); from_tsv(engine_state, stack, call, input)
from_tsv(call, input, config)
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
@ -49,16 +55,45 @@ impl Command for FromTsv {
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv -n"#, example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv -n"#,
result: None, result: None,
}, },
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim all"#,
result: None,
},
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces in the header names",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim headers"#,
result: None,
},
Example {
description: "Create a tsv file without header columns and open it, removing all unnecessary whitespaces in the field values",
example: r#"echo $'a1(char tab)b1(char tab)c1(char nl)a2(char tab)b2(char tab)c2' | save tsv-data | open tsv-data | from tsv --trim fields"#,
result: None,
},
] ]
} }
} }
fn from_tsv(call: &Call, input: PipelineData, config: &Config) -> Result<PipelineData, ShellError> { fn from_tsv(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let name = call.head; let name = call.head;
let noheaders = call.has_flag("noheaders"); let noheaders = call.has_flag("noheaders");
let trim: Option<Value> = call.get_flag(engine_state, stack, "trim")?;
let trim = trim_from_str(trim)?;
from_delimited_data(noheaders, '\t', input, name, config) from_delimited_data(
noheaders,
'\t',
trim,
input,
name,
engine_state.get_config(),
)
} }
#[cfg(test)] #[cfg(test)]