Add `from xlsx`, `from ods`, and `from toml` commands (#352)

* MathEval Variance and Stddev

* Fix tests and linting

* Typo

* Deal with streams when they are not tables

* `from toml` command

* From ods

* From XLSX
This commit is contained in:
Luccas Mateus
2021-11-19 16:23:35 -03:00
committed by GitHub
parent e01e73cb67
commit 00aac850fd
7 changed files with 659 additions and 0 deletions

View File

@ -39,7 +39,9 @@ serde = { version="1.0.123", features=["derive"] }
serde_yaml = "0.8.16"
serde_urlencoded = "0.7.0"
eml-parser = "0.1.0"
toml = "0.5.8"
itertools = "0.10.0"
calamine = "0.18.0"
rand = "0.8"
[features]

View File

@ -55,8 +55,11 @@ pub fn create_default_context() -> EngineState {
FromYaml,
FromYml,
FromTsv,
FromToml,
FromUrl,
FromEml,
FromOds,
FromXlsx,
Get,
Griddle,
Help,

View File

@ -3,15 +3,21 @@ mod csv;
mod delimited;
mod eml;
mod json;
mod ods;
mod toml;
mod tsv;
mod url;
mod xlsx;
mod yaml;
pub use self::csv::FromCsv;
pub use self::toml::FromToml;
pub use command::From;
pub use eml::FromEml;
pub use json::FromJson;
pub use ods::FromOds;
pub use tsv::FromTsv;
pub use url::FromUrl;
pub use xlsx::FromXlsx;
pub use yaml::FromYaml;
pub use yaml::FromYml;

View File

@ -0,0 +1,210 @@
use calamine::*;
use indexmap::map::IndexMap;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use std::io::Cursor;
#[derive(Clone)]
pub struct FromOds;
impl Command for FromOds {
fn name(&self) -> &str {
"from ods"
}
fn signature(&self) -> Signature {
Signature::build("from ods")
.named(
"sheets",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Only convert specified sheets",
Some('s'),
)
.category(Category::Formats)
}
fn usage(&self) -> &str {
"Parse OpenDocument Spreadsheet(.ods) data and create table."
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
let head = call.head;
let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
call.get_flag(engine_state, stack, "sheets")?
{
convert_columns(columns.as_slice())?
} else {
vec![]
};
from_ods(input, head, sel_sheets)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Convert binary .ods data to a table",
example: "open test.txt | from ods",
result: None,
},
Example {
description: "Convert binary .ods data to a table, specifying the tables",
example: "open test.txt | from ods -s [Spreadsheet1]",
result: None,
},
]
}
}
/// Extracts the string payload of every value in `columns`.
///
/// Fails with `ShellError::IncompatibleParametersSingle` on the first element that
/// is not a `Value::String`. The error carries that element's span, falling back to
/// `Span::unknown()` when `span()` itself returns an error.
fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
    let mut names = Vec::with_capacity(columns.len());
    for value in columns {
        if let Value::String { val, .. } = value {
            names.push(val.clone());
        } else {
            return Err(ShellError::IncompatibleParametersSingle(
                "Incorrect column format, Only string as column name".to_string(),
                value.span().unwrap_or_else(|_| Span::unknown()),
            ));
        }
    }
    Ok(names)
}
/// Concatenates every `Value::Binary` chunk of `input` into a single byte buffer.
///
/// Fails with `ShellError::UnsupportedInput` as soon as a non-binary value is
/// encountered, reporting that value's span (or `Span::unknown()` when `span()`
/// returns an error).
fn collect_binary(input: PipelineData) -> Result<Vec<u8>, ShellError> {
    let mut bytes = Vec::new();
    for value in input.into_iter() {
        match value {
            Value::Binary { val: chunk, .. } => bytes.extend_from_slice(&chunk),
            other => {
                return Err(ShellError::UnsupportedInput(
                    "Expected binary from pipeline".to_string(),
                    other.span().unwrap_or_else(|_| Span::unknown()),
                ))
            }
        }
    }
    Ok(bytes)
}
fn from_ods(
input: PipelineData,
head: Span,
sel_sheets: Vec<String>,
) -> Result<PipelineData, ShellError> {
let bytes = collect_binary(input)?;
let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
let mut ods = Ods::<_>::new(buf)
.map_err(|_| ShellError::UnsupportedInput("Could not load ods file".to_string(), head))?;
let mut dict = IndexMap::new();
let mut sheet_names = ods.sheet_names().to_owned();
if !sel_sheets.is_empty() {
sheet_names.retain(|e| sel_sheets.contains(e));
}
for sheet_name in &sheet_names {
let mut sheet_output = vec![];
if let Some(Ok(current_sheet)) = ods.worksheet_range(sheet_name) {
for row in current_sheet.rows() {
let mut row_output = IndexMap::new();
for (i, cell) in row.iter().enumerate() {
let value = match cell {
DataType::Empty => Value::nothing(head),
DataType::String(s) => Value::string(s, head),
DataType::Float(f) => Value::Float {
val: *f,
span: head,
},
DataType::Int(i) => Value::Int {
val: *i,
span: head,
},
DataType::Bool(b) => Value::Bool {
val: *b,
span: head,
},
_ => Value::nothing(head),
};
row_output.insert(format!("Column{}", i), value);
}
let (cols, vals) =
row_output
.into_iter()
.fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k);
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
sheet_output.push(record);
}
dict.insert(
sheet_name,
Value::List {
vals: sheet_output,
span: head,
},
);
} else {
return Err(ShellError::UnsupportedInput(
"Could not load sheet".to_string(),
head,
));
}
}
let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k.clone());
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
Ok(PipelineData::Value(record))
}
#[cfg(test)]
mod tests {
    use super::FromOds;

    /// Runs the crate's example checker against the examples declared by `FromOds`.
    #[test]
    fn test_examples() {
        // Imported locally under an alias: a module-level import would clash with
        // this function's own name.
        use crate::test_examples as check_examples;

        check_examples(FromOds)
    }
}

View File

@ -0,0 +1,141 @@
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Value,
};
/// Command type backing `from toml`.
#[derive(Clone)]
pub struct FromToml;

impl Command for FromToml {
    /// Command name.
    fn name(&self) -> &str {
        "from toml"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Parse text as .toml and create table."
    }

    /// The command takes no arguments or flags.
    fn signature(&self) -> Signature {
        Signature::build("from toml").category(Category::Formats)
    }

    /// Collects the pipeline input into one string, appends a trailing newline and
    /// converts the text via `convert_string_to_value`.
    fn run(
        &self,
        _engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<nu_protocol::PipelineData, ShellError> {
        let config = stack.get_config()?;
        let mut text = input.collect_string("", &config);
        text.push('\n');
        let value = convert_string_to_value(text, call.head)?;
        Ok(value.into_pipeline_data())
    }

    /// Usage examples together with the records they are expected to produce.
    fn examples(&self) -> Vec<Example> {
        let span = Span::unknown();
        // Shorthand for an integer value with an unknown span.
        let int = |val: i64| Value::Int { val, span };

        let single_key = Example {
            description: "Converts toml formatted string to table",
            example: "'a = 1' | from toml",
            result: Some(Value::Record {
                cols: vec!["a".to_string()],
                vals: vec![int(1)],
                span,
            }),
        };
        // The example text contains a literal newline; the second line must stay
        // unindented because the whitespace would become part of the string.
        let key_and_array = Example {
            description: "Converts toml formatted string to table",
            example: "'a = 1
b = [1, 2]' | from toml",
            result: Some(Value::Record {
                cols: vec!["a".to_string(), "b".to_string()],
                vals: vec![
                    int(1),
                    Value::List {
                        vals: vec![int(1), int(2)],
                        span,
                    },
                ],
                span,
            }),
        };

        vec![single_key, key_and_array]
    }
}
/// Recursively maps a parsed TOML value onto the shell's `Value` type.
///
/// Arrays become lists, tables become records, and datetimes are rendered as
/// strings via `to_string()`. Every produced value is tagged with `span`.
fn convert_toml_to_value(value: &toml::Value, span: Span) -> Value {
    match value {
        toml::Value::String(text) => Value::String {
            val: text.clone(),
            span,
        },
        toml::Value::Integer(int) => Value::Int { val: *int, span },
        toml::Value::Float(float) => Value::Float { val: *float, span },
        toml::Value::Boolean(flag) => Value::Bool { val: *flag, span },
        toml::Value::Datetime(stamp) => Value::String {
            val: stamp.to_string(),
            span,
        },
        toml::Value::Array(items) => {
            let mut vals = Vec::with_capacity(items.len());
            for item in items {
                vals.push(convert_toml_to_value(item, span));
            }
            Value::List { vals, span }
        }
        toml::Value::Table(table) => {
            let (cols, vals): (Vec<String>, Vec<Value>) = table
                .iter()
                .map(|(key, entry)| (key.clone(), convert_toml_to_value(entry, span)))
                .unzip();
            Value::Record { cols, vals, span }
        }
    }
}
/// Parses `string_input` as TOML and converts the result with
/// `convert_toml_to_value`, tagging every value with `span`.
///
/// Any parse failure is reported as `ShellError::CantConvert`; the parser's own
/// error details are not included.
pub fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
    toml::from_str::<toml::Value>(&string_input)
        .map(|parsed| convert_toml_to_value(&parsed, span))
        .map_err(|_| {
            ShellError::CantConvert(
                "structured data from toml".into(),
                "string".into(),
                span,
            )
        })
}
#[cfg(test)]
mod tests {
    use super::FromToml;

    /// Runs the crate's example checker against the examples declared by `FromToml`.
    #[test]
    fn test_examples() {
        // Imported locally under an alias: a module-level import would clash with
        // this function's own name.
        use crate::test_examples as check_examples;

        check_examples(FromToml)
    }
}

View File

@ -0,0 +1,210 @@
use calamine::*;
use indexmap::map::IndexMap;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use std::io::Cursor;
#[derive(Clone)]
pub struct FromXlsx;
impl Command for FromXlsx {
fn name(&self) -> &str {
"from xlsx"
}
fn signature(&self) -> Signature {
Signature::build("from xlsx")
.named(
"sheets",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Only convert specified sheets",
Some('s'),
)
.category(Category::Formats)
}
fn usage(&self) -> &str {
"Parse binary Excel(.xlsx) data and create table."
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
let head = call.head;
let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
call.get_flag(engine_state, stack, "sheets")?
{
convert_columns(columns.as_slice())?
} else {
vec![]
};
from_xlsx(input, head, sel_sheets)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Convert binary .xlsx data to a table",
example: "open test.txt | from xlsx",
result: None,
},
Example {
description: "Convert binary .xlsx data to a table, specifying the tables",
example: "open test.txt | from xlsx -s [Spreadsheet1]",
result: None,
},
]
}
}
/// Extracts the string payload of every value in `columns`.
///
/// Fails with `ShellError::IncompatibleParametersSingle` on the first element that
/// is not a `Value::String`. The error carries that element's span, falling back to
/// `Span::unknown()` when `span()` itself returns an error.
fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
    let mut names = Vec::with_capacity(columns.len());
    for value in columns {
        if let Value::String { val, .. } = value {
            names.push(val.clone());
        } else {
            return Err(ShellError::IncompatibleParametersSingle(
                "Incorrect column format, Only string as column name".to_string(),
                value.span().unwrap_or_else(|_| Span::unknown()),
            ));
        }
    }
    Ok(names)
}
/// Concatenates every `Value::Binary` chunk of `input` into a single byte buffer.
///
/// Fails with `ShellError::UnsupportedInput` as soon as a non-binary value is
/// encountered, reporting that value's span (or `Span::unknown()` when `span()`
/// returns an error).
fn collect_binary(input: PipelineData) -> Result<Vec<u8>, ShellError> {
    let mut bytes = Vec::new();
    for value in input.into_iter() {
        match value {
            Value::Binary { val: chunk, .. } => bytes.extend_from_slice(&chunk),
            other => {
                return Err(ShellError::UnsupportedInput(
                    "Expected binary from pipeline".to_string(),
                    other.span().unwrap_or_else(|_| Span::unknown()),
                ))
            }
        }
    }
    Ok(bytes)
}
fn from_xlsx(
input: PipelineData,
head: Span,
sel_sheets: Vec<String>,
) -> Result<PipelineData, ShellError> {
let bytes = collect_binary(input)?;
let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
let mut xlsx = Xlsx::<_>::new(buf)
.map_err(|_| ShellError::UnsupportedInput("Could not load xlsx file".to_string(), head))?;
let mut dict = IndexMap::new();
let mut sheet_names = xlsx.sheet_names().to_owned();
if !sel_sheets.is_empty() {
sheet_names.retain(|e| sel_sheets.contains(e));
}
for sheet_name in &sheet_names {
let mut sheet_output = vec![];
if let Some(Ok(current_sheet)) = xlsx.worksheet_range(sheet_name) {
for row in current_sheet.rows() {
let mut row_output = IndexMap::new();
for (i, cell) in row.iter().enumerate() {
let value = match cell {
DataType::Empty => Value::nothing(head),
DataType::String(s) => Value::string(s, head),
DataType::Float(f) => Value::Float {
val: *f,
span: head,
},
DataType::Int(i) => Value::Int {
val: *i,
span: head,
},
DataType::Bool(b) => Value::Bool {
val: *b,
span: head,
},
_ => Value::nothing(head),
};
row_output.insert(format!("Column{}", i), value);
}
let (cols, vals) =
row_output
.into_iter()
.fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k);
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
sheet_output.push(record);
}
dict.insert(
sheet_name,
Value::List {
vals: sheet_output,
span: head,
},
);
} else {
return Err(ShellError::UnsupportedInput(
"Could not load sheet".to_string(),
head,
));
}
}
let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k.clone());
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
Ok(PipelineData::Value(record))
}
#[cfg(test)]
mod tests {
    use super::FromXlsx;

    /// Runs the crate's example checker against the examples declared by `FromXlsx`.
    #[test]
    fn test_examples() {
        // Imported locally under an alias: a module-level import would clash with
        // this function's own name.
        use crate::test_examples as check_examples;

        check_examples(FromXlsx)
    }
}