from xlsx from ods and from toml (#352)

* MathEval Variance and Stddev

* Fix tests and linting

* Typo

* Deal with streams when they are not tables

* `from toml` command

* From ods

* From XLSX
This commit is contained in:
Luccas Mateus 2021-11-19 16:23:35 -03:00 committed by GitHub
parent e01e73cb67
commit 00aac850fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 659 additions and 0 deletions

87
Cargo.lock generated
View File

@ -177,6 +177,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70" checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70"
[[package]]
name = "calamine"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b86ca78da4bdce5ac0f0bdbc0218ad14232f1e668376e044233f64c527cf5abb"
dependencies = [
"byteorder",
"codepage",
"encoding_rs",
"log",
"quick-xml",
"serde",
"zip",
]
[[package]] [[package]]
name = "capnp" name = "capnp"
version = "0.14.3" version = "0.14.3"
@ -249,6 +264,15 @@ dependencies = [
"phf_codegen", "phf_codegen",
] ]
[[package]]
name = "codepage"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b0e9222c0cdf2c6ac27d73f664f9520266fa911c3106329d359f8861cb8bde9"
dependencies = [
"encoding_rs",
]
[[package]] [[package]]
name = "console" name = "console"
version = "0.15.0" version = "0.15.0"
@ -270,6 +294,15 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "crc32fast"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.1" version = "0.5.1"
@ -491,6 +524,15 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a74ea89a0a1b98f6332de42c95baff457ada66d1cb4030f9ff151b2041a1c746"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "engine-q" name = "engine-q"
version = "0.1.0" version = "0.1.0"
@ -515,6 +557,18 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "flate2"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f"
dependencies = [
"cfg-if",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -884,6 +938,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"Inflector", "Inflector",
"bytesize", "bytesize",
"calamine",
"chrono", "chrono",
"chrono-humanize", "chrono-humanize",
"chrono-tz", "chrono-tz",
@ -912,6 +967,7 @@ dependencies = [
"terminal_size", "terminal_size",
"thiserror", "thiserror",
"titlecase", "titlecase",
"toml",
"trash", "trash",
"unicode-segmentation", "unicode-segmentation",
] ]
@ -1219,6 +1275,16 @@ dependencies = [
"unicode-xid", "unicode-xid",
] ]
[[package]]
name = "quick-xml"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d72d5477478f85bd00b6521780dfba1ec6cdaadcf90b8b181c36d7de561f9b"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.10" version = "1.0.10"
@ -1691,6 +1757,15 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "toml"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "trash" name = "trash"
version = "1.3.0" version = "1.3.0"
@ -1844,3 +1919,15 @@ name = "zeroize"
version = "1.4.3" version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d68d9dcec5f9b43a30d38c49f91dfedfaac384cb8f085faca366c26207dd1619" checksum = "d68d9dcec5f9b43a30d38c49f91dfedfaac384cb8f085faca366c26207dd1619"
[[package]]
name = "zip"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815"
dependencies = [
"byteorder",
"crc32fast",
"flate2",
"thiserror",
]

View File

@ -39,7 +39,9 @@ serde = { version="1.0.123", features=["derive"] }
serde_yaml = "0.8.16" serde_yaml = "0.8.16"
serde_urlencoded = "0.7.0" serde_urlencoded = "0.7.0"
eml-parser = "0.1.0" eml-parser = "0.1.0"
toml = "0.5.8"
itertools = "0.10.0" itertools = "0.10.0"
calamine = "0.18.0"
rand = "0.8" rand = "0.8"
[features] [features]

View File

@ -55,8 +55,11 @@ pub fn create_default_context() -> EngineState {
FromYaml, FromYaml,
FromYml, FromYml,
FromTsv, FromTsv,
FromToml,
FromUrl, FromUrl,
FromEml, FromEml,
FromOds,
FromXlsx,
Get, Get,
Griddle, Griddle,
Help, Help,

View File

@ -3,15 +3,21 @@ mod csv;
mod delimited; mod delimited;
mod eml; mod eml;
mod json; mod json;
mod ods;
mod toml;
mod tsv; mod tsv;
mod url; mod url;
mod xlsx;
mod yaml; mod yaml;
pub use self::csv::FromCsv; pub use self::csv::FromCsv;
pub use self::toml::FromToml;
pub use command::From; pub use command::From;
pub use eml::FromEml; pub use eml::FromEml;
pub use json::FromJson; pub use json::FromJson;
pub use ods::FromOds;
pub use tsv::FromTsv; pub use tsv::FromTsv;
pub use url::FromUrl; pub use url::FromUrl;
pub use xlsx::FromXlsx;
pub use yaml::FromYaml; pub use yaml::FromYaml;
pub use yaml::FromYml; pub use yaml::FromYml;

View File

@ -0,0 +1,210 @@
use calamine::*;
use indexmap::map::IndexMap;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use std::io::Cursor;
/// Marker type for the `from ods` command; it holds no data of its own.
#[derive(Clone)]
pub struct FromOds;
impl Command for FromOds {
fn name(&self) -> &str {
"from ods"
}
fn signature(&self) -> Signature {
Signature::build("from ods")
.named(
"sheets",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Only convert specified sheets",
Some('s'),
)
.category(Category::Formats)
}
fn usage(&self) -> &str {
"Parse OpenDocument Spreadsheet(.ods) data and create table."
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
let head = call.head;
let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
call.get_flag(engine_state, stack, "sheets")?
{
convert_columns(columns.as_slice())?
} else {
vec![]
};
from_ods(input, head, sel_sheets)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Convert binary .ods data to a table",
example: "open test.txt | from ods",
result: None,
},
Example {
description: "Convert binary .ods data to a table, specifying the tables",
example: "open test.txt | from ods -s [Spreadsheet1]",
result: None,
},
]
}
}
/// Copies the text out of every value in `columns`, keeping their order.
///
/// Stops at the first value that is not a `Value::String` and returns
/// `ShellError::IncompatibleParametersSingle` for it. The error carries that value's
/// span, or `Span::unknown()` when the span cannot be obtained.
fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
    let mut names = Vec::with_capacity(columns.len());

    for entry in columns {
        if let Value::String { val, .. } = entry {
            names.push(val.clone());
        } else {
            return Err(ShellError::IncompatibleParametersSingle(
                "Incorrect column format, Only string as column name".to_string(),
                entry.span().unwrap_or_else(|_| Span::unknown()),
            ));
        }
    }

    Ok(names)
}
/// Concatenates every binary value produced by `input` into a single byte buffer.
///
/// Returns `ShellError::UnsupportedInput` for the first value that is not
/// `Value::Binary`; the error carries that value's span, or `Span::unknown()` when the
/// span cannot be obtained.
fn collect_binary(input: PipelineData) -> Result<Vec<u8>, ShellError> {
    let mut buffer = Vec::new();

    for item in input.into_iter() {
        match item {
            Value::Binary { val, .. } => buffer.extend_from_slice(&val),
            other => {
                return Err(ShellError::UnsupportedInput(
                    "Expected binary from pipeline".to_string(),
                    other.span().unwrap_or_else(|_| Span::unknown()),
                ))
            }
        }
    }

    Ok(buffer)
}
fn from_ods(
input: PipelineData,
head: Span,
sel_sheets: Vec<String>,
) -> Result<PipelineData, ShellError> {
let bytes = collect_binary(input)?;
let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
let mut ods = Ods::<_>::new(buf)
.map_err(|_| ShellError::UnsupportedInput("Could not load ods file".to_string(), head))?;
let mut dict = IndexMap::new();
let mut sheet_names = ods.sheet_names().to_owned();
if !sel_sheets.is_empty() {
sheet_names.retain(|e| sel_sheets.contains(e));
}
for sheet_name in &sheet_names {
let mut sheet_output = vec![];
if let Some(Ok(current_sheet)) = ods.worksheet_range(sheet_name) {
for row in current_sheet.rows() {
let mut row_output = IndexMap::new();
for (i, cell) in row.iter().enumerate() {
let value = match cell {
DataType::Empty => Value::nothing(head),
DataType::String(s) => Value::string(s, head),
DataType::Float(f) => Value::Float {
val: *f,
span: head,
},
DataType::Int(i) => Value::Int {
val: *i,
span: head,
},
DataType::Bool(b) => Value::Bool {
val: *b,
span: head,
},
_ => Value::nothing(head),
};
row_output.insert(format!("Column{}", i), value);
}
let (cols, vals) =
row_output
.into_iter()
.fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k);
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
sheet_output.push(record);
}
dict.insert(
sheet_name,
Value::List {
vals: sheet_output,
span: head,
},
);
} else {
return Err(ShellError::UnsupportedInput(
"Could not load sheet".to_string(),
head,
));
}
}
let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k.clone());
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
Ok(PipelineData::Value(record))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        // Path-qualified: a bare `test_examples` here would refer to this test function.
        crate::test_examples(FromOds)
    }
}

View File

@ -0,0 +1,141 @@
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Value,
};
/// Marker type for the `from toml` command; it holds no data of its own.
#[derive(Clone)]
pub struct FromToml;
impl Command for FromToml {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "from toml"
    }

    /// The command takes no flags or positional arguments.
    fn signature(&self) -> Signature {
        Signature::build("from toml").category(Category::Formats)
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Parse text as .toml and create table."
    }

    /// Usage samples together with the values they are expected to produce.
    fn examples(&self) -> Vec<Example> {
        let span = Span::unknown();
        let int = |val: i64| Value::Int { val, span };

        let single_key = Example {
            example: "'a = 1' | from toml",
            description: "Converts toml formatted string to table",
            result: Some(Value::Record {
                cols: vec!["a".to_string()],
                vals: vec![int(1)],
                span,
            }),
        };

        // The example text spans two lines; the second line must stay unindented so the
        // literal keeps exactly the same characters.
        let key_and_array = Example {
            example: "'a = 1
b = [1, 2]' | from toml",
            description: "Converts toml formatted string to table",
            result: Some(Value::Record {
                cols: vec!["a".to_string(), "b".to_string()],
                vals: vec![
                    int(1),
                    Value::List {
                        vals: vec![int(1), int(2)],
                        span,
                    },
                ],
                span,
            }),
        };

        vec![single_key, key_and_array]
    }

    /// Collects the pipeline input into one string, appends a newline, and parses the
    /// result as TOML.
    ///
    /// Fails when the config cannot be read from `stack` or when
    /// `convert_string_to_value` rejects the text.
    fn run(
        &self,
        _engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<nu_protocol::PipelineData, ShellError> {
        let config = stack.get_config()?;

        let mut text = input.collect_string("", &config);
        text.push('\n');

        let parsed = convert_string_to_value(text, call.head)?;
        Ok(parsed.into_pipeline_data())
    }
}
/// Recursively maps a parsed TOML value onto the shell's `Value` type.
///
/// Arrays become lists and tables become records, keeping the table's iteration order.
/// Booleans, floats, integers and strings map to their direct counterparts. Datetimes
/// are rendered with `to_string` and returned as strings. Every produced value carries
/// `span`.
fn convert_toml_to_value(value: &toml::Value, span: Span) -> Value {
    match value {
        toml::Value::String(text) => Value::String {
            val: text.clone(),
            span,
        },
        toml::Value::Integer(number) => Value::Int { val: *number, span },
        toml::Value::Float(number) => Value::Float { val: *number, span },
        toml::Value::Boolean(flag) => Value::Bool { val: *flag, span },
        toml::Value::Datetime(stamp) => Value::String {
            val: stamp.to_string(),
            span,
        },
        toml::Value::Array(items) => {
            let vals: Vec<Value> = items
                .iter()
                .map(|item| convert_toml_to_value(item, span))
                .collect();
            Value::List { vals, span }
        }
        toml::Value::Table(table) => {
            let (cols, vals): (Vec<String>, Vec<Value>) = table
                .iter()
                .map(|(key, entry)| (key.clone(), convert_toml_to_value(entry, span)))
                .unzip();
            Value::Record { cols, vals, span }
        }
    }
}
/// Parses `string_input` as TOML and converts the document to a `Value` tagged with
/// `span`.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` when the text is not valid TOML. The parser's own
/// error is discarded.
pub fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
    toml::from_str::<toml::Value>(&string_input)
        .map(|document| convert_toml_to_value(&document, span))
        .map_err(|_| {
            ShellError::CantConvert(
                "structured data from toml".into(),
                "string".into(),
                span,
            )
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        // Path-qualified: a bare `test_examples` here would refer to this test function.
        crate::test_examples(FromToml)
    }
}

View File

@ -0,0 +1,210 @@
use calamine::*;
use indexmap::map::IndexMap;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use std::io::Cursor;
/// Marker type for the `from xlsx` command; it holds no data of its own.
#[derive(Clone)]
pub struct FromXlsx;
impl Command for FromXlsx {
fn name(&self) -> &str {
"from xlsx"
}
fn signature(&self) -> Signature {
Signature::build("from xlsx")
.named(
"sheets",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Only convert specified sheets",
Some('s'),
)
.category(Category::Formats)
}
fn usage(&self) -> &str {
"Parse binary Excel(.xlsx) data and create table."
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> {
let head = call.head;
let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
call.get_flag(engine_state, stack, "sheets")?
{
convert_columns(columns.as_slice())?
} else {
vec![]
};
from_xlsx(input, head, sel_sheets)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Convert binary .xlsx data to a table",
example: "open test.txt | from xlsx",
result: None,
},
Example {
description: "Convert binary .xlsx data to a table, specifying the tables",
example: "open test.txt | from xlsx -s [Spreadsheet1]",
result: None,
},
]
}
}
/// Copies the text out of every value in `columns`, keeping their order.
///
/// Stops at the first value that is not a `Value::String` and returns
/// `ShellError::IncompatibleParametersSingle` for it. The error carries that value's
/// span, or `Span::unknown()` when the span cannot be obtained.
fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
    let mut names = Vec::with_capacity(columns.len());

    for entry in columns {
        if let Value::String { val, .. } = entry {
            names.push(val.clone());
        } else {
            return Err(ShellError::IncompatibleParametersSingle(
                "Incorrect column format, Only string as column name".to_string(),
                entry.span().unwrap_or_else(|_| Span::unknown()),
            ));
        }
    }

    Ok(names)
}
/// Concatenates every binary value produced by `input` into a single byte buffer.
///
/// Returns `ShellError::UnsupportedInput` for the first value that is not
/// `Value::Binary`; the error carries that value's span, or `Span::unknown()` when the
/// span cannot be obtained.
fn collect_binary(input: PipelineData) -> Result<Vec<u8>, ShellError> {
    let mut buffer = Vec::new();

    for item in input.into_iter() {
        match item {
            Value::Binary { val, .. } => buffer.extend_from_slice(&val),
            other => {
                return Err(ShellError::UnsupportedInput(
                    "Expected binary from pipeline".to_string(),
                    other.span().unwrap_or_else(|_| Span::unknown()),
                ))
            }
        }
    }

    Ok(buffer)
}
fn from_xlsx(
input: PipelineData,
head: Span,
sel_sheets: Vec<String>,
) -> Result<PipelineData, ShellError> {
let bytes = collect_binary(input)?;
let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
let mut xlsx = Xlsx::<_>::new(buf)
.map_err(|_| ShellError::UnsupportedInput("Could not load xlsx file".to_string(), head))?;
let mut dict = IndexMap::new();
let mut sheet_names = xlsx.sheet_names().to_owned();
if !sel_sheets.is_empty() {
sheet_names.retain(|e| sel_sheets.contains(e));
}
for sheet_name in &sheet_names {
let mut sheet_output = vec![];
if let Some(Ok(current_sheet)) = xlsx.worksheet_range(sheet_name) {
for row in current_sheet.rows() {
let mut row_output = IndexMap::new();
for (i, cell) in row.iter().enumerate() {
let value = match cell {
DataType::Empty => Value::nothing(head),
DataType::String(s) => Value::string(s, head),
DataType::Float(f) => Value::Float {
val: *f,
span: head,
},
DataType::Int(i) => Value::Int {
val: *i,
span: head,
},
DataType::Bool(b) => Value::Bool {
val: *b,
span: head,
},
_ => Value::nothing(head),
};
row_output.insert(format!("Column{}", i), value);
}
let (cols, vals) =
row_output
.into_iter()
.fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k);
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
sheet_output.push(record);
}
dict.insert(
sheet_name,
Value::List {
vals: sheet_output,
span: head,
},
);
} else {
return Err(ShellError::UnsupportedInput(
"Could not load sheet".to_string(),
head,
));
}
}
let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| {
acc.0.push(k.clone());
acc.1.push(v);
acc
});
let record = Value::Record {
cols,
vals,
span: head,
};
Ok(PipelineData::Value(record))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        // Path-qualified: a bare `test_examples` here would refer to this test function.
        crate::test_examples(FromXlsx)
    }
}