Merge pull request #977 from jonathandturner/from_xls

Add from-xlsx for importing excel files
This commit is contained in:
Jonathan Turner 2019-11-17 16:36:22 +13:00 committed by GitHub
commit a7d7098b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 190 additions and 3 deletions

61
Cargo.lock generated
View File

@ -252,6 +252,20 @@ dependencies = [
"ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "calamine"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"codepage 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
"zip 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cc"
version = "1.0.47"
@ -328,6 +342,14 @@ dependencies = [
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "codepage"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "config"
version = "0.9.3"
@ -702,6 +724,14 @@ name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "encoding_rs"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "env_logger"
version = "0.6.2"
@ -1580,6 +1610,7 @@ dependencies = [
"bson 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-unit 3.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)",
"calamine 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono-humanize 0.0.11 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1875,6 +1906,11 @@ dependencies = [
"inflate 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "podio"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ppv-lite86"
version = "0.2.6"
@ -1948,6 +1984,15 @@ name = "quick-error"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "quick-xml"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "1.0.2"
@ -3028,6 +3073,16 @@ dependencies = [
"linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "zip"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
"podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
@ -3060,6 +3115,7 @@ dependencies = [
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum calamine 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "213df9241db37007bf06fb3da8f61f1cddb3badb9a702c62f4e80299d4d2982f"
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e8493056968583b0193c1bb04d6f7684586f3726992d6c573261941a895dbd68"
@ -3068,6 +3124,7 @@ dependencies = [
"checksum clipboard 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "25a904646c0340239dcf7c51677b33928bf24fdf424b79a57909c0109075b2e7"
"checksum clipboard-win 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e3a093d6fed558e5fe24c3dfc85a68bb68f1c824f440d3ba5aca189e2998786b"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum codepage 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8b0e9222c0cdf2c6ac27d73f664f9520266fa911c3106329d359f8861cb8bde9"
"checksum config 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f9107d78ed62b3fa5a86e7d18e647abed48cfd8f8fab6c72f4cdb982d196f7e6"
"checksum constant_time_eq 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "995a44c877f9212528ccc74b21a232f66ad69001e40ede5bcee2ac9ef2657120"
"checksum core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d"
@ -3109,6 +3166,7 @@ dependencies = [
"checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f"
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
"checksum encode_unicode 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3"
"checksum failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "f8273f13c977665c5db7eb2b99ae520952fe5ac831ae4cd09d80c4c7042b5ed9"
"checksum failure_derive 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0bc225b78e0391e4b8683440bf2e63c2deeeb2ce5189eab46e2b68c6d3725d08"
@ -3231,6 +3289,7 @@ dependencies = [
"checksum platforms 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "feb3b2b1033b8a60b4da6ee470325f887758c95d5320f52f9ce0df055a55940e"
"checksum plist 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a9f075f6394100e7c105ed1af73fb1859d6fd14e49d4290d578120beb167f"
"checksum png 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8422b27bb2c013dd97b9aef69e161ce262236f49aaf46a0489011c8ff0264602"
"checksum podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "780fb4b6698bbf9cf2444ea5d22411cef2953f0824b98f33cf454ec5615645bd"
"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
"checksum pretty-hex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be91bcc43e73799dc46a6c194a55e7aae1d86cc867c860fd4a436019af21bd8c"
"checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427"
@ -3239,6 +3298,7 @@ dependencies = [
"checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
"checksum ptree 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0a3be00b19ee7bd33238c1c523a7ab4df697345f6b36f90827a7860ea938d4"
"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
"checksum quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcdba8c8d71275493d966ef052a88726ac8590c15a09968b32158205c672ef"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412"
"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853"
@ -3365,3 +3425,4 @@ dependencies = [
"checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5"
"checksum xmlparser 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8110496c5bcc0d966b0b2da38d5a791aa139eeb0b80e7840a7463c2b806921eb"
"checksum yaml-rust 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65923dd1784f44da1d2c3dbbc5e822045628c590ba72123e1c73d3c230c4434d"
"checksum zip 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3c21bb410afa2bd823a047f5bda3adb62f51074ac7e06263b2c97ecdd47e9fc6"

View File

@ -77,6 +77,7 @@ trash = "1.0.0"
regex = "1"
cfg-if = "0.1"
strip-ansi-escapes = "0.1.0"
calamine = "0.16"
neso = { version = "0.5.0", optional = true }
crossterm = { version = "0.10.2", optional = true }

View File

@ -297,6 +297,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
whole_stream_command(FromSQLite),
whole_stream_command(FromTOML),
whole_stream_command(FromURL),
whole_stream_command(FromXLSX),
whole_stream_command(FromXML),
whole_stream_command(FromYAML),
whole_stream_command(FromYML),

View File

@ -32,6 +32,7 @@ pub(crate) mod from_ssv;
pub(crate) mod from_toml;
pub(crate) mod from_tsv;
pub(crate) mod from_url;
pub(crate) mod from_xlsx;
pub(crate) mod from_xml;
pub(crate) mod from_yaml;
pub(crate) mod get;
@ -118,6 +119,7 @@ pub(crate) use from_ssv::FromSSV;
pub(crate) use from_toml::FromTOML;
pub(crate) use from_tsv::FromTSV;
pub(crate) use from_url::FromURL;
pub(crate) use from_xlsx::FromXLSX;
pub(crate) use from_xml::FromXML;
pub(crate) use from_yaml::FromYAML;
pub(crate) use from_yaml::FromYML;

View File

@ -158,8 +158,8 @@ fn from_sqlite(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputSt
}
}
_ => yield Err(ShellError::labeled_error_with_secondary(
"Expected a string from pipeline",
"requires string input",
"Expected binary data from pipeline",
"requires binary data input",
&tag,
"value originates from here",
value_tag,

102
src/commands/from_xlsx.rs Normal file
View File

@ -0,0 +1,102 @@
use crate::commands::WholeStreamCommand;
use crate::data::{Primitive, Value};
use crate::prelude::*;
use crate::{TaggedDictBuilder, TaggedListBuilder};
use calamine::*;
use std::io::Cursor;
pub struct FromXLSX;
#[derive(Deserialize)]
pub struct FromXLSXArgs {
headerless: bool,
}
impl WholeStreamCommand for FromXLSX {
fn name(&self) -> &str {
"from-xlsx"
}
fn signature(&self) -> Signature {
Signature::build("from-xlsx")
.switch("headerless", "don't treat the first row as column names")
}
fn usage(&self) -> &str {
"Parse binary Excel(.xlsx) data and create table."
}
fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
args.process(registry, from_xlsx)?.run()
}
}
fn from_xlsx(
FromXLSXArgs {
headerless: _headerless,
}: FromXLSXArgs,
runnable_context: RunnableContext,
) -> Result<OutputStream, ShellError> {
let input = runnable_context.input;
let tag = runnable_context.name;
let stream = async_stream! {
let values: Vec<Tagged<Value>> = input.values.collect().await;
for value in values {
let value_tag = value.tag();
match value.item {
Value::Primitive(Primitive::Binary(vb)) => {
let mut buf: Cursor<Vec<u8>> = Cursor::new(vb);
let mut xls = Xlsx::<_>::new(buf).unwrap();
let mut dict = TaggedDictBuilder::new(&tag);
let sheet_names = xls.sheet_names().to_owned();
for sheet_name in &sheet_names {
let mut sheet_output = TaggedListBuilder::new(&tag);
let current_sheet = xls.worksheet_range(sheet_name).unwrap().unwrap();
for row in current_sheet.rows() {
let mut row_output = TaggedDictBuilder::new(&tag);
for (i, cell) in row.iter().enumerate() {
let value = match cell {
DataType::Empty => Value::nothing(),
DataType::String(s) => Value::string(s),
DataType::Float(f) => Value::decimal(*f),
DataType::Int(i) => Value::int(*i),
DataType::Bool(b) => Value::boolean(*b),
_ => Value::nothing(),
};
row_output.insert(&format!("Column{}", i), value);
}
sheet_output.push(row_output.into_tagged_value().item);
}
dict.insert(sheet_name, sheet_output.into_tagged_value().item);
}
yield ReturnSuccess::value(dict.into_tagged_value());
}
_ => yield Err(ShellError::labeled_error_with_secondary(
"Expected binary data from pipeline",
"requires binary data input",
&tag,
"value originates from here",
value_tag,
)),
}
}
};
Ok(stream.to_output_stream())
}

View File

@ -35,7 +35,7 @@ pub use crate::utils::{did_you_mean, AbsoluteFile, AbsolutePath, RelativePath};
pub use cli::cli;
pub use data::base::{Primitive, Value};
pub use data::config::{config_path, APP_INFO};
pub use data::dict::{Dictionary, TaggedDictBuilder};
pub use data::dict::{Dictionary, TaggedDictBuilder, TaggedListBuilder};
pub use data::meta::{
tag_for_tagged_list, HasFallibleSpan, HasSpan, Span, Spanned, SpannedItem, Tag, Tagged,
TaggedItem,

View File

@ -498,6 +498,10 @@ mod tests {
loc: fixtures().join("sample.url"),
at: 0
},
Res {
loc: fixtures().join("sample_data.xlsx"),
at: 0
},
Res {
loc: fixtures().join("sgml_description.json"),
at: 0

View File

@ -495,6 +495,22 @@ fn can_convert_table_to_bson_and_back_into_table() {
assert_eq!(actual, "whel");
}
#[test]
fn can_read_excel_file() {
let actual = nu!(
cwd: "tests/fixtures/formats", h::pipeline(
r#"
open sample_data.xlsx
| get SalesOrders
| nth 4
| get Column2
| echo $it
"#
));
assert_eq!(actual, "Gill");
}
#[test]
fn can_convert_table_to_sqlite_and_back_into_table() {
let actual = nu!(

BIN
tests/fixtures/formats/sample_data.xlsx vendored Normal file

Binary file not shown.