From ae5f3c8210cd149209698aac33c130c8b4552939 Mon Sep 17 00:00:00 2001 From: Sam Hedin Date: Sun, 29 Mar 2020 04:05:57 +0200 Subject: [PATCH] WIP: 1486/first row as headers (#1530) * headers plugin * Remove plugin * Add non-functioning headers command * Add ability to extract headers from first row * Refactor header extraction * Rebuild indexmap with proper headers * Rebuild result properly * Compiling, probably wrapped too much? * Refactoring * Deal with case of empty header cell * Deal with case of empty header cell * Fix formatting * Fix linting, attempt 2. * Move whole_stream_command(Headers) to more appropriate section * ... more linting * Return Err(ShellError...) instead of panic, yield each row instead of entire table * Insert Column[index] if no header info is found. * Update error description * Add initial test * Add tests for headers command * Lint test cases in headers * Change ShellError for headers, Add sample_headers file to utils.rs * Add empty sheet to test file * Revert "Add empty sheet to test file" This reverts commit a4bf38a31d7e242a5a9f9045f22eb4975ec5d6d0. * Show error message when given empty table --- crates/nu-cli/src/cli.rs | 1 + crates/nu-cli/src/commands.rs | 2 + crates/nu-cli/src/commands/headers.rs | 80 +++++++++++++++++++++ crates/nu-cli/src/utils.rs | 4 ++ crates/nu-cli/tests/commands/headers.rs | 31 ++++++++ crates/nu-cli/tests/commands/mod.rs | 1 + tests/fixtures/formats/sample_headers.xlsx | Bin 0 -> 4807 bytes 7 files changed, 119 insertions(+) create mode 100644 crates/nu-cli/src/commands/headers.rs create mode 100644 crates/nu-cli/tests/commands/headers.rs create mode 100644 tests/fixtures/formats/sample_headers.xlsx diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index ff8df1ea6..fd639a770 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -311,6 +311,7 @@ pub fn create_default_context( whole_stream_command(Shuffle), whole_stream_command(Wrap), whole_stream_command(Pivot), + whole_stream_command(Headers), // Data processing whole_stream_command(Histogram), whole_stream_command(Sum), diff --git a/crates/nu-cli/src/commands.rs b/crates/nu-cli/src/commands.rs index 9afd76f45..03b9a826c 100644 --- a/crates/nu-cli/src/commands.rs +++ b/crates/nu-cli/src/commands.rs @@ -45,6 +45,7 @@ pub(crate) mod from_xml; pub(crate) mod from_yaml; pub(crate) mod get; pub(crate) mod group_by; +pub(crate) mod headers; pub(crate) mod help; pub(crate) mod histogram; pub(crate) mod history; @@ -155,6 +156,7 @@ pub(crate) use from_yaml::FromYAML; pub(crate) use from_yaml::FromYML; pub(crate) use get::Get; pub(crate) use group_by::GroupBy; +pub(crate) use headers::Headers; pub(crate) use help::Help; pub(crate) use histogram::Histogram; pub(crate) use history::History; diff --git a/crates/nu-cli/src/commands/headers.rs b/crates/nu-cli/src/commands/headers.rs new file mode 100644 index 000000000..e03983095 --- /dev/null +++ b/crates/nu-cli/src/commands/headers.rs @@ -0,0 +1,80 @@ +use crate::commands::WholeStreamCommand; +use crate::context::CommandRegistry; +use crate::prelude::*; +use futures::stream::StreamExt; +use indexmap::IndexMap; +use nu_errors::ShellError; +use nu_protocol::Dictionary; +use nu_protocol::{ReturnSuccess, Signature, UntaggedValue, Value}; + +pub struct Headers; +#[derive(Deserialize)] +pub struct HeadersArgs {} + +impl WholeStreamCommand for Headers { + fn name(&self) -> &str { + "headers" + } + fn signature(&self) -> Signature { + Signature::build("headers") + } + fn usage(&self) -> &str { + "Use the first row of the table as column names" + } + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, headers)?.run() + } +} + +pub fn headers( + HeadersArgs {}: HeadersArgs, + RunnableContext { input, .. }: RunnableContext, +) -> Result { + let stream = async_stream! { + let rows: Vec = input.values.collect().await; + + if rows.len() < 1 { + yield Err(ShellError::untagged_runtime_error("Couldn't find headers, was the input a properly formatted, non-empty table?")); + } + + //the headers are the first row in the table + let headers: Vec = match &rows[0].value { + UntaggedValue::Row(d) => { + Ok(d.entries.iter().map(|(k, v)| { + match v.as_string() { + Ok(s) => s, + Err(_) => { //If a cell that should contain a header name is empty, we name the column Column[index] + match d.entries.get_full(k) { + Some((index, _, _)) => format!("Column{}", index), + None => "unknownColumn".to_string() + } + } + } + }).collect()) + } + _ => Err(ShellError::unexpected_eof("Could not get headers, is the table empty?", rows[0].tag.span)) + }?; + + //Each row is a dictionary with the headers as keys + for r in rows.iter().skip(1) { + match &r.value { + UntaggedValue::Row(d) => { + let mut i = 0; + let mut entries = IndexMap::new(); + for (_, v) in d.entries.iter() { + entries.insert(headers[i].clone(), v.clone()); + i += 1; + } + yield Ok(ReturnSuccess::Value(UntaggedValue::Row(Dictionary{entries}).into_value(r.tag.clone()))) + } + _ => yield Err(ShellError::unexpected_eof("Couldn't iterate through rows, was the input a properly formatted table?", r.tag.span)) + } + } + }; + + Ok(stream.to_output_stream()) +} diff --git a/crates/nu-cli/src/utils.rs b/crates/nu-cli/src/utils.rs index 0fddf91ff..6653a8018 100644 --- a/crates/nu-cli/src/utils.rs +++ b/crates/nu-cli/src/utils.rs @@ -317,6 +317,10 @@ mod tests { loc: fixtures().join("sample_data.xlsx"), at: 0 }, + Res { + loc: fixtures().join("sample_headers.xlsx"), + at: 0 + }, Res { loc: fixtures().join("script.nu"), at: 0 diff --git a/crates/nu-cli/tests/commands/headers.rs b/crates/nu-cli/tests/commands/headers.rs new file mode 100644 index 000000000..be5ffb5c7 --- /dev/null +++ b/crates/nu-cli/tests/commands/headers.rs @@ -0,0 +1,31 @@ +use nu_test_support::{nu, pipeline}; + +#[test] +fn headers_uses_first_row_as_header() { + let actual = nu!( + cwd: "tests/fixtures/formats", pipeline( + r#" + open sample_headers.xlsx + | get Sheet1 + | headers + | get header0 + | from-json"# + )); + + assert_eq!(actual, "r1c0r2c0") +} + +#[test] +fn headers_adds_missing_column_name() { + let actual = nu!( + cwd: "tests/fixtures/formats", pipeline( + r#" + open sample_headers.xlsx + | get Sheet1 + | headers + | get Column1 + | from-json"# + )); + + assert_eq!(actual, "r1c1r2c1") +} diff --git a/crates/nu-cli/tests/commands/mod.rs b/crates/nu-cli/tests/commands/mod.rs index 49826b1f3..b2587415f 100644 --- a/crates/nu-cli/tests/commands/mod.rs +++ b/crates/nu-cli/tests/commands/mod.rs @@ -10,6 +10,7 @@ mod first; mod format; mod get; mod group_by; +mod headers; mod histogram; mod insert; mod last; diff --git a/tests/fixtures/formats/sample_headers.xlsx b/tests/fixtures/formats/sample_headers.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f05428dd584703152595f25f1aa865d70a63cf8a GIT binary patch literal 4807 zcmaJ_1yoe+x~7Ki7#iu65G18bNhwKbhHg+g2c$a`gkLG?E@?zMhHmLbMoJivx`Up3 z<#^BW*=tR${eFAC=gsF+Q$j`|MnFeLN9a!URYSM~Ecj<*SCG9MH`ncJY2vUV@&o*^ zeZRpD^^T~OK3;RmhVy8320MS-t#53@hHhCDT)zz8_9G+&`1k)t3<3NO3nz0mS0`sTZgVGB5SOQeeT;_T zQ!o#)A515{RpwlmS)iSe-^VqgkB4H(w!!=vtIB)~bjiS^qR{66`j3z5@ReY;okwE%3DbGf zvP&N%nv~1}ZHvT;@l(G%Ec(zb{me{ZmXs4QtW~0ipG;p(#>Hy8zmFjEK9+!9e`b+4 zIM}q{J*w$L+-8J#wWwO9<>K3ESfHUH^^RvdPgHlon@JT82IVo{g;0rhr{eq<5=8@Y z_3WsuN~$sV9|9>@sq7?QxiTD6h>310C5QTNH~Fu>s`tWAn#~yOfT7%YFOlQA3KrYHHUh%rSY}?7dfa>0KFBkMVsHPk(Bw(n%DbC4e#p080%{fu zSs@}I%>UI(_;g;^$q|Eps+s{1sA^U!q46d~sAG>j+w5{310CQjiZ@$PHRjuz~ zHMo8AIu3Mw4fWKP=P#{hHZ4?KLf+JPi0Ntbv$ZRf*UZD#{XE%pa-Xnf5OzP>?jt9h zJ$Q5q^a~89&`7}GI#~}Sn@VQ(Lg3Iyad@5C_n3oc;A6gjdQFHGw-pwyjpj6UC{eKH z`+jG{M2XcO;Wwe|+yU20_dW|-&{!eQn4ln8eHX!{2_>%qv^EFycFR@u;76w-xv}^b zdk`1}#Eq2M7Jsk>8bCM4&l+~S%Qzi@-BhbsjAM01qC;~9Ri@v8-P^T-*EqLKkx84G z?ZJ8Qg|qeF;~58jho?Qao4c1i$nBOUsIKADd3Y=@l~-JfC#GtcD1yl}^IMdyi7{;E zzzP|i9rqHT(x1zhUS#BAgsQM7O)2_S?e~fGuuM-Ez1=or67tTS<6*M$bSE~G1!Ze6 zACtEl4oduEYH>e-ZAEd6ysY38>0wy!Oyv06V2vnAsdRF#5>5#~kOC%qxUdFO$^oOM zK0a0!1X*M-N|iJ5j9G2JX;(qiq6Fva{4 z`>U8!d`0rU6voTCjFo6-85cA^^`}*Uwea!5;0jBx75U=+_6Xf#6lMKbgE4C{i<(vHxx-Gdl2Mwy^lCLv1G)71xruFsG zXMQYCI#VSG5RfQVTgAVmd?Oh14N?e-0AF61S?u&*tEGC99DL7JXh5jopii=C;SuEv zwLCJ-+AQ(G2C+Y)3@=^}`cT>zoF@qdp%(AS<#oKsl%Jm}JUp*b^ZRIASch}kGk9jQ ziNj~`t`wa1zn8G!SH49i!L6HF<&n15iviyonl>oEe~N?1{)Jcm ziC@!rTfY5`<7e1=TdDEDmbMf_K2;Yyxo>zsume1nxID|9VM^VcN#d9S7&?mRW1DfR zs3B{D7~587#SAKFxjV(Vc~e@<&w7NZotXnxv8O~R4^)lU3^g;`Y21^K5xW9-bV-SV zPpm$i3El>@v@&O8Dm?9r3I7(*c)tSL!^zdo%*n~_XL;7_bDZbFZ-H^3EIM!+*;1w= z>%_(*Ea2z&r6tM(?IMYL4^y*m770LkY;!Vr0>W5J&1qixiy3x1)Sq;-o(*=Gwu`DY zaD~iu?aiBuym>H8uLF3kFEz+zGCQ=paFjbFKJL9g>H{*@rW3$gs`mo$*UKyfWby;EKe-Mt`hcUmn1=W-On3)}uE;WL1N=d>MU3lnlle zAZeYe%q|0&I%Hh72)dYC){t8R5UD5MXrR{9_*XQ z4*EC0lOMMbh(R3{p~C4nc*%RlL-7=K$02=eIZ=i8X+UBL)o!rA!sDgvET?tEerTw+ z&o{=pm&v~UyVulwZ91L1q_;5><;e$y!3&@x^50^H_+LfP+SC7RDq< z*>>_^%fZA#NBz4aNb^wUt69IJWFnJ2R>7{!(1-B2T@-~%dz>8Qz-|nBjlOwl3YKJG z3X=^Zv*zk<%0_{|Ud-O}cu^gN2TU88w8#xnH z>79+%iyW`gbUIT79bT>IZN;N&-ACg`TNPsUqdEKWwq;`-8fUgDCi`Fj)x3r7>{)jO zuC}1li6D&Iu5m`ByHy>r$EZTn!F%$9j>C0P?d3oK;w=GzM(W@;`1YWG?E)0P2>3Pe z-A$ce(_ey$-De)`SNm5a@~|!iH1lYomT0N+t24xI_0^G1fyAlW8-FfJ15D7~NYcpk zaqE+l-G?!|;hi$rs72W*p`W^5qPGNHeS@x$YY$~FE?YCjRoo*iZYG)uBvs(nuRa#i z<1*`re`J{3r>I&u-PZ_)s&fwBYeq za-~V&8`X$2Tjt>AMf>h>z9F_DK~Fuy&|eOIry<~hF==c`v+s@NgfU;L=D&Bzczjf! zCRbX%xZsO)%gob*bMG61v*c=fp}4PO^qtI(5vyfE6>nPrsHT+y?T5m093DgrMH6Afi5X&lcUL(gqQ86E-;-|dK)p!r&F1VkhWM(-c zW9ZnTn~=$#{VGm^w}psU*E>6k<@I&m1djJY!r7?U^ff+DU*4f$Cq(P~q)XBGfXY zSR>WYB3u^fhmhD7r_pxIE2fGWa$M)(*BQ$pPTR{~kdH1AD%BrHn>)%?&9(0eHLYoX z(F)$2CK)?1iMP*PTZt#;vQZ_4s{`AUj19hzr;*$ZR;g$YWsqt@^^60tMkK&H>_>w6aSQB3L49F5{u8r~BkABt=CdVd1)Zr-Ln4mV1nE9kh~4K6%-= zt1`PQ6OM!uXy^!g8Hnc9TL@5tqDlepOv+A$3uP)iqR+EYyTtf`59Jlti19wYAgONF zJ;Izh@$~(SnS;C?|7u(6!z^;L?_|ij3%G}xbc3;B|A?NQ<;$ErP}Jm27wgwaVe3nR z75ymi?wBEnIK?5hJnuxOji{sy7eS*7vg*~gIxgLoi}QXH&;!j4w160TZekM6`0Ry@ z23_nhU}z3zR??JrA&QBgF*Fyba#93g5keTUPc|eqQ>zfZ>#`-Qg@b{#5DO=Y&Chj+ zH&zw(xE~MrCDm}C%1`6lq7D4Xs;Sbcb&5Vt-|EJLZG`}m3ZpGqnw87iK|C#?wI{s;WcM`o7P`|MQ55a$v zQ-7M?9Y}6R)!%4D{b_o)`~GQsx0&D0EWhy${nvB