From 004d7b5ff0826ad8182c42006f26fb078ec8de97 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Tue, 1 Feb 2022 12:45:48 -0600 Subject: [PATCH] query command with json, web, xml (#870) * query command with json, web, xml * query xml now working * clippy * comment out web tests * Initial work on query web For now we can query everything except tables * Support for querying tables Now we can query multiple tables just like before, now the only thing missing is the test coverage * finish off * comment out web test Co-authored-by: Luccas Mateus de Medeiros Gomes --- Cargo.lock | 435 +++++- Cargo.toml | 12 +- crates/nu-engine/src/documentation.rs | 2 +- crates/nu-engine/src/lib.rs | 2 +- crates/nu-plugin/src/plugin_capnp.rs | 26 +- .../src/serializers/capnp/schema/plugin.capnp | 12 +- .../src/serializers/capnp/signature.rs | 12 + crates/nu_plugin_query/Cargo.toml | 19 + crates/nu_plugin_query/src/lib.rs | 12 + crates/nu_plugin_query/src/main.rs | 6 + crates/nu_plugin_query/src/nu/mod.rs | 70 + crates/nu_plugin_query/src/query.rs | 75 + crates/nu_plugin_query/src/query_json.rs | 151 ++ crates/nu_plugin_query/src/query_web.rs | 303 ++++ crates/nu_plugin_query/src/query_xml.rs | 188 +++ crates/nu_plugin_query/src/web_tables.rs | 1227 +++++++++++++++++ src/plugins/nu_plugin_extra_query.rs | 6 + 17 files changed, 2527 insertions(+), 31 deletions(-) create mode 100644 crates/nu_plugin_query/Cargo.toml create mode 100644 crates/nu_plugin_query/src/lib.rs create mode 100644 crates/nu_plugin_query/src/main.rs create mode 100644 crates/nu_plugin_query/src/nu/mod.rs create mode 100644 crates/nu_plugin_query/src/query.rs create mode 100644 crates/nu_plugin_query/src/query_json.rs create mode 100644 crates/nu_plugin_query/src/query_web.rs create mode 100644 crates/nu_plugin_query/src/query_xml.rs create mode 100644 crates/nu_plugin_query/src/web_tables.rs create mode 100644 src/plugins/nu_plugin_extra_query.rs diff --git a/Cargo.lock 
b/Cargo.lock index 6476e8e807..cf85e3a9e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -463,7 +463,7 @@ checksum = "58549f1842da3080ce63002102d5bc954c7bc843d4f47818e642abdc36253552" dependencies = [ "chrono", "chrono-tz-build", - "phf", + "phf 0.10.0", ] [[package]] @@ -473,8 +473,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db058d493fb2f65f41861bfed7e3fe6335264a9f0f92710cab5bdf01fef09069" dependencies = [ "parse-zoneinfo", - "phf", - "phf_codegen", + "phf 0.10.0", + "phf_codegen 0.10.0", ] [[package]] @@ -513,6 +513,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "core-foundation" version = "0.9.2" @@ -632,6 +638,33 @@ dependencies = [ "generic-array 0.14.4", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "cstr_core" version = "0.2.4" @@ -690,6 +723,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" +[[package]] +name = "derive_more" +version = "0.99.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "dialoguer" version = "0.9.0" @@ -798,6 +844,15 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" +[[package]] +name = "dtoa-short" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" +dependencies = [ + "dtoa", +] + [[package]] name = "dtparse" version = "1.2.0" @@ -817,6 +872,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "either" version = "1.6.1" @@ -874,6 +935,7 @@ dependencies = [ "nu_plugin_example", "nu_plugin_gstat", "nu_plugin_inc", + "nu_plugin_query", "pretty_assertions", "pretty_env_logger", "reedline", @@ -983,6 +1045,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.18" @@ -1072,6 +1144,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = 
"generic-array" version = "0.12.4" @@ -1100,6 +1181,15 @@ dependencies = [ "version_check 0.9.3", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -1154,6 +1244,12 @@ dependencies = [ "url", ] +[[package]] +name = "gjson" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4599d0e9dce476280e2da1f334811e2b26d63a6b000e13b7b50cc980bae49698" + [[package]] name = "glob" version = "0.3.0" @@ -1251,6 +1347,20 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "htmlescape" version = "0.3.1" @@ -1676,6 +1786,12 @@ dependencies = [ "libc", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "malloc_buf" version = "0.0.6" @@ -1685,6 +1801,20 @@ dependencies = [ "libc", ] +[[package]] +name = "markup5ever" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd" +dependencies = [ + "log", + "phf 0.8.0", + "phf_codegen 0.8.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matches" version = "0.1.9" @@ -1840,6 +1970,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = 
"new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "nix" version = "0.23.0" @@ -1966,7 +2102,7 @@ dependencies = [ "pathdiff", "polars", "quick-xml 0.22.0", - "rand", + "rand 0.8.4", "rayon", "regex", "reqwest", @@ -2056,7 +2192,7 @@ version = "0.41.0" dependencies = [ "heapless 0.7.9", "nu-ansi-term", - "rand", + "rand 0.8.4", ] [[package]] @@ -2137,7 +2273,20 @@ version = "0.1.0" dependencies = [ "nu-plugin", "nu-protocol", - "semver", + "semver 0.11.0", +] + +[[package]] +name = "nu_plugin_query" +version = "0.1.0" +dependencies = [ + "gjson", + "nu-engine", + "nu-plugin", + "nu-protocol", + "scraper", + "sxd-document", + "sxd-xpath", ] [[package]] @@ -2454,6 +2603,12 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "peresil" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f658886ed52e196e850cfbbfddab9eaa7f6d90dd0929e264c31e5cec07e09e57" + [[package]] name = "pest" version = "2.1.3" @@ -2463,13 +2618,34 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared 0.8.0", + "proc-macro-hack", +] + [[package]] name = "phf" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9fc3db1018c4b59d7d582a739436478b6035138b6aecbce989fc91c3e98409f" dependencies = [ - "phf_shared", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", ] [[package]] @@ -2478,8 +2654,18 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", ] [[package]] @@ -2488,8 +2674,31 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ - "phf_shared", - "rand", + "phf_shared 0.10.0", + "rand 0.8.4", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", ] [[package]] @@ -2559,7 +2768,7 @@ dependencies = [ "num_cpus", "polars-arrow", "prettytable-rs", - "rand", + "rand 0.8.4", "rand_distr", "rayon", "regex", @@ -2613,6 +2822,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "predicates" version = "2.1.0" @@ -2676,6 +2891,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.33" @@ -2734,6 +2955,20 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", + "rand_pcg", +] + [[package]] name = "rand" version = "0.8.4" @@ -2741,9 +2976,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.3", - "rand_hc", + "rand_hc 0.3.1", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", ] [[package]] @@ -2761,6 +3006,9 @@ name = "rand_core" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] [[package]] name = "rand_core" @@ -2778,7 +3026,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f" dependencies = [ "num-traits", - "rand", + "rand 0.8.4", +] + 
+[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", ] [[package]] @@ -2790,6 +3047,15 @@ dependencies = [ "rand_core 0.6.3", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xoshiro" version = "0.4.0" @@ -3020,6 +3286,15 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver 1.0.4", +] + [[package]] name = "ryu" version = "1.0.6" @@ -3051,6 +3326,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scraper" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e02aa790c80c2e494130dec6a522033b6a23603ffc06360e9fe6c611ea2c12" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "matches", + "selectors", + "smallvec", + "tendril", +] + [[package]] name = "security-framework" version = "2.4.2" @@ -3074,6 +3365,26 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf 
0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + [[package]] name = "semver" version = "0.11.0" @@ -3083,6 +3394,12 @@ dependencies = [ "semver-parser", ] +[[package]] +name = "semver" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" + [[package]] name = "semver-parser" version = "0.10.2" @@ -3168,6 +3485,16 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "sha2" version = "0.9.8" @@ -3331,6 +3658,32 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3ff2f71c82567c565ba4b3009a9350a96a7269eaa4001ebedae926230bc2254" +[[package]] +name = "string_cache" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.8.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro2", + "quote", +] + [[package]] name = "strip-ansi-escapes" version = "0.1.1" @@ -3368,6 +3721,27 @@ dependencies = [ "atty", ] +[[package]] +name = "sxd-document" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94d82f37be9faf1b10a82c4bd492b74f698e40082f0f40de38ab275f31d42078" +dependencies = [ + "peresil", + "typed-arena", 
+] + +[[package]] +name = "sxd-xpath" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36e39da5d30887b5690e29de4c5ebb8ddff64ebd9933f98a01daaa4fd11b36ea" +dependencies = [ + "peresil", + "quick-error", + "sxd-document", +] + [[package]] name = "syn" version = "1.0.82" @@ -3415,12 +3789,23 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if", "libc", - "rand", + "rand 0.8.4", "redox_syscall 0.2.10", "remove_dir_all", "winapi", ] +[[package]] +name = "tendril" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "term" version = "0.5.2" @@ -3478,6 +3863,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.30" @@ -3629,6 +4020,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "typed-arena" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9b2228007eba4120145f785df0f6c92ea538f5a3635a612ecf4e334c8c1446d" + [[package]] name = "typenum" version = "1.14.0" @@ -3750,6 +4147,12 @@ dependencies = [ "log", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8-width" version = "0.1.5" diff --git a/Cargo.toml b/Cargo.toml index 3d62988315..1029f669a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ members = [ 
"crates/nu_plugin_inc", "crates/nu_plugin_gstat", "crates/nu_plugin_example", + "crates/nu_plugin_query", ] [dependencies] @@ -48,7 +49,7 @@ pretty_env_logger = "0.4.0" nu_plugin_inc = { version = "0.1.0", path = "./crates/nu_plugin_inc", optional = true } nu_plugin_example = { version = "0.1.0", path = "./crates/nu_plugin_example", optional = true } nu_plugin_gstat = { version = "0.1.0", path = "./crates/nu_plugin_gstat", optional = true } - +nu_plugin_query = { version = "0.1.0", path = "./crates/nu_plugin_query", optional = true } [dev-dependencies] tempfile = "3.2.0" @@ -73,6 +74,7 @@ extra = [ "dataframe", "gstat", "zip-support", + "query", ] wasi = ["inc"] @@ -80,13 +82,12 @@ wasi = ["inc"] # Stable (Default) inc = ["nu_plugin_inc"] example = ["nu_plugin_example"] - which = ["nu-command/which"] # Extra gstat = ["nu_plugin_gstat"] - zip-support = ["nu-command/zip"] +query = ["nu_plugin_query"] # Dataframe feature for nushell dataframe = ["nu-command/dataframe"] @@ -111,6 +112,11 @@ name = "nu_plugin_gstat" path = "src/plugins/nu_plugin_extra_gstat.rs" required-features = ["gstat"] +[[bin]] +name = "nu_plugin_query" +path = "src/plugins/nu_plugin_extra_query.rs" +required-features = ["query"] + # Main nu binary [[bin]] name = "engine-q" diff --git a/crates/nu-engine/src/documentation.rs b/crates/nu-engine/src/documentation.rs index f470e69021..357b549c90 100644 --- a/crates/nu-engine/src/documentation.rs +++ b/crates/nu-engine/src/documentation.rs @@ -262,7 +262,7 @@ pub fn get_documentation( long_desc } -fn get_flags_section(signature: &Signature) -> String { +pub fn get_flags_section(signature: &Signature) -> String { let mut long_desc = String::new(); long_desc.push_str("\nFlags:\n"); for flag in &signature.named { diff --git a/crates/nu-engine/src/lib.rs b/crates/nu-engine/src/lib.rs index 6bee8e6c77..f43b780d01 100644 --- a/crates/nu-engine/src/lib.rs +++ b/crates/nu-engine/src/lib.rs @@ -1,6 +1,6 @@ mod call_ext; pub mod column; -mod documentation; +pub 
mod documentation; pub mod env; mod eval; mod glob_from; diff --git a/crates/nu-plugin/src/plugin_capnp.rs b/crates/nu-plugin/src/plugin_capnp.rs index c322e9927e..15f4b51cdf 100644 --- a/crates/nu-plugin/src/plugin_capnp.rs +++ b/crates/nu-plugin/src/plugin_capnp.rs @@ -1,6 +1,6 @@ // @generated by the capnpc-rust plugin to the Cap'n Proto schema compiler. // DO NOT EDIT. -// source: plugin.capnp +// source: crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp pub mod err { /* T */ @@ -2252,9 +2252,15 @@ pub enum Category { Filters = 7, Formats = 8, Math = 9, - Strings = 10, - System = 11, - Viewers = 12, + Network = 10, + Random = 11, + Platform = 12, + Shells = 13, + Strings = 14, + System = 15, + Viewers = 16, + Hash = 17, + Generators = 18, } impl ::capnp::traits::FromU16 for Category { #[inline] @@ -2270,9 +2276,15 @@ impl ::capnp::traits::FromU16 for Category { 7 => ::core::result::Result::Ok(Category::Filters), 8 => ::core::result::Result::Ok(Category::Formats), 9 => ::core::result::Result::Ok(Category::Math), - 10 => ::core::result::Result::Ok(Category::Strings), - 11 => ::core::result::Result::Ok(Category::System), - 12 => ::core::result::Result::Ok(Category::Viewers), + 10 => ::core::result::Result::Ok(Category::Network), + 11 => ::core::result::Result::Ok(Category::Random), + 12 => ::core::result::Result::Ok(Category::Platform), + 13 => ::core::result::Result::Ok(Category::Shells), + 14 => ::core::result::Result::Ok(Category::Strings), + 15 => ::core::result::Result::Ok(Category::System), + 16 => ::core::result::Result::Ok(Category::Viewers), + 17 => ::core::result::Result::Ok(Category::Hash), + 18 => ::core::result::Result::Ok(Category::Generators), n => ::core::result::Result::Err(::capnp::NotInSchema(n)), } } diff --git a/crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp b/crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp index ffec75e720..41533d1903 100644 --- a/crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp +++ 
b/crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp @@ -75,9 +75,15 @@ enum Category { filters @7; formats @8; math @9; - strings @10; - system @11; - viewers @12; + network @10; + random @11; + platform @12; + shells @13; + strings @14; + system @15; + viewers @16; + hash @17; + generators @18; } struct Flag { diff --git a/crates/nu-plugin/src/serializers/capnp/signature.rs b/crates/nu-plugin/src/serializers/capnp/signature.rs index 416da794ec..51f19da743 100644 --- a/crates/nu-plugin/src/serializers/capnp/signature.rs +++ b/crates/nu-plugin/src/serializers/capnp/signature.rs @@ -18,9 +18,15 @@ pub(crate) fn serialize_signature(signature: &Signature, mut builder: signature: Category::Filters => builder.set_category(PluginCategory::Filters), Category::Formats => builder.set_category(PluginCategory::Formats), Category::Math => builder.set_category(PluginCategory::Math), + Category::Network => builder.set_category(PluginCategory::Network), + Category::Random => builder.set_category(PluginCategory::Random), + Category::Platform => builder.set_category(PluginCategory::Platform), + Category::Shells => builder.set_category(PluginCategory::Shells), Category::Strings => builder.set_category(PluginCategory::Strings), Category::System => builder.set_category(PluginCategory::System), Category::Viewers => builder.set_category(PluginCategory::Viewers), + Category::Hash => builder.set_category(PluginCategory::Hash), + Category::Generators => builder.set_category(PluginCategory::Generators), _ => builder.set_category(PluginCategory::Default), } @@ -122,6 +128,12 @@ pub(crate) fn deserialize_signature(reader: signature::Reader) -> Result Category::Strings, PluginCategory::System => Category::System, PluginCategory::Viewers => Category::Viewers, + PluginCategory::Network => Category::Network, + PluginCategory::Random => Category::Random, + PluginCategory::Platform => Category::Platform, + PluginCategory::Shells => Category::Shells, + PluginCategory::Hash => Category::Hash, 
+ PluginCategory::Generators => Category::Generators, }; // Deserializing required arguments diff --git a/crates/nu_plugin_query/Cargo.toml b/crates/nu_plugin_query/Cargo.toml new file mode 100644 index 0000000000..188dce7b3f --- /dev/null +++ b/crates/nu_plugin_query/Cargo.toml @@ -0,0 +1,19 @@ +[package] +authors = ["The Nu Project Contributors"] +description = "A set of query commands for Nushell" +edition = "2021" +license = "MIT" +name = "nu_plugin_query" +version = "0.1.0" + +[lib] +doctest = false + +[dependencies] +nu-plugin = { path="../nu-plugin", version = "0.1.0" } +nu-protocol = { path="../nu-protocol", version = "0.1.0" } +nu-engine = { path="../nu-engine", version = "0.1.0" } +gjson = "0.8.0" +scraper = "0.12.0" +sxd-document = "0.3.2" +sxd-xpath = "0.4.2" diff --git a/crates/nu_plugin_query/src/lib.rs b/crates/nu_plugin_query/src/lib.rs new file mode 100644 index 0000000000..4b8ebba399 --- /dev/null +++ b/crates/nu_plugin_query/src/lib.rs @@ -0,0 +1,12 @@ +mod nu; +mod query; +mod query_json; +mod query_web; +mod query_xml; +mod web_tables; + +pub use query::Query; +pub use query_json::execute_json_query; +pub use query_web::parse_selector_params; +pub use query_xml::execute_xpath_query; +pub use web_tables::WebTable; diff --git a/crates/nu_plugin_query/src/main.rs b/crates/nu_plugin_query/src/main.rs new file mode 100644 index 0000000000..c43203a8be --- /dev/null +++ b/crates/nu_plugin_query/src/main.rs @@ -0,0 +1,6 @@ +use nu_plugin::{serve_plugin, CapnpSerializer}; +use nu_plugin_query::Query; + +fn main() { + serve_plugin(&mut Query {}, CapnpSerializer {}) +} diff --git a/crates/nu_plugin_query/src/nu/mod.rs b/crates/nu_plugin_query/src/nu/mod.rs new file mode 100644 index 0000000000..8322f877a5 --- /dev/null +++ b/crates/nu_plugin_query/src/nu/mod.rs @@ -0,0 +1,70 @@ +use crate::Query; +use nu_plugin::{EvaluatedCall, LabeledError, Plugin}; +use nu_protocol::{Category, Signature, Spanned, SyntaxShape, Value}; + +impl Plugin for Query { + fn 
signature(&self) -> Vec { + vec![ + Signature::build("query") + .desc("Show all the query commands") + .category(Category::Filters), + + Signature::build("query json") + .desc("execute json query on json file (open --raw | query json 'query string')") + .required("query", SyntaxShape::String, "json query") + .category(Category::Filters), + + Signature::build("query xml") + .desc("execute xpath query on xml") + .required("query", SyntaxShape::String, "xpath query") + .category(Category::Filters), + + Signature::build("query web") + .desc("execute selector query on html/web") + .named("query", SyntaxShape::String, "selector query", Some('q')) + .switch("as_html", "return the query output as html", Some('m')) + .named( + "attribute", + SyntaxShape::String, + "downselect based on the given attribute", + Some('a'), + ) + .named( + "as_table", + SyntaxShape::Table, + "find table based on column header list", + Some('t'), + ) + .switch( + "inspect", + "run in inspect mode to provide more information for determining column headers", + Some('i'), + ) + .category(Category::Network), + ] + } + + fn run( + &mut self, + name: &str, + call: &EvaluatedCall, + input: &Value, + ) -> Result { + // You can use the name to identify what plugin signature was called + let path: Option> = call.opt(0)?; + + match name { + "query" => { + self.query(name, call, input, path) + } + "query json" => self.query_json( name, call, input, path), + "query web" => self.query_web(name, call, input, path), + "query xml" => self.query_xml(name, call, input, path), + _ => Err(LabeledError { + label: "Plugin call with wrong name signature".into(), + msg: "the signature used to call the plugin does not match any name in the plugin signature vector".into(), + span: Some(call.head), + }), + } + } +} diff --git a/crates/nu_plugin_query/src/query.rs b/crates/nu_plugin_query/src/query.rs new file mode 100644 index 0000000000..41e22d5514 --- /dev/null +++ b/crates/nu_plugin_query/src/query.rs @@ -0,0 +1,75 @@ 
+use crate::query_json::execute_json_query;
+use crate::query_web::parse_selector_params;
+use crate::query_xml::execute_xpath_query;
+use nu_engine::documentation::get_flags_section;
+use nu_plugin::{EvaluatedCall, LabeledError, Plugin};
+use nu_protocol::{Signature, Spanned, Value};
+
+/// The `query` plugin; each method forwards to the module implementing it.
+#[derive(Default)]
+pub struct Query;
+
+impl Query {
+    /// Creates the plugin value.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Short usage string of the top-level command.
+    pub fn usage() -> &'static str {
+        "Usage: query"
+    }
+
+    /// Returns the brief help text built from the plugin's own signatures.
+    pub fn query(
+        &self,
+        _name: &str,
+        call: &EvaluatedCall,
+        _value: &Value,
+        _path: Option<Spanned<String>>,
+    ) -> Result<Value, LabeledError> {
+        let help = get_brief_subcommand_help(&Query.signature());
+        Ok(Value::string(help, call.head))
+    }
+
+    /// Runs `query` against the JSON text in `input` (see `execute_json_query`).
+    pub fn query_json(
+        &self,
+        name: &str,
+        call: &EvaluatedCall,
+        input: &Value,
+        query: Option<Spanned<String>>,
+    ) -> Result<Value, LabeledError> {
+        execute_json_query(name, call, input, query)
+    }
+
+    /// Runs a CSS-selector / table query against the HTML text in `input`.
+    /// All options are read from the flags on `call`; `_rest` is ignored.
+    pub fn query_web(
+        &self,
+        _name: &str,
+        call: &EvaluatedCall,
+        input: &Value,
+        _rest: Option<Spanned<String>>,
+    ) -> Result<Value, LabeledError> {
+        parse_selector_params(call, input)
+    }
+
+    /// Runs the XPath `query` against the XML text in `input`.
+    pub fn query_xml(
+        &self,
+        name: &str,
+        call: &EvaluatedCall,
+        input: &Value,
+        query: Option<Spanned<String>>,
+    ) -> Result<Value, LabeledError> {
+        execute_xpath_query(name, call, input, query)
+    }
+}
+
+/// Builds the brief help text for a command and its subcommands.
+///
+/// `sigs[0]` is treated as the parent command, every following signature as
+/// one of its subcommands. An empty slice yields an empty string instead of
+/// panicking on the `sigs[0]` index.
+pub fn get_brief_subcommand_help(sigs: &[Signature]) -> String {
+    let (root, subcommands) = match sigs.split_first() {
+        Some(parts) => parts,
+        None => return String::new(),
+    };
+
+    let mut help = String::new();
+    help.push_str(&format!("{}\n\n", root.usage));
+    help.push_str(&format!("Usage:\n > {}\n\n", root.name));
+    help.push_str("Subcommands:\n");
+
+    for sub in subcommands {
+        help.push_str(&format!(" {} - {}\n", sub.name, sub.usage));
+    }
+
+    help.push_str(&get_flags_section(root));
+    help
+}
diff --git a/crates/nu_plugin_query/src/query_json.rs b/crates/nu_plugin_query/src/query_json.rs
new file mode 100644
index 0000000000..616fd18c21
--- /dev/null
+++ b/crates/nu_plugin_query/src/query_json.rs
@@ -0,0 +1,151 @@
+use gjson::Value as gjValue;
+use nu_plugin::{EvaluatedCall, LabeledError};
+use nu_protocol::{Span, Spanned, Value};
+
+/// Evaluates a gjson path expression against JSON text.
+///
+/// # Errors
+///
+/// Returns a `LabeledError` (spanned at `call.head`) when `input` is not a
+/// string, when no query was supplied, or when the input is not valid JSON.
+pub fn execute_json_query(
+    _name: &str,
+    call: &EvaluatedCall,
+    input: &Value,
+    query: Option<Spanned<String>>,
+) -> Result<Value, LabeledError> {
+    let input_string = match input.as_string() {
+        Ok(s) => s,
+        Err(e) => {
+            return Err(LabeledError {
+                span: Some(call.head),
+                msg: e.to_string(),
+                label: "problem with input data".to_string(),
+            })
+        }
+    };
+
+    let query_string = match &query {
+        Some(v) => &v.item,
+        None => {
+            // The input is fine here; it is the query that is absent.
+            return Err(LabeledError {
+                msg: "expected a gjson query string".to_string(),
+                label: "missing query".to_string(),
+                span: Some(call.head),
+            });
+        }
+    };
+
+    // Validate the json before trying to query it
+    if !gjson::valid(&input_string) {
+        return Err(LabeledError {
+            msg: "invalid json".to_string(),
+            label: "invalid json".to_string(),
+            span: Some(call.head),
+        });
+    }
+
+    let val: gjValue = gjson::get(&input_string, query_string);
+
+    if query_contains_modifiers(query_string) {
+        // Formatting modifiers produce text; tag it with the real call span
+        // rather than the test-only placeholder span.
+        Ok(Value::string(val.json(), call.head))
+    } else {
+        Ok(convert_gjson_value_to_nu_value(&val, &call.head))
+    }
+}
+
+/// Reports whether `query` uses a modifier whose result must stay a string.
+///
+/// https://github.com/tidwall/gjson.rs documents 7 modifiers as of 4/19/21.
+/// Some of these modifiers mean we really need to output the data as a string
+/// instead of tabular data. Others don't matter.
+///
+/// Output as String
+/// - `@ugly`: Remove all whitespace from a json document.
+/// - `@pretty`: Make the json document more human readable.
+///
+/// Output as Tabular (our default, so these are simply ignored)
+/// - `@reverse`: Reverse an array or the members of an object.
+/// - `@this`: Returns the current element. It can be used to retrieve the root element.
+/// - `@valid`: Ensure the json document is valid.
+/// - `@flatten`: Flattens an array.
+/// - `@join`: Joins multiple objects into a single object.
+fn query_contains_modifiers(query: &str) -> bool {
+    query.contains("@ugly") || query.contains("@pretty")
+}
+
+/// Recursively converts a gjson value into the equivalent nushell `Value`,
+/// tagging every produced value with `span`.
+fn convert_gjson_value_to_nu_value(v: &gjValue, span: &Span) -> Value {
+    match v.kind() {
+        gjson::Kind::Array => {
+            let mut vals = vec![];
+            v.each(|_k, v| {
+                vals.push(convert_gjson_value_to_nu_value(&v, span));
+                true
+            });
+
+            Value::List { vals, span: *span }
+        }
+        gjson::Kind::Null => Value::nothing(*span),
+        gjson::Kind::False => Value::boolean(false, *span),
+        gjson::Kind::Number => {
+            // A number whose text contains a '.' becomes a float, anything
+            // else an int.
+            if v.str().contains('.') {
+                Value::float(v.f64(), *span)
+            } else {
+                Value::int(v.i64(), *span)
+            }
+        }
+        gjson::Kind::String => Value::string(v.str(), *span),
+        gjson::Kind::True => Value::boolean(true, *span),
+        gjson::Kind::Object => {
+            let mut cols = vec![];
+            let mut vals = vec![];
+            v.each(|k, v| {
+                cols.push(k.to_string());
+                vals.push(convert_gjson_value_to_nu_value(&v, span));
+                true
+            });
+            Value::Record {
+                cols,
+                vals,
+                span: *span,
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use gjson::{valid, Value as gjValue};
+
+    /// Document shared by every test below.
+    const SAMPLE: &str = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
+
+    #[test]
+    fn validate_string() {
+        assert!(valid(SAMPLE));
+    }
+
+    #[test]
+    fn answer_from_get_age() {
+        let val: gjValue = gjson::get(SAMPLE, "age");
+        assert_eq!(val.str(), "37");
+    }
+
+    #[test]
+    fn answer_from_get_children() {
+        let val: gjValue = gjson::get(SAMPLE, "children");
+        assert_eq!(val.str(), r#"["Sara", "Alex", "Jack"]"#);
+    }
+
+    #[test]
+    fn answer_from_get_children_count() {
+        let val: gjValue = gjson::get(SAMPLE, "children.#");
+        assert_eq!(val.str(), "3");
+    }
+
+    #[test]
+    fn answer_from_get_friends_first_name() {
+        let val: gjValue = gjson::get(SAMPLE, "friends.#.first");
+        assert_eq!(val.str(), r#"["James","Roger"]"#);
+    }
+}
diff --git a/crates/nu_plugin_query/src/query_web.rs b/crates/nu_plugin_query/src/query_web.rs
new file mode 100644
index 0000000000..422f7e8e7a
--- /dev/null
+++ b/crates/nu_plugin_query/src/query_web.rs
@@ -0,0 +1,303 @@
+use crate::web_tables::WebTable;
+use nu_plugin::{EvaluatedCall, LabeledError};
+use nu_protocol::{Span, Value};
+use scraper::{Html, Selector as ScraperSelector};
+
+/// Options of one web query, as read from the call's flags.
+pub struct Selector {
+    /// CSS selector text (`--query`).
+    pub query: String,
+    /// Return the matched elements' HTML instead of their text (`--as_html`).
+    pub as_html: bool,
+    /// When non-empty, return this attribute of each match (`--attribute`).
+    pub attribute: String,
+    /// When a list, switches to table extraction (`--as_table`).
+    pub as_table: Value,
+    /// Print diagnostics to stderr while extracting tables (`--inspect`).
+    pub inspect: bool,
+}
+
+impl Selector {
+    /// Creates a selector with every option empty or disabled.
+    pub fn new() -> Selector {
+        Selector {
+            query: String::new(),
+            as_html: false,
+            attribute: String::new(),
+            as_table: Value::string("".to_string(), Span::test_data()),
+            inspect: false,
+        }
+    }
+}
+
+impl Default for Selector {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Reads the query options from `call` and runs them against `input`.
+///
+/// # Errors
+///
+/// Fails when a flag cannot be read, when a non-empty `--query` is not a
+/// valid CSS selector, or when `input` is not a string.
+pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Value, LabeledError> {
+    let head = call.head;
+    let query: String = call.get_flag("query")?.unwrap_or_default();
+    let as_html = call.has_flag("as_html");
+    let attribute: String = call.get_flag("attribute")?.unwrap_or_default();
+    let as_table: Value = call
+        .get_flag("as_table")?
+        .unwrap_or_else(|| Value::nothing(head));
+    let inspect = call.has_flag("inspect");
+
+    if !query.is_empty() && ScraperSelector::parse(&query).is_err() {
+        return Err(LabeledError {
+            msg: "Cannot parse this query as a valid css selector".to_string(),
+            label: "Parse error".to_string(),
+            span: Some(head),
+        });
+    }
+
+    let selector = Selector {
+        query,
+        as_html,
+        attribute,
+        as_table,
+        inspect,
+    };
+
+    match input {
+        Value::String { val, span } => Ok(begin_selector_query(val.to_string(), selector, *span)),
+        _ => Err(LabeledError {
+            label: "requires text input".to_string(),
+            msg: "Expected text from pipeline".to_string(),
+            span: Some(input.span()?),
+        }),
+    }
+}
+
+/// Dispatches to table extraction, attribute lookup or plain selection,
+/// depending on which options of `selector` are set.
+fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> Value {
+    if let Value::List { .. } = selector.as_table {
+        retrieve_tables(
+            input_html.as_str(),
+            &selector.as_table,
+            selector.inspect,
+            span,
+        )
+    } else if selector.attribute.is_empty() {
+        execute_selector_query(
+            input_html.as_str(),
+            selector.query.as_str(),
+            selector.as_html,
+            span,
+        )
+    } else {
+        execute_selector_query_with_attribute(
+            input_html.as_str(),
+            selector.query.as_str(),
+            selector.attribute.as_str(),
+            span,
+        )
+    }
+}
+
+/// Collects the string items of a `Value::List`; anything else is ignored.
+fn requested_columns(columns: &Value) -> Vec<String> {
+    let mut cols: Vec<String> = Vec::new();
+    if let Value::List { vals, .. } = columns {
+        for x in vals {
+            // TODO Find a way to get the Config object here
+            if let Value::String { val, .. } = x {
+                cols.push(val.to_string())
+            }
+        }
+    }
+    cols
+}
+
+/// Extracts every table of `input_string` whose header row matches `columns`.
+///
+/// A single matching table is returned directly; several are wrapped in a
+/// list. When nothing matches, a placeholder empty table is rendered instead.
+pub fn retrieve_tables(
+    input_string: &str,
+    columns: &Value,
+    inspect_mode: bool,
+    span: Span,
+) -> Value {
+    let html = input_string;
+    let cols = requested_columns(columns);
+
+    if inspect_mode {
+        eprintln!("Passed in Column Headers = {:#?}", &cols,);
+    }
+
+    let tables = match WebTable::find_by_headers(html, &cols) {
+        Some(t) => {
+            if inspect_mode {
+                eprintln!("Table Found = {:#?}", &t);
+            }
+            t
+        }
+        None => vec![WebTable::empty()],
+    };
+
+    let mut vals: Vec<Value> = tables
+        .into_iter()
+        .map(|table| retrieve_table(table, columns, span))
+        .collect();
+
+    if vals.len() == 1 {
+        if let Some(only) = vals.pop() {
+            return only;
+        }
+    }
+
+    Value::List { vals, span }
+}
+
+/// Renders one table as a list of records.
+///
+/// Columns come from `columns` or, when that is empty, from the table's own
+/// headers. If not a single requested column can be resolved, the rows are
+/// collapsed (each joined with ", ") into one row and the lookup is retried,
+/// which handles tables whose headers run down the first column.
+fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
+    let mut cols = requested_columns(columns);
+
+    if cols.is_empty() && !table.headers().is_empty() {
+        cols.extend(table.headers().keys().cloned());
+    }
+
+    let mut table_out = Vec::new();
+    // sometimes there are tables where the first column is the headers, kind of like
+    // a table has been rotated ccw 90 degrees, in these cases all columns will be missing
+    // we keep track of this with this variable so we can deal with it later
+    let mut at_least_one_row_filled = false;
+    // if columns are still empty, let's just make a single column table with the data
+    if cols.is_empty() {
+        at_least_one_row_filled = true;
+
+        let mut cols = vec![];
+        let mut vals = vec![];
+        for row in table.iter().filter(|item| !item.is_empty()) {
+            for (counter, cell) in row.iter().enumerate() {
+                cols.push(format!("Column{}", counter));
+                vals.push(Value::string(cell.to_string(), span))
+            }
+        }
+        table_out.push(Value::Record { cols, vals, span })
+    } else {
+        for row in &table {
+            let mut vals = Vec::with_capacity(cols.len());
+            for col in &cols {
+                let val = match row.get(col) {
+                    Some(cell) => {
+                        at_least_one_row_filled = true;
+                        cell.to_string()
+                    }
+                    None => format!("Missing column: '{}'", &col),
+                };
+                vals.push(Value::string(val, span));
+            }
+            table_out.push(Value::Record {
+                cols: cols.clone(),
+                vals,
+                span,
+            })
+        }
+    }
+
+    if !at_least_one_row_filled {
+        let collapsed = vec![table
+            .data
+            .iter()
+            .map(|row| row.join(", "))
+            .collect::<Vec<String>>()];
+        // Retry only while collapsing still changes the data. Once the data
+        // is a single joined cell another pass would see the same input, and
+        // the unconditional retry used to recurse until the stack overflowed
+        // (e.g. the empty placeholder table with user-supplied columns).
+        if collapsed != table.data {
+            table.data = collapsed;
+            return retrieve_table(table, columns, span);
+        }
+    }
+
+    Value::List {
+        vals: table_out,
+        span,
+    }
+}
+
+/// Returns `attribute` of every element matching `query_string` ("" when the
+/// element lacks it). A selector that fails to parse matches nothing.
+fn execute_selector_query_with_attribute(
+    input_string: &str,
+    query_string: &str,
+    attribute: &str,
+    span: Span,
+) -> Value {
+    let doc = Html::parse_fragment(input_string);
+
+    // Parse here instead of going through `css`, which panics on a selector
+    // that does not parse; callers skip validation for an empty query.
+    let vals: Vec<Value> = match ScraperSelector::parse(query_string) {
+        Ok(selector) => doc
+            .select(&selector)
+            .map(|selection| {
+                Value::string(
+                    selection.value().attr(attribute).unwrap_or("").to_string(),
+                    span,
+                )
+            })
+            .collect(),
+        Err(_) => Vec::new(),
+    };
+    Value::List { vals, span }
+}
+
+/// Returns the HTML (`as_html`) or the concatenated text of every element
+/// matching `query_string`. A selector that fails to parse matches nothing.
+fn execute_selector_query(
+    input_string: &str,
+    query_string: &str,
+    as_html: bool,
+    span: Span,
+) -> Value {
+    let doc = Html::parse_fragment(input_string);
+
+    // See `execute_selector_query_with_attribute` for why `css` is avoided.
+    let vals: Vec<Value> = match ScraperSelector::parse(query_string) {
+        Ok(selector) => doc
+            .select(&selector)
+            .map(|selection| {
+                let content = if as_html {
+                    selection.html()
+                } else {
+                    selection.text().collect::<String>()
+                };
+                Value::string(content, span)
+            })
+            .collect(),
+        Err(_) => Vec::new(),
+    };
+
+    Value::List { vals, span }
+}
+
+/// Parses a CSS selector that is known to be valid.
+///
+/// # Panics
+///
+/// Panics when `selector` does not parse; pass only literals or text that
+/// has already been validated.
+pub fn css(selector: &str) -> ScraperSelector {
+    ScraperSelector::parse(selector).expect("this should never trigger")
+}
+
+// #[cfg(test)]
+// mod tests {
+//     use super::*;
+
+//     const SIMPLE_LIST: &str = r#"
+//
    +//
  • Coffee
  • +//
  • Tea
  • +//
  • Milk
  • +//
+// "#;
+
+//     #[test]
+//     fn test_first_child_is_not_empty() {
+//         assert!(!execute_selector_query(SIMPLE_LIST, "li:first-child", false).is_empty())
+//     }
+
+//     #[test]
+//     fn test_first_child() {
+//         assert_eq!(
+//             vec!["Coffee".to_string()],
+//             execute_selector_query(SIMPLE_LIST, "li:first-child", false)
+//         )
+//     }
+// }
diff --git a/crates/nu_plugin_query/src/query_xml.rs b/crates/nu_plugin_query/src/query_xml.rs
new file mode 100644
index 0000000000..75e08f56ab
--- /dev/null
+++ b/crates/nu_plugin_query/src/query_xml.rs
@@ -0,0 +1,188 @@
+use nu_plugin::{EvaluatedCall, LabeledError};
+use nu_protocol::{Span, Spanned, Value};
+use sxd_document::parser;
+use sxd_xpath::{Context, Factory};
+
+/// Evaluates an XPath expression against XML text.
+///
+/// The result is a list of single-column records, one per resulting node (or
+/// exactly one for a boolean, number or string result). The column is named
+/// after the query, shortened with an ellipsis when it is 20 bytes or longer.
+///
+/// # Errors
+///
+/// Fails when no query was supplied, when the query is not valid XPath, when
+/// `input` is not a string or not well-formed XML, or when evaluation fails.
+pub fn execute_xpath_query(
+    _name: &str,
+    call: &EvaluatedCall,
+    input: &Value,
+    query: Option<Spanned<String>>,
+) -> Result<Value, LabeledError> {
+    let head = call.head;
+
+    let (query_string, span) = match &query {
+        Some(v) => (&v.item, &v.span),
+        None => {
+            // The input is fine here; it is the query that is absent.
+            return Err(LabeledError {
+                msg: "expected an xpath query string".to_string(),
+                label: "missing query".to_string(),
+                span: Some(head),
+            });
+        }
+    };
+
+    let xpath = build_xpath(query_string, span)?;
+    let input_string = input.as_string()?;
+
+    let package = match parser::parse(&input_string) {
+        Ok(package) => package,
+        Err(_) => {
+            return Err(LabeledError {
+                label: "invalid xml document".to_string(),
+                msg: "invalid xml document".to_string(),
+                span: Some(head),
+            })
+        }
+    };
+
+    let document = package.as_document();
+    let context = Context::new();
+
+    // leaving this here for augmentation at some point
+    // build_variables(&arguments, &mut context);
+    // build_namespaces(&arguments, &mut context);
+    let res = xpath.evaluate(&context, document.root());
+
+    let key = column_key(query_string);
+
+    match res {
+        Ok(r) => {
+            let vals: Vec<Value> = match r {
+                sxd_xpath::Value::Nodeset(ns) => ns
+                    .into_iter()
+                    .map(|n| Value::string(n.string_value(), head))
+                    .collect(),
+                sxd_xpath::Value::Boolean(b) => vec![Value::boolean(b, head)],
+                sxd_xpath::Value::Number(n) => vec![Value::float(n, head)],
+                sxd_xpath::Value::String(s) => vec![Value::string(s, head)],
+            };
+
+            // one record per item, so that the resulting list renders as a table
+            let records = vals
+                .into_iter()
+                .map(|v| Value::Record {
+                    cols: vec![key.clone()],
+                    vals: vec![v],
+                    span: head,
+                })
+                .collect();
+
+            Ok(Value::List {
+                vals: records,
+                span: head,
+            })
+        }
+        Err(_) => Err(LabeledError {
+            label: "xpath query error".to_string(),
+            msg: "xpath query error".to_string(),
+            span: Some(*span),
+        }),
+    }
+}
+
+/// Derives the column name from the query text.
+///
+/// Some xpath statements can be long, so queries of 20 bytes or more are cut
+/// to at most 17 bytes plus "...". The cut is moved back to the nearest char
+/// boundary, because slicing through a multi-byte character would panic.
+fn column_key(query: &str) -> String {
+    if query.len() < 20 {
+        return query.to_string();
+    }
+
+    let mut cut = 17;
+    while !query.is_char_boundary(cut) {
+        cut -= 1;
+    }
+    format!("{}...", &query[..cut])
+}
+
+/// Compiles `xpath_str`, reporting failures at `span`.
+fn build_xpath(xpath_str: &str, span: &Span) -> Result<sxd_xpath::XPath, LabeledError> {
+    let factory = Factory::new();
+
+    match factory.build(xpath_str) {
+        // `Ok(None)` means the factory produced no expression for this text.
+        Ok(xpath) => xpath.ok_or_else(|| LabeledError {
+            label: "invalid xpath query".to_string(),
+            msg: "invalid xpath query".to_string(),
+            span: Some(*span),
+        }),
+        Err(_) => Err(LabeledError {
+            label: "expected valid xpath query".to_string(),
+            msg: "expected valid xpath query".to_string(),
+            span: Some(*span),
+        }),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::execute_xpath_query as query;
+    use nu_plugin::EvaluatedCall;
+    use nu_protocol::{Span, Spanned, Value};
+
+    #[test]
+    fn position_function_in_predicate() {
+        let call = EvaluatedCall {
+            head: Span::test_data(),
+            positional: vec![],
+            named: vec![],
+        };
+
+        // Two children under `a`, so exactly one of them is at position 2.
+        let text = Value::string(
+            r#"<?xml version="1.0" encoding="UTF-8"?><a><b/><b/></a>"#,
+            Span::test_data(),
+        );
+
+        let spanned_str: Spanned<String> = Spanned {
+            item: "count(//a/*[position() = 2])".to_string(),
+            span: Span::test_data(),
+        };
+
+        let actual = query("", &call, &text, Some(spanned_str)).expect("test should not fail");
+        let expected = Value::List {
+            vals: vec![Value::Record {
+                cols: vec!["count(//a/*[posit...".to_string()],
+                vals: vec![Value::float(1.0, Span::test_data())],
+                span: Span::test_data(),
+            }],
+            span: Span::test_data(),
+        };
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn functions_implicitly_coerce_argument_types() {
+        let call = EvaluatedCall {
+            head: Span::test_data(),
+            positional: vec![],
+            named: vec![],
+        };
+
+        // A single element whose text is "true".
+        let text = Value::string(
+            r#"<?xml version="1.0" encoding="UTF-8"?><a>true</a>"#,
+            Span::test_data(),
+        );
+
+        let spanned_str: Spanned<String> = Spanned {
+            item: "count(//*[contains(., true)])".to_string(),
+            span: Span::test_data(),
+        };
+
+        let actual = query("", &call, &text, Some(spanned_str)).expect("test should not fail");
+        let expected = Value::List {
+            vals: vec![Value::Record {
+                cols: vec!["count(//*[contain...".to_string()],
+                vals: vec![Value::float(1.0, Span::test_data())],
+                span: Span::test_data(),
+            }],
+            span: Span::test_data(),
+        };
+
+        assert_eq!(actual, expected);
+    }
+}
diff --git a/crates/nu_plugin_query/src/web_tables.rs b/crates/nu_plugin_query/src/web_tables.rs
new file mode 100644
index 0000000000..a2809dbd26
--- /dev/null
+++ b/crates/nu_plugin_query/src/web_tables.rs
@@ -0,0 +1,1227 @@
+use crate::query_web::css;
+use scraper::{element_ref::ElementRef, Html, Selector as ScraperSelector};
+use std::collections::HashMap;
+
+/// Maps a header's text to the index of the cell it labels.
+pub type Headers = HashMap<String, usize>;
+
+/// An HTML table reduced to its header names and its rows of cell text.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WebTable {
+    headers: Headers,
+    pub data: Vec<Vec<String>>,
+}
+
+impl WebTable {
+    /// Finds the first table in `html`.
+    pub fn find_first(html: &str) -> Option<WebTable> {
+        let html = Html::parse_fragment(html);
+        html.select(&css("table")).next().map(WebTable::new)
+    }
+
+    /// Finds every table in `html`, or `None` when there is none.
+    pub fn find_all_tables(html: &str) -> Option<Vec<WebTable>> {
+        let html = Html::parse_fragment(html);
+        let tables: Vec<WebTable> = html.select(&css("table")).map(WebTable::new).collect();
+        if tables.is_empty() {
+            None
+        } else {
+            Some(tables)
+        }
+    }
+
+    /// Finds the table in `html` with an id of `id`.
+    ///
+    /// Returns `None` when `table#<id>` is not a valid selector or matches
+    /// nothing.
+    pub fn find_by_id(html: &str, id: &str) -> Option<WebTable> {
+        let html = Html::parse_fragment(html);
+        let selector = ScraperSelector::parse(&format!("table#{}", id)).ok()?;
+        html.select(&selector).next().map(WebTable::new)
+    }
+
+    /// Finds the tables in `html` whose first row contains all of the headers
+    /// specified in `headers`. The order does not matter.
+    ///
+    /// If `headers` is empty, this is the same as
+    /// [`find_all_tables`](#method.find_all_tables). Returns `None` when no
+    /// table qualifies.
+    pub fn find_by_headers<T>(html: &str, headers: &[T]) -> Option<Vec<WebTable>>
+    where
+        T: AsRef<str>,
+    {
+        if headers.is_empty() {
+            return WebTable::find_all_tables(html);
+        }
+
+        let sel_table = css("table");
+        let sel_tr = css("tr");
+        let sel_th = css("th");
+
+        let html = Html::parse_fragment(html);
+        let tables: Vec<WebTable> = html
+            .select(&sel_table)
+            .filter(|table| {
+                // Only the first row of each table is inspected for headers.
+                table.select(&sel_tr).next().map_or(false, |tr| {
+                    let cells = select_cells(tr, &sel_th, true);
+                    headers.iter().all(|h| contains_str(&cells, h.as_ref()))
+                })
+            })
+            .map(WebTable::new)
+            .collect();
+
+        if tables.is_empty() {
+            None
+        } else {
+            Some(tables)
+        }
+    }
+
+    /// Returns the headers of the table.
+    ///
+    /// This will be empty if the table had no `<th>` tags in its first row. See
+    /// [`Headers`](type.Headers.html) for more.
+    pub fn headers(&self) -> &Headers {
+        &self.headers
+    }
+
+    /// Returns an iterator over the [`Row`](struct.Row.html)s of the table.
+    ///
+    /// Only `<td>` cells are considered when generating rows.
If the first row
+    /// of the table is a header row, meaning it contains at least one `<th>`
+    /// cell, the iterator will start on the second row. Use
+    /// [`headers`](#method.headers) to access the header row in that case.
+    pub fn iter(&self) -> Iter<'_> {
+        Iter {
+            headers: &self.headers,
+            iter: self.data.iter(),
+        }
+    }
+
+    /// Returns a placeholder table: no headers and a single row holding one
+    /// empty cell.
+    pub fn empty() -> WebTable {
+        WebTable {
+            headers: HashMap::new(),
+            data: vec![vec!["".to_string()]],
+        }
+    }
+
+    /// Builds a table from a `<table>` element.
+    ///
+    /// Headers come from the `<th>` cells of the first row, which is then
+    /// excluded from the data. When the first row has none, the `<th>` cells
+    /// of every row (joined with ", ") are used as row headers instead and no
+    /// row is excluded.
+    fn new(element: ElementRef) -> WebTable {
+        let sel_tr = css("tr");
+        let sel_th = css("th");
+        let sel_td = css("td");
+
+        let mut headers = HashMap::new();
+        let mut rows = element.select(&sel_tr).peekable();
+        if let Some(tr) = rows.peek() {
+            for (i, th) in tr.select(&sel_th).enumerate() {
+                headers.insert(cell_content(th), i);
+            }
+        }
+
+        if headers.is_empty() {
+            // try looking for data as headers i.e. they're row headers not column headers
+            for (i, d) in rows
+                .clone()
+                .map(|tr| select_cells(tr, &sel_th, true))
+                .enumerate()
+            {
+                headers.insert(d.join(", "), i);
+            }
+            // headers that are present but all empty count as no headers
+            if headers.keys().all(|h| h.is_empty()) {
+                headers.clear();
+            }
+        } else {
+            // the first row was the header row; it is not data
+            rows.next();
+        }
+
+        let data = rows.map(|tr| select_cells(tr, &sel_td, true)).collect();
+        WebTable { headers, data }
+    }
+}
+
+impl<'a> IntoIterator for &'a WebTable {
+    type Item = Row<'a>;
+    type IntoIter = Iter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An iterator over the rows in a [`WebTable`](struct.WebTable.html).
+pub struct Iter<'a> {
+    headers: &'a Headers,
+    iter: std::slice::Iter<'a, Vec<String>>,
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = Row<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let headers = self.headers;
+        self.iter.next().map(|cells| Row { headers, cells })
+    }
+}
+
+/// A row in a [`WebTable`](struct.WebTable.html).
+///
+/// A row consists of a number of data cells stored as strings. If the row
+/// contains the same number of cells as the table's header row, its cells can
+/// be safely accessed by header names using [`get`](#method.get). Otherwise,
+/// the data should be accessed via [`as_slice`](#method.as_slice) or by
+/// iterating over the row.
+///
+/// This struct can be thought of as a lightweight reference into a table. As
+/// such, it implements the `Copy` trait.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct Row<'a> {
+    headers: &'a Headers,
+    cells: &'a [String],
+}
+
+impl<'a> Row<'a> {
+    /// Returns the number of cells in the row.
+    pub fn len(&self) -> usize {
+        self.cells.len()
+    }
+
+    /// Returns `true` if the row contains no cells.
+    pub fn is_empty(&self) -> bool {
+        self.cells.is_empty()
+    }
+
+    /// Returns the cell underneath `header`.
+    ///
+    /// Returns `None` if there is no such header, or if there is no cell at
+    /// that position in the row.
+    pub fn get(&self, header: &str) -> Option<&'a str> {
+        self.headers
+            .get(header)
+            .and_then(|&i| self.cells.get(i).map(String::as_str))
+    }
+
+    /// Returns the name of the header stored for position `index`.
+    ///
+    /// Returns `None` if no header has that index or if its name is empty.
+    pub fn get_header_at(&self, index: usize) -> Option<&'a str> {
+        self.headers
+            .iter()
+            .find(|(_, &val)| val == index)
+            .map(|(key, _)| key.as_str())
+            .filter(|key| !key.is_empty())
+    }
+
+    /// Returns a slice containing all the cells.
+    pub fn as_slice(&self) -> &'a [String] {
+        self.cells
+    }
+
+    /// Returns an iterator over the cells of the row.
+    pub fn iter(&self) -> std::slice::Iter<'a, String> {
+        self.cells.iter()
+    }
+}
+
+impl<'a> IntoIterator for Row<'a> {
+    type Item = &'a String;
+    type IntoIter = std::slice::Iter<'a, String>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.cells.iter()
+    }
+}
+
+/// Returns the content of every descendant of `element` matching `selector`.
+///
+/// With `remove_html_tags` each cell's content is parsed again and every text
+/// node found is pushed as its own entry.
+// NOTE(review): because entries are pushed per text node, a cell without any
+// text contributes no entry and a cell with several text nodes contributes
+// several, so entries may not line up one-to-one with cells — confirm this is
+// intended.
+fn select_cells(
+    element: ElementRef,
+    selector: &ScraperSelector,
+    remove_html_tags: bool,
+) -> Vec<String> {
+    let scraped = element.select(selector).map(cell_content);
+    if !remove_html_tags {
+        return scraped.collect();
+    }
+
+    let mut dehtmlized: Vec<String> = Vec::new();
+    for item in scraped {
+        let frag = Html::parse_fragment(&item);
+        for node in frag.tree {
+            if let scraper::node::Node::Text(text) = node {
+                dehtmlized.push(text.text.to_string());
+            }
+        }
+    }
+    dehtmlized
+}
+
+/// Returns the text of `element` with the markup removed, or its trimmed
+/// inner HTML when it contains no text at all.
+fn cell_content(element: ElementRef) -> String {
+    let element = element.inner_html().trim().to_string();
+    let frag = Html::parse_fragment(&element);
+
+    let mut dehtmlize = String::new();
+    for node in frag.tree {
+        if let scraper::node::Node::Text(text) = node {
+            dehtmlize.push_str(&text.text.to_string())
+        }
+    }
+
+    if dehtmlize.is_empty() {
+        element
+    } else {
+        dehtmlize
+    }
+}
+
+/// Returns `true` when the text of `item` (markup removed, falling back to
+/// `item` itself when no text remains) contains at least one entry of `slice`.
+///
+/// Note the direction: `item` is searched for each entry of `slice`, not the
+/// other way around, so an empty entry in `slice` matches any `item`.
+fn contains_str(slice: &[String], item: &str) -> bool {
+    let mut dehtmlized = String::new();
+    let frag = Html::parse_fragment(item);
+    for node in frag.tree {
+        if let scraper::node::Node::Text(text) = node {
+            dehtmlized.push_str(&text.text.to_string());
+        }
+    }
+
+    if dehtmlized.is_empty() {
+        dehtmlized = item.to_string();
+    }
+
+    slice.iter().any(|s| dehtmlized.contains(s))
+}
+
+// #[cfg(test)]
+// mod tests {
+//     use super::*;
+//     use crate::query_web::retrieve_tables;
+//     use indexmap::indexmap;
+//     use nu_protocol::Value;
+
+//     const TABLE_EMPTY: &str = r#"
+//
+// "#; + +// const TABLE_TH: &str = r#" +// +// +//
NameAge
+// "#; + +// const TABLE_TD: &str = r#" +// +// +//
NameAge
+// "#; + +// const TWO_TABLES_TD: &str = r#" +// +// +//
NameAge
+// +// +//
ProfessionCivil State
+// "#; + +// const TABLE_TH_TD: &str = r#" +// +// +// +//
NameAge
John20
+// "#; + +// const TWO_TABLES_TH_TD: &str = r#" +// +// +// +//
NameAge
John20
+// +// +// +//
ProfessionCivil State
MechanicSingle
+// "#; + +// const TABLE_TD_TD: &str = r#" +// +// +// +//
NameAge
John20
+// "#; + +// const TABLE_TH_TH: &str = r#" +// +// +// +//
NameAge
John20
+// "#; + +// const TABLE_COMPLEX: &str = r#" +// +// +// +// +// +// +//
NameAgeExtra
John20
May30foo
abcd
+// "#; + +// const TWO_TABLES_COMPLEX: &str = r#" +// +// +// foo +// +// +// +// +// +// +// +//
NameAgeExtra
John20
May30foo
abcd
+// +// +// +// +// +// +//
ProfessionCivil StateExtra
CarpenterSingle
MechanicMarriedbar
efgh
+// +// +// "#; + +// const HTML_NO_TABLE: &str = r#" +// +// +// foo +//

Hi.

+// +// "#; + +// const HTML_TWO_TABLES: &str = r#" +// +// +// foo +// +// +// +// +//
NameAge
John20
+// +// +// +//
NameWeight
John150
+// +// +// "#; + +// const HTML_TABLE_FRAGMENT: &str = r#" +// +// +// +//
NameAge
John20
+// +// +// "#; + +// const HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES: &str = r#" +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//
Excel 2007 formats +//
Format +// Extension +// Description +//
Excel Workbook +// .xlsx +// The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents. +//Functions as the primary replacement for the former binary .xls format, although it does not support Excel macros for security reasons. Saving as .xlsx offers file size reduction over .xls[38] +//
Excel Macro-enabled Workbook +// .xlsm +// As Excel Workbook, but with macro support. +//
Excel Binary Workbook +// .xlsb +// As Excel Macro-enabled Workbook, but storing information in binary form rather than XML documents for opening and saving documents more quickly and efficiently. Intended especially for very large documents with tens of thousands of rows, and/or several hundreds +//of columns. This format is very useful for shrinking large Excel files as is often the case when doing data analysis. +//
Excel Macro-enabled Template +// .xltm +// A template document that forms a basis for actual workbooks, with macro support. The replacement for the old .xlt format. +//
Excel Add-in +// .xlam +// Excel add-in to add extra functionality and tools. Inherent macro support because of the file purpose. +//
+// "#; + +// const HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS: &str = r#" +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//
+// Microsoft Excel +//
+// Microsoft Office Excel (2019–present).svg +//
+// Microsoft Excel.png +//
+// A simple +// line chart being +// created in Excel, running on +// Windows 10 +//
+//
+// Developer(s) +// +// Microsoft +//
+// Initial release +// +// 1987; 34 years ago (1987) +//
+// Stable release +// +//
+// 2103 (16.0.13901.20400) / April 13, 2021; 4 months ago (2021-04-13)[1] +//
+//
+// Operating system +// +// Microsoft Windows +//
+// Type +// +// Spreadsheet +//
+// License +// +// Trialware[2] +//
+// Website +// +// products.office.com/en-us/excel +//
+// "#; + +// #[test] +// fn test_find_first_none() { +// assert_eq!(None, Table::find_first("")); +// assert_eq!(None, Table::find_first("foo")); +// assert_eq!(None, Table::find_first(HTML_NO_TABLE)); +// } + +// #[test] +// fn test_find_first_empty() { +// let empty = Table { +// headers: HashMap::new(), +// data: Vec::new(), +// }; +// assert_eq!(Some(empty), Table::find_first(TABLE_EMPTY)); +// } + +// #[test] +// fn test_find_first_some() { +// assert!(Table::find_first(TABLE_TH).is_some()); +// assert!(Table::find_first(TABLE_TD).is_some()); +// } + +// #[test] +// fn test_find_by_id_none() { +// assert_eq!(None, Table::find_by_id("", "")); +// assert_eq!(None, Table::find_by_id("foo", "id")); +// assert_eq!(None, Table::find_by_id(HTML_NO_TABLE, "id")); + +// assert_eq!(None, Table::find_by_id(TABLE_EMPTY, "id")); +// assert_eq!(None, Table::find_by_id(TABLE_TH, "id")); +// assert_eq!(None, Table::find_by_id(TABLE_TH, "")); +// assert_eq!(None, Table::find_by_id(HTML_TWO_TABLES, "id")); +// } + +// #[test] +// fn test_find_by_id_some() { +// assert!(Table::find_by_id(HTML_TWO_TABLES, "first").is_some()); +// assert!(Table::find_by_id(HTML_TWO_TABLES, "second").is_some()); +// } + +// #[test] +// fn test_find_by_headers_empty() { +// let headers: [&str; 0] = []; + +// assert_eq!(None, Table::find_by_headers("", &headers)); +// assert_eq!(None, Table::find_by_headers("foo", &headers)); +// assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers)); + +// assert!(Table::find_by_headers(TABLE_EMPTY, &headers).is_some()); +// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); +// } + +// #[test] +// fn test_find_by_headers_none() { +// let headers = ["Name", "Age"]; +// let bad_headers = ["Name", "BAD"]; + +// assert_eq!(None, Table::find_by_headers("", &headers)); +// assert_eq!(None, Table::find_by_headers("foo", &headers)); +// assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers)); + +// assert_eq!(None, 
Table::find_by_headers(TABLE_EMPTY, &bad_headers)); +// assert_eq!(None, Table::find_by_headers(TABLE_TH, &bad_headers)); + +// assert_eq!(None, Table::find_by_headers(TABLE_TD, &headers)); +// assert_eq!(None, Table::find_by_headers(TABLE_TD, &bad_headers)); +// } + +// #[test] +// fn test_find_by_headers_some() { +// let headers: [&str; 0] = []; +// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); +// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); +// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + +// let headers = ["Name"]; +// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); +// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); +// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + +// let headers = ["Age", "Name"]; +// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); +// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); +// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); +// } + +// #[test] +// fn test_find_first_incomplete_fragment() { +// assert!(Table::find_first(HTML_TABLE_FRAGMENT).is_some()); +// } + +// #[test] +// fn test_headers_empty() { +// let empty = HashMap::new(); +// assert_eq!(&empty, Table::find_first(TABLE_TD).unwrap().headers()); +// assert_eq!(&empty, Table::find_first(TABLE_TD_TD).unwrap().headers()); +// } + +// #[test] +// fn test_headers_nonempty() { +// let mut headers = HashMap::new(); +// headers.insert("Name".to_string(), 0); +// headers.insert("Age".to_string(), 1); + +// assert_eq!(&headers, Table::find_first(TABLE_TH).unwrap().headers()); +// assert_eq!(&headers, Table::find_first(TABLE_TH_TD).unwrap().headers()); +// assert_eq!(&headers, Table::find_first(TABLE_TH_TH).unwrap().headers()); + +// headers.insert("Extra".to_string(), 2); +// assert_eq!( +// &headers, +// Table::find_first(TABLE_COMPLEX).unwrap().headers() +// ); +// } + +// #[test] +// fn test_iter_empty() 
{ +// assert_eq!(0, Table::find_first(TABLE_EMPTY).unwrap().iter().count()); +// assert_eq!(0, Table::find_first(TABLE_TH).unwrap().iter().count()); +// } + +// #[test] +// fn test_iter_nonempty() { +// assert_eq!(1, Table::find_first(TABLE_TD).unwrap().iter().count()); +// assert_eq!(1, Table::find_first(TABLE_TH_TD).unwrap().iter().count()); +// assert_eq!(2, Table::find_first(TABLE_TD_TD).unwrap().iter().count()); +// assert_eq!(1, Table::find_first(TABLE_TH_TH).unwrap().iter().count()); +// assert_eq!(4, Table::find_first(TABLE_COMPLEX).unwrap().iter().count()); +// } + +// #[test] +// fn test_row_is_empty() { +// let table = Table::find_first(TABLE_TD).unwrap(); +// assert_eq!( +// vec![false], +// table.iter().map(|r| r.is_empty()).collect::>() +// ); + +// let table = Table::find_first(TABLE_COMPLEX).unwrap(); +// assert_eq!( +// vec![false, false, true, false], +// table.iter().map(|r| r.is_empty()).collect::>() +// ); +// } + +// #[test] +// fn test_row_len() { +// let table = Table::find_first(TABLE_TD).unwrap(); +// assert_eq!(vec![2], table.iter().map(|r| r.len()).collect::>()); + +// let table = Table::find_first(TABLE_COMPLEX).unwrap(); +// assert_eq!( +// vec![2, 3, 0, 4], +// table.iter().map(|r| r.len()).collect::>() +// ); +// } + +// #[test] +// fn test_row_len_two_tables() { +// let tables = Table::find_all_tables(HTML_TWO_TABLES).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// assert_eq!(vec![2], table_1.iter().map(|r| r.len()).collect::>()); +// assert_eq!(vec![2], table_2.iter().map(|r| r.len()).collect::>()); + +// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// assert_eq!( +// vec![2, 3, 0, 4], +// table_1.iter().map(|r| r.len()).collect::>() +// ); +// assert_eq!( +// vec![2, 3, 0, 4], 
+// table_2.iter().map(|r| r.len()).collect::<Vec<_>>() +// ); +// } + +// #[test] +// fn test_row_get_without_headers() { +// let table = Table::find_first(TABLE_TD).unwrap(); +// let mut iter = table.iter(); +// let row = iter.next().unwrap(); + +// assert_eq!(None, row.get("")); +// assert_eq!(None, row.get("foo")); +// assert_eq!(None, row.get("Name")); +// assert_eq!(None, row.get("Age")); + +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_get_with_headers() { +// let table = Table::find_first(TABLE_TH_TD).unwrap(); +// let mut iter = table.iter(); +// let row = iter.next().unwrap(); + +// assert_eq!(None, row.get("")); +// assert_eq!(None, row.get("foo")); +// assert_eq!(Some("John"), row.get("Name")); +// assert_eq!(Some("20"), row.get("Age")); + +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_get_complex() { +// let table = Table::find_first(TABLE_COMPLEX).unwrap(); +// let mut iter = table.iter(); + +// let row = iter.next().unwrap(); +// assert_eq!(Some("John"), row.get("Name")); +// assert_eq!(Some("20"), row.get("Age")); +// assert_eq!(None, row.get("Extra")); + +// let row = iter.next().unwrap(); +// assert_eq!(Some("May"), row.get("Name")); +// assert_eq!(Some("30"), row.get("Age")); +// assert_eq!(Some("foo"), row.get("Extra")); + +// let row = iter.next().unwrap(); +// assert_eq!(None, row.get("Name")); +// assert_eq!(None, row.get("Age")); +// assert_eq!(None, row.get("Extra")); + +// let row = iter.next().unwrap(); +// assert_eq!(Some("a"), row.get("Name")); +// assert_eq!(Some("b"), row.get("Age")); +// assert_eq!(Some("c"), row.get("Extra")); + +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_two_tables_row_get_complex() { +// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// let mut iter_1 = table_1.iter(); +// let mut iter_2 = 
table_2.iter(); + +// let row_table_1 = iter_1.next().unwrap(); +// let row_table_2 = iter_2.next().unwrap(); +// assert_eq!(Some("John"), row_table_1.get("Name")); +// assert_eq!(Some("20"), row_table_1.get("Age")); +// assert_eq!(None, row_table_1.get("Extra")); +// assert_eq!(Some("Carpenter"), row_table_2.get("Profession")); +// assert_eq!(Some("Single"), row_table_2.get("Civil State")); +// assert_eq!(None, row_table_2.get("Extra")); + +// let row_table_1 = iter_1.next().unwrap(); +// let row_table_2 = iter_2.next().unwrap(); +// assert_eq!(Some("May"), row_table_1.get("Name")); +// assert_eq!(Some("30"), row_table_1.get("Age")); +// assert_eq!(Some("foo"), row_table_1.get("Extra")); +// assert_eq!(Some("Mechanic"), row_table_2.get("Profession")); +// assert_eq!(Some("Married"), row_table_2.get("Civil State")); +// assert_eq!(Some("bar"), row_table_2.get("Extra")); + +// let row_table_1 = iter_1.next().unwrap(); +// let row_table_2 = iter_2.next().unwrap(); +// assert_eq!(None, row_table_1.get("Name")); +// assert_eq!(None, row_table_1.get("Age")); +// assert_eq!(None, row_table_1.get("Extra")); +// assert_eq!(None, row_table_2.get("Name")); +// assert_eq!(None, row_table_2.get("Age")); +// assert_eq!(None, row_table_2.get("Extra")); + +// let row_table_1 = iter_1.next().unwrap(); +// let row_table_2 = iter_2.next().unwrap(); +// assert_eq!(Some("a"), row_table_1.get("Name")); +// assert_eq!(Some("b"), row_table_1.get("Age")); +// assert_eq!(Some("c"), row_table_1.get("Extra")); +// assert_eq!(Some("e"), row_table_2.get("Profession")); +// assert_eq!(Some("f"), row_table_2.get("Civil State")); +// assert_eq!(Some("g"), row_table_2.get("Extra")); + +// assert_eq!(None, iter_1.next()); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_row_as_slice_without_headers() { +// let table = Table::find_first(TABLE_TD).unwrap(); +// let mut iter = table.iter(); + +// assert_eq!(&["Name", "Age"], iter.next().unwrap().as_slice()); +// assert_eq!(None, 
iter.next()); +// } + +// #[test] +// fn test_row_as_slice_without_headers_two_tables() { +// let tables = Table::find_all_tables(TWO_TABLES_TD).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// let mut iter_1 = table_1.iter(); +// let mut iter_2 = table_2.iter(); + +// assert_eq!(&["Name", "Age"], iter_1.next().unwrap().as_slice()); +// assert_eq!( +// &["Profession", "Civil State"], +// iter_2.next().unwrap().as_slice() +// ); +// assert_eq!(None, iter_1.next()); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_row_as_slice_with_headers() { +// let table = Table::find_first(TABLE_TH_TD).unwrap(); +// let mut iter = table.iter(); + +// assert_eq!(&["John", "20"], iter.next().unwrap().as_slice()); +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_as_slice_with_headers_two_tables() { +// let tables = Table::find_all_tables(TWO_TABLES_TH_TD).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// let mut iter_1 = table_1.iter(); +// let mut iter_2 = table_2.iter(); + +// assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice()); +// assert_eq!(&["Mechanic", "Single"], iter_2.next().unwrap().as_slice()); +// assert_eq!(None, iter_1.next()); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_row_as_slice_complex() { +// let table = Table::find_first(TABLE_COMPLEX).unwrap(); +// let mut iter = table.iter(); +// let empty: [&str; 0] = []; + +// assert_eq!(&["John", "20"], iter.next().unwrap().as_slice()); +// assert_eq!(&["May", "30", "foo"], iter.next().unwrap().as_slice()); +// assert_eq!(&empty, iter.next().unwrap().as_slice()); +// assert_eq!(&["a", "b", "c", "d"], iter.next().unwrap().as_slice()); +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_as_slice_complex_two_tables() { +// let tables = 
Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// let mut iter_1 = table_1.iter(); +// let mut iter_2 = table_2.iter(); +// let empty: [&str; 0] = []; + +// assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice()); +// assert_eq!(&["May", "30", "foo"], iter_1.next().unwrap().as_slice()); +// assert_eq!(&empty, iter_1.next().unwrap().as_slice()); +// assert_eq!(&["a", "b", "c", "d"], iter_1.next().unwrap().as_slice()); +// assert_eq!(None, iter_1.next()); +// assert_eq!(&["Carpenter", "Single"], iter_2.next().unwrap().as_slice()); +// assert_eq!( +// &["Mechanic", "Married", "bar"], +// iter_2.next().unwrap().as_slice() +// ); +// assert_eq!(&empty, iter_2.next().unwrap().as_slice()); +// assert_eq!(&["e", "f", "g", "h"], iter_2.next().unwrap().as_slice()); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_row_iter_simple() { +// let table = Table::find_first(TABLE_TD).unwrap(); +// let row = table.iter().next().unwrap(); +// let mut iter = row.iter(); + +// assert_eq!(Some("Name"), iter.next().map(String::as_str)); +// assert_eq!(Some("Age"), iter.next().map(String::as_str)); +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_iter_simple_two_tables() { +// let tables = Table::find_all_tables(TWO_TABLES_TD).unwrap(); +// let mut tables_iter = tables.iter(); +// let table_1 = tables_iter.next().unwrap(); +// let table_2 = tables_iter.next().unwrap(); +// let row_1 = table_1.iter().next().unwrap(); +// let row_2 = table_2.iter().next().unwrap(); +// let mut iter_1 = row_1.iter(); +// let mut iter_2 = row_2.iter(); + +// assert_eq!(Some("Name"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("Age"), iter_1.next().map(String::as_str)); +// assert_eq!(None, iter_1.next()); +// assert_eq!(Some("Profession"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("Civil State"), 
iter_2.next().map(String::as_str)); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_row_iter_complex() { +// let table = Table::find_first(TABLE_COMPLEX).unwrap(); +// let mut table_iter = table.iter(); + +// let row = table_iter.next().unwrap(); +// let mut iter = row.iter(); +// assert_eq!(Some("John"), iter.next().map(String::as_str)); +// assert_eq!(Some("20"), iter.next().map(String::as_str)); +// assert_eq!(None, iter.next()); + +// let row = table_iter.next().unwrap(); +// let mut iter = row.iter(); +// assert_eq!(Some("May"), iter.next().map(String::as_str)); +// assert_eq!(Some("30"), iter.next().map(String::as_str)); +// assert_eq!(Some("foo"), iter.next().map(String::as_str)); +// assert_eq!(None, iter.next()); + +// let row = table_iter.next().unwrap(); +// let mut iter = row.iter(); +// assert_eq!(None, iter.next()); + +// let row = table_iter.next().unwrap(); +// let mut iter = row.iter(); +// assert_eq!(Some("a"), iter.next().map(String::as_str)); +// assert_eq!(Some("b"), iter.next().map(String::as_str)); +// assert_eq!(Some("c"), iter.next().map(String::as_str)); +// assert_eq!(Some("d"), iter.next().map(String::as_str)); +// assert_eq!(None, iter.next()); +// } + +// #[test] +// fn test_row_iter_complex_two_tables() { +// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); +// let mut tables_iter = tables.iter(); +// let mut table_1 = tables_iter.next().unwrap().iter(); +// let mut table_2 = tables_iter.next().unwrap().iter(); + +// let row_1 = table_1.next().unwrap(); +// let row_2 = table_2.next().unwrap(); +// let mut iter_1 = row_1.iter(); +// let mut iter_2 = row_2.iter(); +// assert_eq!(Some("John"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("20"), iter_1.next().map(String::as_str)); +// assert_eq!(None, iter_1.next()); +// assert_eq!(Some("Carpenter"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("Single"), iter_2.next().map(String::as_str)); +// assert_eq!(None, iter_2.next()); + 
+// let row_1 = table_1.next().unwrap(); +// let row_2 = table_2.next().unwrap(); +// let mut iter_1 = row_1.iter(); +// let mut iter_2 = row_2.iter(); +// assert_eq!(Some("May"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("30"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("foo"), iter_1.next().map(String::as_str)); +// assert_eq!(None, iter_1.next()); +// assert_eq!(Some("Mechanic"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("Married"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("bar"), iter_2.next().map(String::as_str)); +// assert_eq!(None, iter_2.next()); + +// let row_1 = table_1.next().unwrap(); +// let row_2 = table_2.next().unwrap(); +// let mut iter_1 = row_1.iter(); +// let mut iter_2 = row_2.iter(); +// assert_eq!(None, iter_1.next()); +// assert_eq!(None, iter_2.next()); + +// let row_1 = table_1.next().unwrap(); +// let row_2 = table_2.next().unwrap(); +// let mut iter_1 = row_1.iter(); +// let mut iter_2 = row_2.iter(); +// assert_eq!(Some("a"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("b"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("c"), iter_1.next().map(String::as_str)); +// assert_eq!(Some("d"), iter_1.next().map(String::as_str)); +// assert_eq!(None, iter_1.next()); +// assert_eq!(Some("e"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("f"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("g"), iter_2.next().map(String::as_str)); +// assert_eq!(Some("h"), iter_2.next().map(String::as_str)); +// assert_eq!(None, iter_2.next()); +// } + +// #[test] +// fn test_wikipedia_swapped_rows_columns() { +// // empty columns +// let cols = nu_protocol::value::Value { +// value: nu_protocol::UntaggedValue::Primitive(nu_protocol::Primitive::String( +// "".to_string(), +// )), +// tag: nu_source::Tag::unknown(), +// }; + +// // this table is taken straight from wikipedia with no changes +// let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS, &cols, 
true); + +// let expected = vec![UntaggedValue::row(indexmap! { +// "Stable release".to_string() => UntaggedValue::string("\n 2103 (16.0.13901.20400) / April\u{a0}13, 2021; 4 months ago\u{a0}(2021-04-13)[1]\n ").into(), +// "Developer(s)".to_string() => UntaggedValue::string("Microsoft").into(), +// "Operating system".to_string() => UntaggedValue::string("Microsoft Windows").into(), +// "Type".to_string() => UntaggedValue::string("Spreadsheet").into(), +// "License".to_string() => UntaggedValue::string("Trialware[2]").into(), +// "".to_string() => UntaggedValue::string("").into(), +// "Website".to_string() => UntaggedValue::string("products.office.com/en-us/excel").into(), +// "Initial release".to_string() => UntaggedValue::string("1987; 34\u{a0}years ago\u{a0}(1987)").into(), +// }).into()]; + +// assert_eq!(table, expected); +// } + +// #[test] +// fn test_wikipedia_table_with_column_headers() { +// let cols = UntaggedValue::table(&[ +// UntaggedValue::string("Format".to_string()).into(), +// UntaggedValue::string("Extension".to_string()).into(), +// UntaggedValue::string("Description".to_string()).into(), +// ]) +// .into(); + +// // this table is taken straight from wikipedia with no changes +// let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES, &cols, true); +// let expected = vec![ +// UntaggedValue::row(indexmap! { +// "Format".to_string() => UntaggedValue::string("Excel Workbook").into(), +// "Extension".to_string() => UntaggedValue::string(".xlsx").into(), +// "Description".to_string() => UntaggedValue::string("The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents. Functions as the primary +// +//replacement for the former binary .xls format, although it does not support Excel macros for security reasons. Saving as .xlsx offers file size reduction over .xls[38]").into(), +// }).into(), +// UntaggedValue::row(indexmap! 
{ +// "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Workbook").into(), +// "Extension".to_string() => UntaggedValue::string(".xlsm").into(), +// "Description".to_string() => UntaggedValue::string("As Excel Workbook, but with macro support.").into(), +// }).into(), +// UntaggedValue::row(indexmap! { +// "Format".to_string() => UntaggedValue::string("Excel Binary Workbook").into(), +// "Extension".to_string() => UntaggedValue::string(".xlsb").into(), +// "Description".to_string() => UntaggedValue::string("As Excel Macro-enabled Workbook, but storing information in binary form rather than XML documents for opening and saving documents more quickly and efficiently. Intended especially for very large documents with tens of thousands of rows, and/or several hundreds of columns. This format is very useful for shrinking large Excel files as is often the case when doing data analysis.").into(), +// }).into(), +// UntaggedValue::row(indexmap! { +// "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Template").into(), +// "Extension".to_string() => UntaggedValue::string(".xltm").into(), +// "Description".to_string() => UntaggedValue::string("A template document that forms a basis for actual workbooks, with macro support. The replacement for the old .xlt format.").into(), +// }).into(), +// UntaggedValue::row(indexmap! { +// "Format".to_string() => UntaggedValue::string("Excel Add-in").into(), +// "Extension".to_string() => UntaggedValue::string(".xlam").into(), +// "Description".to_string() => UntaggedValue::string("Excel add-in to add extra functionality and tools. 
Inherent macro support because of the file purpose.").into(), +// }).into(), +// ]; + +// assert_eq!(table, expected); +// } +// } diff --git a/src/plugins/nu_plugin_extra_query.rs b/src/plugins/nu_plugin_extra_query.rs new file mode 100644 index 0000000000..f0c6a26deb --- /dev/null +++ b/src/plugins/nu_plugin_extra_query.rs @@ -0,0 +1,6 @@ +use nu_plugin::{serve_plugin, CapnpSerializer}; +use nu_plugin_query::Query; + +fn main() { + serve_plugin(&mut Query::new(), CapnpSerializer {}) +}