mirror of
https://github.com/nushell/nushell.git
synced 2024-11-21 16:03:19 +01:00
web scraping with css selectors (#2725)
* first step of making selector * wip * wip tests working * probably good enough for a first pass * oops, missed something. * and something else... * grrrr version errors
This commit is contained in:
parent
b674cee9d2
commit
97f3671e2c
297
Cargo.lock
generated
297
Cargo.lock
generated
@ -981,6 +981,33 @@ dependencies = [
|
||||
"generic-array 0.8.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.27.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"matches",
|
||||
"phf",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"smallvec 1.4.2",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.1.3"
|
||||
@ -1096,6 +1123,17 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.6.2"
|
||||
@ -1205,6 +1243,15 @@ version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "134951f4028bdadb9b84baf4232681efbf277da25144b9b0ad65df75946c422b"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtparse"
|
||||
version = "1.2.0"
|
||||
@ -1488,6 +1535,16 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.1.29"
|
||||
@ -1704,6 +1761,15 @@ dependencies = [
|
||||
"pin-project",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.55"
|
||||
@ -2035,6 +2101,20 @@ dependencies = [
|
||||
"sha1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmlescape"
|
||||
version = "0.3.1"
|
||||
@ -2566,6 +2646,12 @@ dependencies = [
|
||||
"linked-hash-map 0.5.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "macaddr"
|
||||
version = "1.0.1"
|
||||
@ -2596,6 +2682,23 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"serde 1.0.115",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.8"
|
||||
@ -2799,6 +2902,25 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
|
||||
|
||||
[[package]]
|
||||
name = "nipper"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "576d0e437aa08b447a207584463febe639d00b26b63121a9c038eff8371e0050"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"html5ever",
|
||||
"markup5ever",
|
||||
"selectors",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.17.0"
|
||||
@ -2885,6 +3007,7 @@ dependencies = [
|
||||
"nu_plugin_post",
|
||||
"nu_plugin_ps",
|
||||
"nu_plugin_s3",
|
||||
"nu_plugin_selector",
|
||||
"nu_plugin_start",
|
||||
"nu_plugin_sys",
|
||||
"nu_plugin_textview",
|
||||
@ -3288,6 +3411,18 @@ dependencies = [
|
||||
"s3handler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu_plugin_selector"
|
||||
version = "0.22.0"
|
||||
dependencies = [
|
||||
"nipper",
|
||||
"nu-errors",
|
||||
"nu-plugin",
|
||||
"nu-protocol",
|
||||
"nu-source",
|
||||
"nu-test-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu_plugin_start"
|
||||
version = "0.22.0"
|
||||
@ -3808,6 +3943,60 @@ dependencies = [
|
||||
"indexmap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "0.4.23"
|
||||
@ -3896,6 +4085,12 @@ version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "pretty"
|
||||
version = "0.5.2"
|
||||
@ -4142,7 +4337,7 @@ dependencies = [
|
||||
"rand_isaac",
|
||||
"rand_jitter",
|
||||
"rand_os",
|
||||
"rand_pcg",
|
||||
"rand_pcg 0.1.2",
|
||||
"rand_xorshift",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
@ -4158,6 +4353,7 @@ dependencies = [
|
||||
"rand_chacha 0.2.2",
|
||||
"rand_core 0.5.1",
|
||||
"rand_hc 0.2.0",
|
||||
"rand_pcg 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4266,6 +4462,15 @@ dependencies = [
|
||||
"rand_core 0.4.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
||||
dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.1.1"
|
||||
@ -4697,6 +4902,26 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log 0.4.11",
|
||||
"matches",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec 1.4.2",
|
||||
"thin-slice",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
@ -4865,6 +5090,16 @@ dependencies = [
|
||||
"yaml-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
|
||||
dependencies = [
|
||||
"nodrop",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.8.2"
|
||||
@ -4945,6 +5180,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.3.0"
|
||||
@ -5022,6 +5263,12 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
@ -5043,6 +5290,31 @@ dependencies = [
|
||||
"bytes 0.4.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a"
|
||||
dependencies = [
|
||||
"lazy_static 1.4.0",
|
||||
"new_debug_unreachable",
|
||||
"phf_shared",
|
||||
"precomputed-hash",
|
||||
"serde 1.0.115",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strip-ansi-escapes"
|
||||
version = "0.1.0"
|
||||
@ -5167,6 +5439,17 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.6.1"
|
||||
@ -5225,6 +5508,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thin-slice"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.20"
|
||||
@ -5718,6 +6007,12 @@ dependencies = [
|
||||
"log 0.4.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.4"
|
||||
|
@ -44,6 +44,7 @@ nu_plugin_to_bson = {version = "0.22.0", path = "./crates/nu_plugin_to_bson", op
|
||||
nu_plugin_to_sqlite = {version = "0.22.0", path = "./crates/nu_plugin_to_sqlite", optional = true}
|
||||
nu_plugin_tree = {version = "0.22.0", path = "./crates/nu_plugin_tree", optional = true}
|
||||
nu_plugin_xpath = {version = "0.22.0", path = "./crates/nu_plugin_xpath", optional = true}
|
||||
nu_plugin_selector = {version = "0.22.0", path = "./crates/nu_plugin_selector", optional = true}
|
||||
|
||||
# Required to bootstrap the main binary
|
||||
clap = "2.33.3"
|
||||
@ -88,7 +89,7 @@ default = [
|
||||
"fetch",
|
||||
"rich-benchmark",
|
||||
]
|
||||
extra = ["default", "binaryview", "tree", "clipboard-cli", "trash-support", "start", "bson", "sqlite", "s3", "chart", "xpath"]
|
||||
extra = ["default", "binaryview", "tree", "clipboard-cli", "trash-support", "start", "bson", "sqlite", "s3", "chart", "xpath", "selector"]
|
||||
stable = ["default"]
|
||||
|
||||
wasi = ["inc", "match", "directories-support", "ptree-support", "match", "tree", "rustyline-support"]
|
||||
@ -115,6 +116,7 @@ start = ["nu_plugin_start"]
|
||||
trash-support = ["nu-cli/trash-support"]
|
||||
tree = ["nu_plugin_tree"]
|
||||
xpath = ["nu_plugin_xpath"]
|
||||
selector = ["nu_plugin_selector"]
|
||||
|
||||
# Core plugins that ship with `cargo install nu` by default
|
||||
# Currently, Cargo limits us to installing only one binary
|
||||
@ -191,6 +193,11 @@ name = "nu_plugin_extra_xpath"
|
||||
path = "src/plugins/nu_plugin_extra_xpath.rs"
|
||||
required-features = ["xpath"]
|
||||
|
||||
[[bin]]
|
||||
name = "nu_plugin_extra_selector"
|
||||
path = "src/plugins/nu_plugin_extra_selector.rs"
|
||||
required-features = ["selector"]
|
||||
|
||||
[[bin]]
|
||||
name = "nu_plugin_extra_from_bson"
|
||||
path = "src/plugins/nu_plugin_extra_from_bson.rs"
|
||||
|
20
crates/nu_plugin_selector/Cargo.toml
Normal file
20
crates/nu_plugin_selector/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
||||
[package]
|
||||
authors = ["The Nu Project Contributors"]
|
||||
description = "web scraping using css selector"
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
name = "nu_plugin_selector"
|
||||
version = "0.22.0"
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
nu-errors = {version = "0.22.0", path = "../nu-errors"}
|
||||
nu-plugin = {version = "0.22.0", path = "../nu-plugin"}
|
||||
nu-protocol = {version = "0.22.0", path = "../nu-protocol"}
|
||||
nu-source = {version = "0.22.0", path = "../nu-source"}
|
||||
nipper = "0.1.8"
|
||||
|
||||
[dev-dependencies]
|
||||
nu-test-support = {path = "../nu-test-support", version = "0.22.0"}
|
4
crates/nu_plugin_selector/src/lib.rs
Normal file
4
crates/nu_plugin_selector/src/lib.rs
Normal file
@ -0,0 +1,4 @@
|
||||
mod nu;
|
||||
mod selector;
|
||||
|
||||
pub use selector::Selector;
|
6
crates/nu_plugin_selector/src/main.rs
Normal file
6
crates/nu_plugin_selector/src/main.rs
Normal file
@ -0,0 +1,6 @@
|
||||
use nu_plugin::serve_plugin;
|
||||
use nu_plugin_selector::Selector;
|
||||
|
||||
fn main() {
|
||||
serve_plugin(&mut Selector::new());
|
||||
}
|
52
crates/nu_plugin_selector/src/nu/mod.rs
Normal file
52
crates/nu_plugin_selector/src/nu/mod.rs
Normal file
@ -0,0 +1,52 @@
|
||||
use nu_errors::ShellError;
|
||||
use nu_plugin::Plugin;
|
||||
use nu_protocol::{
|
||||
CallInfo, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_source::TaggedItem;
|
||||
|
||||
use crate::{selector::begin_selector_query, Selector};
|
||||
|
||||
impl Plugin for Selector {
|
||||
fn config(&mut self) -> Result<Signature, ShellError> {
|
||||
Ok(Signature::build("selector")
|
||||
.desc("execute selector query on html/web")
|
||||
.required("query", SyntaxShape::String, "selector query")
|
||||
.filter())
|
||||
}
|
||||
|
||||
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
|
||||
let tag = call_info.name_tag;
|
||||
let query = call_info.args.nth(0).ok_or_else(|| {
|
||||
ShellError::labeled_error(
|
||||
"selector query not passed",
|
||||
"selector query not passed",
|
||||
&tag,
|
||||
)
|
||||
})?;
|
||||
|
||||
self.query = query.as_string()?;
|
||||
self.tag = tag;
|
||||
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> {
|
||||
match input {
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::String(s)),
|
||||
..
|
||||
} => Ok(begin_selector_query(s, (*self.query).tagged(&self.tag))?
|
||||
.into_iter()
|
||||
.map(ReturnSuccess::value)
|
||||
.collect()),
|
||||
Value { tag, .. } => Err(ShellError::labeled_error_with_secondary(
|
||||
"Expected text from pipeline",
|
||||
"requires text input",
|
||||
&self.tag,
|
||||
"value originates from here",
|
||||
tag,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
100
crates/nu_plugin_selector/src/selector.rs
Normal file
100
crates/nu_plugin_selector/src/selector.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use nipper::Document;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{value::StringExt, Value};
|
||||
use nu_source::{Tag, Tagged};
|
||||
|
||||
pub struct Selector {
|
||||
pub query: String,
|
||||
pub tag: Tag,
|
||||
}
|
||||
|
||||
impl Selector {
|
||||
pub fn new() -> Selector {
|
||||
Selector {
|
||||
query: String::new(),
|
||||
tag: Tag::unknown(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Selector {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn begin_selector_query(raw: String, query: Tagged<&str>) -> Result<Vec<Value>, ShellError> {
|
||||
execute_selector_query(raw, query.item.to_string(), query.tag())
|
||||
}
|
||||
|
||||
fn execute_selector_query(
|
||||
input_string: String,
|
||||
query_string: String,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Vec<Value>, ShellError> {
|
||||
let _tag = tag.into();
|
||||
let mut ret = vec![];
|
||||
let doc = Document::from(&input_string);
|
||||
|
||||
// How to internally iterate
|
||||
// doc.nip("tr.athing").iter().for_each(|athing| {
|
||||
// let title = format!("{}", athing.select(".title a").text().to_string());
|
||||
// let href = athing
|
||||
// .select(".storylink")
|
||||
// .attr("href")
|
||||
// .unwrap()
|
||||
// .to_string();
|
||||
// let title_url = format!("{} - {}\n", title, href);
|
||||
// ret.push(title_url.to_string_value_create_tag());
|
||||
// });
|
||||
|
||||
doc.nip(&query_string).iter().for_each(|athing| {
|
||||
ret.push(athing.text().to_string().to_string_value_create_tag());
|
||||
});
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use nipper::Document;

    /// Parsing an HTML fragment yields a document whose serialized form is
    /// the fragment wrapped in `html`/`head`/`body` elements.
    #[test]
    fn create_document_from_string() {
        let html = r#"<div name="foo" value="bar"></div>"#;
        let document = Document::from(html);
        let shouldbe =
            r#"<html><head></head><body><div name="foo" value="bar"></div></body></html>"#;

        assert_eq!(shouldbe.to_string(), document.html().to_string());
    }

    /// Setting and removing attributes on a selection takes effect and
    /// leaves unrelated attributes untouched.
    #[test]
    fn modify_html_document() {
        let html = r#"<div name="foo" value="bar"></div>"#;
        let document = Document::from(html);
        let mut input = document.select(r#"div[name="foo"]"#);
        input.set_attr("id", "input");
        input.remove_attr("name");

        // The untouched attribute keeps its value.
        let actual_value = input.attr("value").map(|v| v.to_string());
        assert_eq!(Some("bar".to_string()), actual_value);

        // The modifications themselves must be visible on the selection.
        let actual_id = input.attr("id").map(|v| v.to_string());
        assert_eq!(Some("input".to_string()), actual_id);
        assert!(input.attr("name").is_none());
    }
}
|
6
src/plugins/nu_plugin_extra_selector.rs
Normal file
6
src/plugins/nu_plugin_extra_selector.rs
Normal file
@ -0,0 +1,6 @@
|
||||
use nu_plugin::serve_plugin;
|
||||
use nu_plugin_selector::Selector;
|
||||
|
||||
fn main() {
|
||||
serve_plugin(&mut Selector::new());
|
||||
}
|
10
wix/main.wxs
10
wix/main.wxs
@ -272,7 +272,14 @@
|
||||
Source='target\$(var.Profile)\nu_plugin_to_sqlite.exe'
|
||||
KeyPath='yes'/>
|
||||
</Component>
|
||||
|
||||
<Component Id='binary23' Guid='*' Win64='$(var.Win64)'>
|
||||
<File
|
||||
Id='exe23'
|
||||
Name='nu_plugin_selector.exe'
|
||||
DiskId='1'
|
||||
Source='target\$(var.Profile)\nu_plugin_selector.exe'
|
||||
KeyPath='yes'/>
|
||||
</Component>
|
||||
</Directory>
|
||||
</Directory>
|
||||
</Directory>
|
||||
@ -316,6 +323,7 @@
|
||||
<ComponentRef Id='binary20'/>
|
||||
<ComponentRef Id='binary21'/>
|
||||
<ComponentRef Id='binary22'/>
|
||||
<ComponentRef Id='binary23'/>
|
||||
|
||||
<Feature
|
||||
Id='Environment'
|
||||
|
Loading…
Reference in New Issue
Block a user