2020-11-03 22:46:42 +01:00
|
|
|
use nipper::Document;
|
|
|
|
use nu_protocol::{value::StringExt, Value};
|
|
|
|
use nu_source::{Tag, Tagged};
|
|
|
|
|
|
|
|
pub struct Selector {
|
|
|
|
pub query: String,
|
|
|
|
pub tag: Tag,
|
2020-11-09 20:37:32 +01:00
|
|
|
pub as_html: bool,
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Selector {
|
|
|
|
pub fn new() -> Selector {
|
|
|
|
Selector {
|
|
|
|
query: String::new(),
|
|
|
|
tag: Tag::unknown(),
|
2020-11-09 20:37:32 +01:00
|
|
|
as_html: false,
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for Selector {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-12 11:13:14 +01:00
|
|
|
pub fn begin_selector_query(input: String, query: Tagged<&str>, as_html: bool) -> Vec<Value> {
|
2020-11-09 20:37:32 +01:00
|
|
|
execute_selector_query(input, query.item.to_string(), query.tag(), as_html)
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn execute_selector_query(
|
|
|
|
input_string: String,
|
|
|
|
query_string: String,
|
|
|
|
tag: impl Into<Tag>,
|
2020-11-09 20:37:32 +01:00
|
|
|
as_html: bool,
|
2021-02-12 11:13:14 +01:00
|
|
|
) -> Vec<Value> {
|
2020-11-03 22:46:42 +01:00
|
|
|
let _tag = tag.into();
|
|
|
|
let mut ret = vec![];
|
|
|
|
let doc = Document::from(&input_string);
|
|
|
|
|
|
|
|
// How to internally iterate
|
|
|
|
// doc.nip("tr.athing").iter().for_each(|athing| {
|
|
|
|
// let title = format!("{}", athing.select(".title a").text().to_string());
|
|
|
|
// let href = athing
|
|
|
|
// .select(".storylink")
|
|
|
|
// .attr("href")
|
|
|
|
// .unwrap()
|
|
|
|
// .to_string();
|
|
|
|
// let title_url = format!("{} - {}\n", title, href);
|
|
|
|
// ret.push(title_url.to_string_value_create_tag());
|
|
|
|
// });
|
|
|
|
|
2020-11-09 20:37:32 +01:00
|
|
|
if as_html {
|
|
|
|
doc.nip(&query_string).iter().for_each(|athing| {
|
|
|
|
ret.push(athing.html().to_string().to_string_value_create_tag());
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
doc.nip(&query_string).iter().for_each(|athing| {
|
|
|
|
ret.push(athing.text().to_string().to_string_value_create_tag());
|
|
|
|
});
|
|
|
|
}
|
2021-02-12 11:13:14 +01:00
|
|
|
ret
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use nipper::Document;

    /// Markup shared by the tests in this module.
    const DIV_HTML: &str = r#"<div name="foo" value="bar"></div>"#;

    /// Parsing a bare `<div>` and serializing the document yields the div
    /// wrapped in `html`/`head`/`body` elements.
    #[test]
    fn create_document_from_string() {
        let document = Document::from(DIV_HTML);
        let expected = String::from(
            r#"<html><head></head><body><div name="foo" value="bar"></div></body></html>"#,
        );

        let rendered = document.html().to_string();

        assert_eq!(expected, rendered);
    }

    /// Setting one attribute and removing another leaves the untouched
    /// `value` attribute readable.
    #[test]
    fn modify_html_document() {
        let document = Document::from(DIV_HTML);
        let mut div = document.select(r#"div[name="foo"]"#);
        div.set_attr("id", "input");
        div.remove_attr("name");

        let expected = String::from("bar");
        let found = div.attr("value").unwrap().to_string();

        assert_eq!(expected, found);
    }

    // A previously commented-out test fetched https://news.ycombinator.com over
    // the network and called a `query` function that this file does not
    // define; it was never compiled and has been dropped.
}
|