2020-11-03 22:46:42 +01:00
|
|
|
use nipper::Document;
|
|
|
|
use nu_protocol::{value::StringExt, Value};
|
2021-05-30 18:49:43 +02:00
|
|
|
use nu_source::Tag;
|
2020-11-03 22:46:42 +01:00
|
|
|
|
|
|
|
pub struct Selector {
|
|
|
|
pub query: String,
|
|
|
|
pub tag: Tag,
|
2020-11-09 20:37:32 +01:00
|
|
|
pub as_html: bool,
|
2021-05-30 18:49:43 +02:00
|
|
|
pub attribute: String,
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Selector {
|
|
|
|
pub fn new() -> Selector {
|
|
|
|
Selector {
|
|
|
|
query: String::new(),
|
|
|
|
tag: Tag::unknown(),
|
2020-11-09 20:37:32 +01:00
|
|
|
as_html: false,
|
2021-05-30 18:49:43 +02:00
|
|
|
attribute: String::new(),
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for Selector {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-30 18:49:43 +02:00
|
|
|
pub fn begin_selector_query(input_html: String, selector: &Selector) -> Vec<Value> {
|
|
|
|
match selector.attribute.is_empty() {
|
|
|
|
true => execute_selector_query(
|
|
|
|
input_html.as_str(),
|
|
|
|
selector.query.as_str(),
|
|
|
|
selector.as_html,
|
|
|
|
),
|
|
|
|
false => execute_selector_query_with_attribute(
|
|
|
|
input_html.as_str(),
|
|
|
|
selector.query.as_str(),
|
|
|
|
selector.attribute.as_str(),
|
|
|
|
),
|
|
|
|
}
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
2021-05-30 18:49:43 +02:00
|
|
|
fn execute_selector_query_with_attribute(
|
|
|
|
input_string: &str,
|
|
|
|
query_string: &str,
|
|
|
|
attribute: &str,
|
2021-02-12 11:13:14 +01:00
|
|
|
) -> Vec<Value> {
|
2021-05-30 18:49:43 +02:00
|
|
|
let doc = Document::from(input_string);
|
|
|
|
|
|
|
|
doc.select(&query_string)
|
|
|
|
.iter()
|
|
|
|
.map(|selection| {
|
|
|
|
selection
|
|
|
|
.attr_or(attribute, "")
|
|
|
|
.to_string()
|
|
|
|
.to_string_value_create_tag()
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
2020-11-03 22:46:42 +01:00
|
|
|
|
2021-05-30 18:49:43 +02:00
|
|
|
fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> {
|
|
|
|
let doc = Document::from(input_string);
|
2020-11-03 22:46:42 +01:00
|
|
|
|
2021-05-30 18:49:43 +02:00
|
|
|
match as_html {
|
|
|
|
true => doc
|
|
|
|
.select(&query_string)
|
|
|
|
.iter()
|
|
|
|
.map(|selection| selection.html().to_string().to_string_value_create_tag())
|
|
|
|
.collect(),
|
|
|
|
false => doc
|
|
|
|
.select(&query_string)
|
|
|
|
.iter()
|
|
|
|
.map(|selection| selection.text().to_string().to_string_value_create_tag())
|
|
|
|
.collect(),
|
2020-11-09 20:37:32 +01:00
|
|
|
}
|
2020-11-03 22:46:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use nipper::Document;

    /// A bare fragment is serialised back inside an html/head/body skeleton.
    #[test]
    fn create_document_from_string() {
        let fragment = r#"<div name="foo" value="bar"></div>"#;
        let expected =
            r#"<html><head></head><body><div name="foo" value="bar"></div></body></html>"#;

        let parsed = Document::from(fragment);

        assert_eq!(expected.to_string(), parsed.html().to_string());
    }

    /// Adding and removing other attributes leaves `value` readable and intact.
    #[test]
    fn modify_html_document() {
        let fragment = r#"<div name="foo" value="bar"></div>"#;
        let parsed = Document::from(fragment);

        let mut div = parsed.select(r#"div[name="foo"]"#);
        div.set_attr("id", "input");
        div.remove_attr("name");

        let expected = "bar".to_string();
        let found = div.attr("value").unwrap().to_string();

        assert_eq!(expected, found);
    }

    // NOTE(review): the test below is disabled; it appears to need network
    // access and names that are not imported in this module — confirm before
    // re-enabling.
    // #[test]
    // fn test_hacker_news() -> Result<(), ShellError> {
    //     let html = reqwest::blocking::get("https://news.ycombinator.com")?.text()?;
    //     let document = Document::from(&html);
    //     let result = query(html, ".hnname a".to_string(), Tag::unknown());
    //     let shouldbe = Ok(vec!["Hacker News".to_str_value_create_tag()]);
    //     assert_eq!(shouldbe, result);
    //     Ok(())
    // }
}
|