1
0
mirror of https://github.com/nushell/nushell.git synced 2025-05-05 18:44:24 +02:00
nushell/crates/nu_plugin_selector/src/selector.rs
2021-12-02 08:48:03 +13:00

232 lines
6.8 KiB
Rust

use crate::Table;
use nu_protocol::{value::StringExt, Primitive, TaggedDictBuilder, UntaggedValue, Value};
use nu_source::Tag;
use scraper::{Html, Selector as ScraperSelector};
/// Options that drive a single selector query against an HTML document.
pub struct Selector {
    /// CSS selector string that is compiled and run against the document.
    pub query: String,
    /// Tag stored with the options; none of the functions in this file read it.
    pub tag: Tag,
    /// When `true`, each match is rendered as HTML instead of its text content.
    pub as_html: bool,
    /// Name of the attribute to read from each match; empty means no attribute
    /// was requested and the text/HTML query is used instead.
    pub attribute: String,
    /// Column headers for table extraction. While this holds a string value
    /// (the default), table extraction is skipped.
    pub as_table: Value,
    /// When `true`, table extraction prints diagnostics to stderr.
    pub inspect: bool,
}
impl Selector {
    /// Creates a `Selector` with empty query and attribute strings, unknown
    /// tag, both flags off, and an empty-string `as_table` value (which
    /// `begin_selector_query` treats as "table mode not requested").
    pub fn new() -> Selector {
        // An empty string primitive acts as the "no table columns given" marker.
        let no_table_columns = Value::new(
            UntaggedValue::Primitive(Primitive::String(String::new())),
            Tag::unknown(),
        );

        Self {
            query: String::new(),
            tag: Tag::unknown(),
            as_html: false,
            attribute: String::new(),
            as_table: no_table_columns,
            inspect: false,
        }
    }
}
impl Default for Selector {
fn default() -> Self {
Self::new()
}
}
/// Runs the query described by `selector` against `input_html`.
///
/// Dispatch order:
/// 1. if `selector.as_table` is not a string value, tables are extracted;
/// 2. otherwise, if `selector.attribute` is empty, the text/HTML of each
///    match is returned;
/// 3. otherwise the named attribute of each match is returned.
pub fn begin_selector_query(input_html: String, selector: &Selector) -> Vec<Value> {
    let html = input_html.as_str();

    // A non-string `as_table` means the caller supplied column headers.
    if !selector.as_table.value.is_string() {
        return retrieve_tables(html, &selector.as_table, selector.inspect);
    }

    let query = selector.query.as_str();
    if selector.attribute.is_empty() {
        execute_selector_query(html, query, selector.as_html)
    } else {
        execute_selector_query_with_attribute(html, query, selector.attribute.as_str())
    }
}
/// Extracts the tables of `input_string` that match the headers in `columns`.
///
/// `columns` contributes headers only when it is a table value; each entry is
/// converted to a string. If no table is found, a single `Table::empty()` is
/// processed instead. When exactly one table is processed its rows are
/// returned directly; otherwise each table becomes one nested table value.
/// With `inspect_mode` set, the requested headers and the tables found are
/// printed to stderr.
pub fn retrieve_tables(input_string: &str, columns: &Value, inspect_mode: bool) -> Vec<Value> {
    let wanted_headers: Vec<_> = match &columns.value {
        UntaggedValue::Table(entries) => entries
            .iter()
            .map(|entry| entry.convert_to_string())
            .collect(),
        _ => Vec::new(),
    };

    if inspect_mode {
        eprintln!("Passed in Column Headers = {:#?}", &wanted_headers,);
    }

    let lookup = Table::find_by_headers(input_string, &wanted_headers);
    let mut tables = if let Some(found) = lookup {
        if inspect_mode {
            eprintln!("Table Found = {:#?}", &found);
        }
        found
    } else {
        vec![Table::empty()]
    };

    // A lone table is flattened: its rows are the result.
    if tables.len() == 1 {
        let only_table = tables.pop().expect("This should never trigger");
        return retrieve_table(only_table, columns);
    }

    let mut nested = Vec::with_capacity(tables.len());
    for table in tables {
        let rows = retrieve_table(table, columns);
        nested.push(UntaggedValue::Table(rows).into_value(Tag::unknown()));
    }
    nested
}
fn retrieve_table(mut table: Table, columns: &Value) -> Vec<Value> {
let mut cols = Vec::new();
if let UntaggedValue::Table(t) = &columns.value {
for x in t {
cols.push(x.convert_to_string());
}
}
if cols.is_empty() && !table.headers().is_empty() {
for col in table.headers().keys() {
cols.push(col.to_string());
}
}
let mut table_out = Vec::new();
// sometimes there are tables where the first column is the headers, kind of like
// a table has ben rotated ccw 90 degrees, in these cases all columns will be missing
// we keep track of this with this variable so we can deal with it later
let mut at_least_one_row_filled = false;
// if columns are still empty, let's just make a single column table with the data
if cols.is_empty() {
at_least_one_row_filled = true;
let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
for row in &table_with_no_empties {
let mut dict = TaggedDictBuilder::new(Tag::unknown());
for (counter, cell) in row.iter().enumerate() {
let col_name = format!("Column{}", counter);
dict.insert_value(
col_name,
UntaggedValue::Primitive(Primitive::String(cell.to_string()))
.into_value(Tag::unknown()),
);
}
table_out.push(dict.into_value());
}
} else {
for row in &table {
let mut dict = TaggedDictBuilder::new(Tag::unknown());
// eprintln!("row={:?}", &row);
for col in &cols {
//eprintln!("col={:?}", &col);
let key = col.to_string();
let val = row
.get(col)
.unwrap_or(&format!("Missing column: '{}'", &col))
.to_string();
if !at_least_one_row_filled && val != format!("Missing column: '{}'", &col) {
at_least_one_row_filled = true;
}
dict.insert_value(
key,
UntaggedValue::Primitive(Primitive::String(val)).into_value(Tag::unknown()),
);
}
table_out.push(dict.into_value());
}
}
if !at_least_one_row_filled {
let mut data2 = Vec::new();
for x in &table.data {
data2.push(x.join(", "));
}
table.data = vec![data2];
return retrieve_table(table, columns);
}
table_out
}
/// Returns the value of `attribute` for every element of `input_string`
/// matched by the CSS selector `query_string`.
///
/// Elements that do not carry the attribute contribute an empty string, so
/// the output has one entry per matched element.
fn execute_selector_query_with_attribute(
    input_string: &str,
    query_string: &str,
    attribute: &str,
) -> Vec<Value> {
    let fragment = Html::parse_fragment(input_string);
    let compiled = css(query_string);

    let mut values = Vec::new();
    for element in fragment.select(&compiled) {
        let attr_text = element.value().attr(attribute).unwrap_or_default();
        values.push(attr_text.to_string().to_string_value_create_tag());
    }
    values
}
/// Runs the CSS selector `query_string` against `input_string` and returns
/// one string value per matched element.
///
/// With `as_html` set, each value is the element rendered as HTML; otherwise
/// it is the concatenation of all text nodes below the element.
fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> {
    let doc = Html::parse_fragment(input_string);
    let selector = css(query_string);
    let matches = doc.select(&selector);

    if as_html {
        matches
            .map(|selection| selection.html().to_string_value_create_tag())
            .collect()
    } else {
        matches
            .map(|selection| {
                // Collect the text nodes straight into one `String` instead of
                // re-allocating the accumulated text for every node.
                selection
                    .text()
                    .collect::<String>()
                    .to_string_value_create_tag()
            })
            .collect()
    }
}
/// Compiles `selector` into a `scraper` selector.
///
/// # Panics
///
/// Panics if `selector` cannot be parsed as a CSS selector.
pub fn css(selector: &str) -> ScraperSelector {
    let parsed = ScraperSelector::parse(selector);
    parsed.expect("this should never trigger")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-item unordered list used as the fixture for the selector tests.
    const SIMPLE_LIST: &str = r#"
<ul>
<li>Coffee</li>
<li>Tea</li>
<li>Milk</li>
</ul>
"#;

    /// `li:first-child` must match at least one element of the fixture.
    #[test]
    fn test_first_child_is_not_empty() {
        let matched = execute_selector_query(SIMPLE_LIST, "li:first-child", false);
        assert!(!matched.is_empty())
    }

    /// `li:first-child` in text mode yields exactly the first item's text.
    #[test]
    fn test_first_child() {
        let expected = vec!["Coffee".to_string().to_string_value_create_tag()];
        let actual = execute_selector_query(SIMPLE_LIST, "li:first-child", false);
        assert_eq!(expected, actual)
    }
}