mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 22:50:14 +02:00
query command with json, web, xml (#870)
* query command with json, web, xml * query xml now working * clippy * comment out web tests * Initial work on query web For now we can query everything except tables * Support for querying tables Now we can query multiple tables just like before, now the only thing missing is the test coverage * finish off * comment out web test Co-authored-by: Luccas Mateus de Medeiros Gomes <luccasmmg@gmail.com>
This commit is contained in:
19
crates/nu_plugin_query/Cargo.toml
Normal file
19
crates/nu_plugin_query/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
authors = ["The Nu Project Contributors"]
|
||||
description = "A set of query commands for Nushell"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
name = "nu_plugin_query"
|
||||
version = "0.1.0"
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
nu-plugin = { path="../nu-plugin", version = "0.1.0" }
|
||||
nu-protocol = { path="../nu-protocol", version = "0.1.0" }
|
||||
nu-engine = { path="../nu-engine", version = "0.1.0" }
|
||||
gjson = "0.8.0"
|
||||
scraper = "0.12.0"
|
||||
sxd-document = "0.3.2"
|
||||
sxd-xpath = "0.4.2"
|
12
crates/nu_plugin_query/src/lib.rs
Normal file
12
crates/nu_plugin_query/src/lib.rs
Normal file
@ -0,0 +1,12 @@
|
||||
mod nu;
|
||||
mod query;
|
||||
mod query_json;
|
||||
mod query_web;
|
||||
mod query_xml;
|
||||
mod web_tables;
|
||||
|
||||
pub use query::Query;
|
||||
pub use query_json::execute_json_query;
|
||||
pub use query_web::parse_selector_params;
|
||||
pub use query_xml::execute_xpath_query;
|
||||
pub use web_tables::WebTable;
|
6
crates/nu_plugin_query/src/main.rs
Normal file
6
crates/nu_plugin_query/src/main.rs
Normal file
@ -0,0 +1,6 @@
|
||||
use nu_plugin::{serve_plugin, CapnpSerializer};
|
||||
use nu_plugin_query::Query;
|
||||
|
||||
fn main() {
|
||||
serve_plugin(&mut Query {}, CapnpSerializer {})
|
||||
}
|
70
crates/nu_plugin_query/src/nu/mod.rs
Normal file
70
crates/nu_plugin_query/src/nu/mod.rs
Normal file
@ -0,0 +1,70 @@
|
||||
use crate::Query;
|
||||
use nu_plugin::{EvaluatedCall, LabeledError, Plugin};
|
||||
use nu_protocol::{Category, Signature, Spanned, SyntaxShape, Value};
|
||||
|
||||
impl Plugin for Query {
|
||||
fn signature(&self) -> Vec<Signature> {
|
||||
vec![
|
||||
Signature::build("query")
|
||||
.desc("Show all the query commands")
|
||||
.category(Category::Filters),
|
||||
|
||||
Signature::build("query json")
|
||||
.desc("execute json query on json file (open --raw <file> | query json 'query string')")
|
||||
.required("query", SyntaxShape::String, "json query")
|
||||
.category(Category::Filters),
|
||||
|
||||
Signature::build("query xml")
|
||||
.desc("execute xpath query on xml")
|
||||
.required("query", SyntaxShape::String, "xpath query")
|
||||
.category(Category::Filters),
|
||||
|
||||
Signature::build("query web")
|
||||
.desc("execute selector query on html/web")
|
||||
.named("query", SyntaxShape::String, "selector query", Some('q'))
|
||||
.switch("as_html", "return the query output as html", Some('m'))
|
||||
.named(
|
||||
"attribute",
|
||||
SyntaxShape::String,
|
||||
"downselect based on the given attribute",
|
||||
Some('a'),
|
||||
)
|
||||
.named(
|
||||
"as_table",
|
||||
SyntaxShape::Table,
|
||||
"find table based on column header list",
|
||||
Some('t'),
|
||||
)
|
||||
.switch(
|
||||
"inspect",
|
||||
"run in inspect mode to provide more information for determining column headers",
|
||||
Some('i'),
|
||||
)
|
||||
.category(Category::Network),
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&mut self,
|
||||
name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
) -> Result<Value, LabeledError> {
|
||||
// You can use the name to identify what plugin signature was called
|
||||
let path: Option<Spanned<String>> = call.opt(0)?;
|
||||
|
||||
match name {
|
||||
"query" => {
|
||||
self.query(name, call, input, path)
|
||||
}
|
||||
"query json" => self.query_json( name, call, input, path),
|
||||
"query web" => self.query_web(name, call, input, path),
|
||||
"query xml" => self.query_xml(name, call, input, path),
|
||||
_ => Err(LabeledError {
|
||||
label: "Plugin call with wrong name signature".into(),
|
||||
msg: "the signature used to call the plugin does not match any name in the plugin signature vector".into(),
|
||||
span: Some(call.head),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
75
crates/nu_plugin_query/src/query.rs
Normal file
75
crates/nu_plugin_query/src/query.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use crate::query_json::execute_json_query;
|
||||
use crate::query_web::parse_selector_params;
|
||||
use crate::query_xml::execute_xpath_query;
|
||||
use nu_engine::documentation::get_flags_section;
|
||||
use nu_plugin::{EvaluatedCall, LabeledError, Plugin};
|
||||
use nu_protocol::{Signature, Spanned, Value};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Query;
|
||||
|
||||
impl Query {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub fn usage() -> &'static str {
|
||||
"Usage: query"
|
||||
}
|
||||
|
||||
pub fn query(
|
||||
&self,
|
||||
_name: &str,
|
||||
call: &EvaluatedCall,
|
||||
_value: &Value,
|
||||
_path: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
let help = get_brief_subcommand_help(&Query.signature());
|
||||
Ok(Value::string(help, call.head))
|
||||
}
|
||||
|
||||
pub fn query_json(
|
||||
&self,
|
||||
name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
query: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
execute_json_query(name, call, input, query)
|
||||
}
|
||||
pub fn query_web(
|
||||
&self,
|
||||
_name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
_rest: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
parse_selector_params(call, input)
|
||||
}
|
||||
pub fn query_xml(
|
||||
&self,
|
||||
name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
query: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
execute_xpath_query(name, call, input, query)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_brief_subcommand_help(sigs: &[Signature]) -> String {
|
||||
let mut help = String::new();
|
||||
help.push_str(&format!("{}\n\n", sigs[0].usage));
|
||||
help.push_str(&format!("Usage:\n > {}\n\n", sigs[0].name));
|
||||
help.push_str("Subcommands:\n");
|
||||
|
||||
for x in sigs.iter().enumerate() {
|
||||
if x.0 == 0 {
|
||||
continue;
|
||||
}
|
||||
help.push_str(&format!(" {} - {}\n", x.1.name, x.1.usage));
|
||||
}
|
||||
|
||||
help.push_str(&get_flags_section(&sigs[0]));
|
||||
help
|
||||
}
|
151
crates/nu_plugin_query/src/query_json.rs
Normal file
151
crates/nu_plugin_query/src/query_json.rs
Normal file
@ -0,0 +1,151 @@
|
||||
use gjson::Value as gjValue;
|
||||
use nu_plugin::{EvaluatedCall, LabeledError};
|
||||
use nu_protocol::{Span, Spanned, Value};
|
||||
|
||||
pub fn execute_json_query(
|
||||
_name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
query: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
let input_string = match &input.as_string() {
|
||||
Ok(s) => s.clone(),
|
||||
Err(e) => {
|
||||
return Err(LabeledError {
|
||||
span: Some(call.head),
|
||||
msg: e.to_string(),
|
||||
label: "problem with input data".to_string(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let query_string = match &query {
|
||||
Some(v) => &v.item,
|
||||
None => {
|
||||
return Err(LabeledError {
|
||||
msg: "problem with input data".to_string(),
|
||||
label: "problem with input data".to_string(),
|
||||
span: Some(call.head),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
// Validate the json before trying to query it
|
||||
let is_valid_json = gjson::valid(&input_string);
|
||||
|
||||
if !is_valid_json {
|
||||
return Err(LabeledError {
|
||||
msg: "invalid json".to_string(),
|
||||
label: "invalid json".to_string(),
|
||||
span: Some(call.head),
|
||||
});
|
||||
}
|
||||
|
||||
let val: gjValue = gjson::get(&input_string, query_string);
|
||||
|
||||
if query_contains_modifiers(query_string) {
|
||||
let json_str = val.json();
|
||||
Ok(Value::string(json_str, Span::test_data()))
|
||||
} else {
|
||||
Ok(convert_gjson_value_to_nu_value(&val, &call.head))
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `query` mentions a gjson modifier whose result has to be
/// returned as a plain string rather than as tabular data.
fn query_contains_modifiers(query: &str) -> bool {
    // https://github.com/tidwall/gjson.rs documents 7 modifiers as of 4/19/21
    // Some of them mean the data really needs to be output as a string
    // instead of tabular data. The others don't matter.

    // Output as String
    // @ugly: Remove all whitespace from a json document.
    // @pretty: Make the json document more human readable.
    const STRING_OUTPUT_MODIFIERS: [&str; 2] = ["@ugly", "@pretty"];

    // Output as Tabular
    // Tabular output is the default, so these can simply be ignored
    // @reverse: Reverse an array or the members of an object.
    // @this: Returns the current element. It can be used to retrieve the root element.
    // @valid: Ensure the json document is valid.
    // @flatten: Flattens an array.
    // @join: Joins multiple objects into a single object.
    STRING_OUTPUT_MODIFIERS
        .iter()
        .any(|&modifier| query.contains(modifier))
}
|
||||
|
||||
fn convert_gjson_value_to_nu_value(v: &gjValue, span: &Span) -> Value {
|
||||
match v.kind() {
|
||||
gjson::Kind::Array => {
|
||||
let mut vals = vec![];
|
||||
v.each(|_k, v| {
|
||||
vals.push(convert_gjson_value_to_nu_value(&v, span));
|
||||
true
|
||||
});
|
||||
|
||||
Value::List { vals, span: *span }
|
||||
}
|
||||
gjson::Kind::Null => Value::nothing(*span),
|
||||
gjson::Kind::False => Value::boolean(false, *span),
|
||||
gjson::Kind::Number => {
|
||||
let str_value = v.str();
|
||||
if str_value.contains('.') {
|
||||
Value::float(v.f64(), *span)
|
||||
} else {
|
||||
Value::int(v.i64(), *span)
|
||||
}
|
||||
}
|
||||
gjson::Kind::String => Value::string(v.str(), *span),
|
||||
gjson::Kind::True => Value::boolean(true, *span),
|
||||
gjson::Kind::Object => {
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
v.each(|k, v| {
|
||||
cols.push(k.to_string());
|
||||
vals.push(convert_gjson_value_to_nu_value(&v, span));
|
||||
true
|
||||
});
|
||||
Value::Record {
|
||||
cols,
|
||||
vals,
|
||||
span: *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use gjson::{valid, Value as gjValue};

    /// JSON document shared by every test in this module.
    const SAMPLE_JSON: &str = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;

    #[test]
    fn validate_string() {
        assert!(valid(SAMPLE_JSON));
    }

    #[test]
    fn answer_from_get_age() {
        let age: gjValue = gjson::get(SAMPLE_JSON, "age");
        assert_eq!(age.str(), "37");
    }

    #[test]
    fn answer_from_get_children() {
        let children: gjValue = gjson::get(SAMPLE_JSON, "children");
        assert_eq!(children.str(), r#"["Sara", "Alex", "Jack"]"#);
    }

    #[test]
    fn answer_from_get_children_count() {
        let count: gjValue = gjson::get(SAMPLE_JSON, "children.#");
        assert_eq!(count.str(), "3");
    }

    #[test]
    fn answer_from_get_friends_first_name() {
        let first_names: gjValue = gjson::get(SAMPLE_JSON, "friends.#.first");
        assert_eq!(first_names.str(), r#"["James","Roger"]"#);
    }
}
|
303
crates/nu_plugin_query/src/query_web.rs
Normal file
303
crates/nu_plugin_query/src/query_web.rs
Normal file
@ -0,0 +1,303 @@
|
||||
use crate::web_tables::WebTable;
|
||||
use nu_plugin::{EvaluatedCall, LabeledError};
|
||||
use nu_protocol::{Span, Value};
|
||||
use scraper::{Html, Selector as ScraperSelector};
|
||||
|
||||
pub struct Selector {
|
||||
pub query: String,
|
||||
pub as_html: bool,
|
||||
pub attribute: String,
|
||||
pub as_table: Value,
|
||||
pub inspect: bool,
|
||||
}
|
||||
|
||||
impl Selector {
|
||||
pub fn new() -> Selector {
|
||||
Selector {
|
||||
query: String::new(),
|
||||
as_html: false,
|
||||
attribute: String::new(),
|
||||
as_table: Value::string("".to_string(), Span::test_data()),
|
||||
inspect: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Selector {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Value, LabeledError> {
|
||||
let head = call.head;
|
||||
let query: String = match call.get_flag("query")? {
|
||||
Some(q2) => q2,
|
||||
None => "".to_string(),
|
||||
};
|
||||
let as_html = call.has_flag("as_html");
|
||||
let attribute: String = match call.get_flag("attribute")? {
|
||||
Some(a) => a,
|
||||
None => "".to_string(),
|
||||
};
|
||||
let as_table: Value = match call.get_flag("as_table")? {
|
||||
Some(v) => v,
|
||||
None => Value::nothing(head),
|
||||
};
|
||||
|
||||
let inspect = call.has_flag("inspect");
|
||||
|
||||
if !&query.is_empty() && ScraperSelector::parse(&query).is_err() {
|
||||
return Err(LabeledError {
|
||||
msg: "Cannot parse this query as a valid css selector".to_string(),
|
||||
label: "Parse error".to_string(),
|
||||
span: Some(head),
|
||||
});
|
||||
}
|
||||
|
||||
let selector = Selector {
|
||||
query,
|
||||
as_html,
|
||||
attribute,
|
||||
as_table,
|
||||
inspect,
|
||||
};
|
||||
|
||||
match input {
|
||||
Value::String { val, span } => Ok(begin_selector_query(val.to_string(), selector, *span)),
|
||||
_ => Err(LabeledError {
|
||||
label: "requires text input".to_string(),
|
||||
msg: "Expected text from pipeline".to_string(),
|
||||
span: Some(input.span()?),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> Value {
|
||||
if let Value::List { .. } = selector.as_table {
|
||||
return retrieve_tables(
|
||||
input_html.as_str(),
|
||||
&selector.as_table,
|
||||
selector.inspect,
|
||||
span,
|
||||
);
|
||||
} else {
|
||||
match selector.attribute.is_empty() {
|
||||
true => execute_selector_query(
|
||||
input_html.as_str(),
|
||||
selector.query.as_str(),
|
||||
selector.as_html,
|
||||
span,
|
||||
),
|
||||
false => execute_selector_query_with_attribute(
|
||||
input_html.as_str(),
|
||||
selector.query.as_str(),
|
||||
selector.attribute.as_str(),
|
||||
span,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn retrieve_tables(
|
||||
input_string: &str,
|
||||
columns: &Value,
|
||||
inspect_mode: bool,
|
||||
span: Span,
|
||||
) -> Value {
|
||||
let html = input_string;
|
||||
let mut cols: Vec<String> = Vec::new();
|
||||
if let Value::List { vals, .. } = &columns {
|
||||
for x in vals {
|
||||
// TODO Find a way to get the Config object here
|
||||
if let Value::String { val, .. } = x {
|
||||
cols.push(val.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if inspect_mode {
|
||||
eprintln!("Passed in Column Headers = {:#?}", &cols,);
|
||||
}
|
||||
|
||||
let tables = match WebTable::find_by_headers(html, &cols) {
|
||||
Some(t) => {
|
||||
if inspect_mode {
|
||||
eprintln!("Table Found = {:#?}", &t);
|
||||
}
|
||||
t
|
||||
}
|
||||
None => vec![WebTable::empty()],
|
||||
};
|
||||
|
||||
if tables.len() == 1 {
|
||||
return retrieve_table(
|
||||
tables
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("This should never trigger"),
|
||||
columns,
|
||||
span,
|
||||
);
|
||||
}
|
||||
|
||||
let vals = tables
|
||||
.into_iter()
|
||||
.map(move |table| retrieve_table(table, columns, span))
|
||||
.collect();
|
||||
|
||||
Value::List { vals, span }
|
||||
}
|
||||
|
||||
fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
|
||||
let mut cols: Vec<String> = Vec::new();
|
||||
if let Value::List { vals, .. } = &columns {
|
||||
for x in vals {
|
||||
// TODO Find a way to get the Config object here
|
||||
if let Value::String { val, .. } = x {
|
||||
cols.push(val.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cols.is_empty() && !table.headers().is_empty() {
|
||||
for col in table.headers().keys() {
|
||||
cols.push(col.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let mut table_out = Vec::new();
|
||||
// sometimes there are tables where the first column is the headers, kind of like
|
||||
// a table has ben rotated ccw 90 degrees, in these cases all columns will be missing
|
||||
// we keep track of this with this variable so we can deal with it later
|
||||
let mut at_least_one_row_filled = false;
|
||||
// if columns are still empty, let's just make a single column table with the data
|
||||
if cols.is_empty() {
|
||||
at_least_one_row_filled = true;
|
||||
let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
|
||||
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
for row in &table_with_no_empties {
|
||||
for (counter, cell) in row.iter().enumerate() {
|
||||
cols.push(format!("Column{}", counter));
|
||||
vals.push(Value::string(cell.to_string(), span))
|
||||
}
|
||||
}
|
||||
table_out.push(Value::Record { cols, vals, span })
|
||||
} else {
|
||||
for row in &table {
|
||||
let mut vals = vec![];
|
||||
let record_cols = &cols;
|
||||
for col in &cols {
|
||||
let val = row
|
||||
.get(col)
|
||||
.unwrap_or(&format!("Missing column: '{}'", &col))
|
||||
.to_string();
|
||||
|
||||
if !at_least_one_row_filled && val != format!("Missing column: '{}'", &col) {
|
||||
at_least_one_row_filled = true;
|
||||
}
|
||||
vals.push(Value::string(val, span));
|
||||
}
|
||||
table_out.push(Value::Record {
|
||||
cols: record_cols.to_vec(),
|
||||
vals,
|
||||
span,
|
||||
})
|
||||
}
|
||||
}
|
||||
if !at_least_one_row_filled {
|
||||
let mut data2 = Vec::new();
|
||||
for x in &table.data {
|
||||
data2.push(x.join(", "));
|
||||
}
|
||||
table.data = vec![data2];
|
||||
return retrieve_table(table, columns, span);
|
||||
}
|
||||
// table_out
|
||||
|
||||
Value::List {
|
||||
vals: table_out,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
fn execute_selector_query_with_attribute(
|
||||
input_string: &str,
|
||||
query_string: &str,
|
||||
attribute: &str,
|
||||
span: Span,
|
||||
) -> Value {
|
||||
let doc = Html::parse_fragment(input_string);
|
||||
|
||||
let vals: Vec<Value> = doc
|
||||
.select(&css(query_string))
|
||||
.map(|selection| {
|
||||
Value::string(
|
||||
selection.value().attr(attribute).unwrap_or("").to_string(),
|
||||
span,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
Value::List { vals, span }
|
||||
}
|
||||
|
||||
fn execute_selector_query(
|
||||
input_string: &str,
|
||||
query_string: &str,
|
||||
as_html: bool,
|
||||
span: Span,
|
||||
) -> Value {
|
||||
let doc = Html::parse_fragment(input_string);
|
||||
|
||||
let vals: Vec<Value> = match as_html {
|
||||
true => doc
|
||||
.select(&css(query_string))
|
||||
.map(|selection| Value::string(selection.html(), span))
|
||||
.collect(),
|
||||
false => doc
|
||||
.select(&css(query_string))
|
||||
.map(|selection| {
|
||||
Value::string(
|
||||
selection
|
||||
.text()
|
||||
.fold("".to_string(), |acc, x| format!("{}{}", acc, x)),
|
||||
span,
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
|
||||
Value::List { vals, span }
|
||||
}
|
||||
|
||||
pub fn css(selector: &str) -> ScraperSelector {
|
||||
ScraperSelector::parse(selector).expect("this should never trigger")
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
// use super::*;
|
||||
|
||||
// const SIMPLE_LIST: &str = r#"
|
||||
// <ul>
|
||||
// <li>Coffee</li>
|
||||
// <li>Tea</li>
|
||||
// <li>Milk</li>
|
||||
// </ul>
|
||||
// "#;
|
||||
|
||||
// #[test]
|
||||
// fn test_first_child_is_not_empty() {
|
||||
// assert!(!execute_selector_query(SIMPLE_LIST, "li:first-child", false).is_empty())
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_first_child() {
|
||||
// assert_eq!(
|
||||
// vec!["Coffee".to_string()],
|
||||
// execute_selector_query(SIMPLE_LIST, "li:first-child", false)
|
||||
// )
|
||||
// }
|
||||
// }
|
188
crates/nu_plugin_query/src/query_xml.rs
Normal file
188
crates/nu_plugin_query/src/query_xml.rs
Normal file
@ -0,0 +1,188 @@
|
||||
use nu_plugin::{EvaluatedCall, LabeledError};
|
||||
use nu_protocol::{Span, Spanned, Value};
|
||||
use sxd_document::parser;
|
||||
use sxd_xpath::{Context, Factory};
|
||||
|
||||
pub fn execute_xpath_query(
|
||||
_name: &str,
|
||||
call: &EvaluatedCall,
|
||||
input: &Value,
|
||||
query: Option<Spanned<String>>,
|
||||
) -> Result<Value, LabeledError> {
|
||||
let (query_string, span) = match &query {
|
||||
Some(v) => (&v.item, &v.span),
|
||||
None => {
|
||||
return Err(LabeledError {
|
||||
msg: "problem with input data".to_string(),
|
||||
label: "problem with input data".to_string(),
|
||||
span: Some(call.head),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let xpath = build_xpath(query_string, span)?;
|
||||
let input_string = input.as_string()?;
|
||||
let package = parser::parse(&input_string);
|
||||
|
||||
if package.is_err() {
|
||||
return Err(LabeledError {
|
||||
label: "invalid xml document".to_string(),
|
||||
msg: "invalid xml document".to_string(),
|
||||
span: Some(call.head),
|
||||
});
|
||||
}
|
||||
|
||||
let package = package.expect("invalid xml document");
|
||||
|
||||
let document = package.as_document();
|
||||
let context = Context::new();
|
||||
|
||||
// leaving this here for augmentation at some point
|
||||
// build_variables(&arguments, &mut context);
|
||||
// build_namespaces(&arguments, &mut context);
|
||||
let res = xpath.evaluate(&context, document.root());
|
||||
|
||||
// Some xpath statements can be long, so let's truncate it with ellipsis
|
||||
let mut key = query_string.clone();
|
||||
if query_string.len() >= 20 {
|
||||
key.truncate(17);
|
||||
key += "...";
|
||||
} else {
|
||||
key = query_string.to_string();
|
||||
};
|
||||
|
||||
match res {
|
||||
Ok(r) => {
|
||||
let mut cols: Vec<String> = vec![];
|
||||
let mut vals: Vec<Value> = vec![];
|
||||
let mut records: Vec<Value> = vec![];
|
||||
|
||||
match r {
|
||||
sxd_xpath::Value::Nodeset(ns) => {
|
||||
for n in ns.into_iter() {
|
||||
cols.push(key.to_string());
|
||||
vals.push(Value::string(n.string_value(), Span::test_data()));
|
||||
}
|
||||
}
|
||||
sxd_xpath::Value::Boolean(b) => {
|
||||
cols.push(key.to_string());
|
||||
vals.push(Value::boolean(b, Span::test_data()));
|
||||
}
|
||||
sxd_xpath::Value::Number(n) => {
|
||||
cols.push(key.to_string());
|
||||
vals.push(Value::float(n, Span::test_data()));
|
||||
}
|
||||
sxd_xpath::Value::String(s) => {
|
||||
cols.push(key.to_string());
|
||||
vals.push(Value::string(s, Span::test_data()));
|
||||
}
|
||||
};
|
||||
|
||||
// convert the cols and vecs to a table by creating individual records
|
||||
// for each item so we can then use a list to make a table
|
||||
for (k, v) in cols.iter().zip(vals.iter()) {
|
||||
records.push(Value::Record {
|
||||
cols: vec![k.to_string()],
|
||||
vals: vec![v.clone()],
|
||||
span: Span::test_data(),
|
||||
})
|
||||
}
|
||||
|
||||
Ok(Value::List {
|
||||
vals: records,
|
||||
span: Span::test_data(),
|
||||
})
|
||||
}
|
||||
Err(_) => Err(LabeledError {
|
||||
label: "xpath query error".to_string(),
|
||||
msg: "xpath query error".to_string(),
|
||||
span: Some(Span::test_data()),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_xpath(xpath_str: &str, span: &Span) -> Result<sxd_xpath::XPath, LabeledError> {
|
||||
let factory = Factory::new();
|
||||
|
||||
match factory.build(xpath_str) {
|
||||
Ok(xpath) => xpath.ok_or_else(|| LabeledError {
|
||||
label: "invalid xpath query".to_string(),
|
||||
msg: "invalid xpath query".to_string(),
|
||||
span: Some(*span),
|
||||
}),
|
||||
Err(_) => Err(LabeledError {
|
||||
label: "expected valid xpath query".to_string(),
|
||||
msg: "expected valid xpath query".to_string(),
|
||||
span: Some(*span),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::execute_xpath_query as query;
    use nu_plugin::EvaluatedCall;
    use nu_protocol::{Span, Spanned, Value};

    /// Runs `xpath` against `xml` with an argument-less call and test spans.
    fn run_query(xml: &str, xpath: &str) -> Value {
        let call = EvaluatedCall {
            head: Span::test_data(),
            positional: vec![],
            named: vec![],
        };
        let text = Value::string(xml, Span::test_data());
        let spanned_str: Spanned<String> = Spanned {
            item: xpath.to_string(),
            span: Span::test_data(),
        };

        query("", &call, &text, Some(spanned_str)).expect("test should not fail")
    }

    /// Builds the expected result: a list holding one record with one float.
    fn single_float_record(column: &str, number: f64) -> Value {
        Value::List {
            vals: vec![Value::Record {
                cols: vec![column.to_string()],
                vals: vec![Value::float(number, Span::test_data())],
                span: Span::test_data(),
            }],
            span: Span::test_data(),
        }
    }

    #[test]
    fn position_function_in_predicate() {
        let actual = run_query(
            r#"<?xml version="1.0" encoding="UTF-8"?><a><b/><b/></a>"#,
            "count(//a/*[position() = 2])",
        );
        let expected = single_float_record("count(//a/*[posit...", 1.0);

        assert_eq!(actual, expected);
    }

    #[test]
    fn functions_implicitly_coerce_argument_types() {
        let actual = run_query(
            r#"<?xml version="1.0" encoding="UTF-8"?><a>true</a>"#,
            "count(//*[contains(., true)])",
        );
        let expected = single_float_record("count(//*[contain...", 1.0);

        assert_eq!(actual, expected);
    }
}
|
1227
crates/nu_plugin_query/src/web_tables.rs
Normal file
1227
crates/nu_plugin_query/src/web_tables.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user