xpath command for nushell (#2656)

* xpath prototype

* new xpath engine is finally working

* nearly there

* closer

* working with list, started to add test, code cleanup

* broken again

* working again - time for some cleanup

* cleaned up code, added error handling and test

* update example, fix clippy

* removed commented char
This commit is contained in:
Darren Schroeder 2020-10-12 08:03:00 -05:00 committed by GitHub
parent 5770b15270
commit 2573441e28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 225 additions and 0 deletions

29
Cargo.lock generated
View File

@ -2974,6 +2974,8 @@ dependencies = [
"sha2 0.9.1", "sha2 0.9.1",
"shellexpand", "shellexpand",
"strip-ansi-escapes", "strip-ansi-escapes",
"sxd-document",
"sxd-xpath",
"tempfile", "tempfile",
"term", "term",
"term_size", "term_size",
@ -3728,6 +3730,12 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "peresil"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f658886ed52e196e850cfbbfddab9eaa7f6d90dd0929e264c31e5cec07e09e57"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.1.3" version = "2.1.3"
@ -5053,6 +5061,27 @@ dependencies = [
"web-sys", "web-sys",
] ]
[[package]]
name = "sxd-document"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94d82f37be9faf1b10a82c4bd492b74f698e40082f0f40de38ab275f31d42078"
dependencies = [
"peresil",
"typed-arena",
]
[[package]]
name = "sxd-xpath"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36e39da5d30887b5690e29de4c5ebb8ddff64ebd9933f98a01daaa4fd11b36ea"
dependencies = [
"peresil",
"quick-error",
"sxd-document",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.39" version = "1.0.39"

View File

@ -79,6 +79,8 @@ serde_yaml = "0.8.13"
sha2 = "0.9.1" sha2 = "0.9.1"
shellexpand = "2.0.0" shellexpand = "2.0.0"
strip-ansi-escapes = "0.1.0" strip-ansi-escapes = "0.1.0"
sxd-xpath = "0.4.2"
sxd-document = "0.3.2"
tempfile = "3.1.0" tempfile = "3.1.0"
term = {version = "0.6.1", optional = true} term = {version = "0.6.1", optional = true}
term_size = "0.3.2" term_size = "0.3.2"

View File

@ -245,6 +245,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(FromURL), whole_stream_command(FromURL),
whole_stream_command(FromXLSX), whole_stream_command(FromXLSX),
whole_stream_command(FromXML), whole_stream_command(FromXML),
whole_stream_command(XPath),
whole_stream_command(FromYAML), whole_stream_command(FromYAML),
whole_stream_command(FromYML), whole_stream_command(FromYML),
whole_stream_command(FromIcs), whole_stream_command(FromIcs),

View File

@ -126,6 +126,7 @@ pub(crate) mod where_;
pub(crate) mod which_; pub(crate) mod which_;
pub(crate) mod with_env; pub(crate) mod with_env;
pub(crate) mod wrap; pub(crate) mod wrap;
pub(crate) mod xpath;
pub(crate) use autoview::Autoview; pub(crate) use autoview::Autoview;
pub(crate) use cd::Cd; pub(crate) use cd::Cd;
@ -270,6 +271,7 @@ pub(crate) use where_::Where;
pub(crate) use which_::Which; pub(crate) use which_::Which;
pub(crate) use with_env::WithEnv; pub(crate) use with_env::WithEnv;
pub(crate) use wrap::Wrap; pub(crate) use wrap::Wrap;
pub(crate) use xpath::XPath;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {

View File

@ -0,0 +1,150 @@
extern crate sxd_document;
extern crate sxd_xpath;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use bigdecimal::FromPrimitive;
use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value};
use nu_source::Tagged;
use sxd_document::parser;
use sxd_xpath::{Context, Factory};
/// The `xpath` command: executes an XPath query on XML text received as input.
pub struct XPath;
/// Positional arguments of the `xpath` command, deserialized from the call.
#[derive(Deserialize)]
struct XPathArgs {
/// The XPath expression to evaluate; its tag is used to label query errors.
query: Tagged<String>,
}
#[async_trait]
impl WholeStreamCommand for XPath {
fn name(&self) -> &str {
"xpath"
}
fn signature(&self) -> Signature {
Signature::build("xpath").required("query", SyntaxShape::String, "xpath query")
}
fn usage(&self) -> &str {
"execute xpath query on xml"
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "find items with name attribute",
example: r#"echo '<?xml version="1.0" encoding="UTF-8"?><main><nushell rocks="true"/></main>' | from xml | to xml | xpath '//nushell/@rocks'"#,
result: None,
}]
}
async fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let (XPathArgs { query }, input) = args.process(&registry).await?;
let query_string = query.as_str();
let input_string = input.collect_string(tag.clone()).await?.item;
let result_string = execute_xpath_query(input_string, query_string.to_string());
match result_string {
Some(r) => Ok(
futures::stream::iter(r.into_iter().map(ReturnSuccess::value)).to_output_stream(),
),
None => Err(ShellError::labeled_error(
"xpath query error",
"xpath query error",
query.tag(),
)),
}
}
}
/// Evaluates the XPath expression `query_string` against the XML document in
/// `input_string` and converts the result into table rows.
///
/// Every row has a single column named after the query (shortened with an
/// ellipsis when the query is 20 characters or longer). A node-set produces
/// one row per node, in document order, holding the node's string value;
/// boolean, number and string results produce exactly one row.
///
/// Returns `None` instead of panicking when:
/// * the XPath expression cannot be compiled,
/// * the input is not well-formed XML,
/// * evaluation fails,
/// * a numeric result (NaN, infinity) has no decimal representation, or
/// * the query produced no rows (empty node-set).
pub fn execute_xpath_query(input_string: String, query_string: String) -> Option<Vec<Value>> {
    let xpath = build_xpath(&query_string)?;
    let package = parser::parse(&input_string).ok()?;
    let document = package.as_document();

    // Variables and namespaces are not populated yet; the context is the
    // place to add them when that support is introduced.
    let context = Context::new();

    let result = xpath.evaluate(&context, document.root()).ok()?;

    // Some xpath statements can be long, so the column name is truncated.
    let key = column_name(query_string);

    let rows: Vec<Value> = match result {
        sxd_xpath::Value::Nodeset(nodes) => nodes
            // A node-set is unordered; emit rows in document order so the
            // output is deterministic.
            .document_order()
            .into_iter()
            .map(|node| single_column_row(&key, UntaggedValue::string(node.string_value())))
            .collect(),
        sxd_xpath::Value::Boolean(b) => vec![single_column_row(&key, UntaggedValue::boolean(b))],
        sxd_xpath::Value::Number(n) => {
            // NaN and infinities cannot be converted; report them as a
            // failed query rather than aborting the shell.
            let decimal = BigDecimal::from_f64(n)?;
            vec![single_column_row(
                &key,
                UntaggedValue::decimal(decimal).into_untagged_value(),
            )]
        }
        sxd_xpath::Value::String(s) => vec![single_column_row(&key, UntaggedValue::string(s))],
    };

    if rows.is_empty() {
        None
    } else {
        Some(rows)
    }
}

/// Derives the output column name from the query: queries of 20 or more
/// characters are cut to their first 17 characters followed by `...`.
///
/// Works on characters rather than bytes so that a non-ASCII query can never
/// be cut in the middle of a UTF-8 sequence.
fn column_name(query: String) -> String {
    const MAX_CHARS: usize = 20;
    const ELLIPSIS: &str = "...";

    if query.chars().count() < MAX_CHARS {
        return query;
    }

    let mut name: String = query.chars().take(MAX_CHARS - ELLIPSIS.len()).collect();
    name.push_str(ELLIPSIS);
    name
}

/// Builds a one-column row mapping `key` to `value`.
fn single_column_row(key: &str, value: impl Into<Value>) -> Value {
    let mut row = TaggedDictBuilder::new(Tag::unknown());
    row.insert_value(key, value);
    row.into_value()
}

/// Compiles `xpath_str`, returning `None` when the expression is invalid or
/// the factory produces no XPath for it.
fn build_xpath(xpath_str: &str) -> Option<sxd_xpath::XPath> {
    Factory::new().build(xpath_str).ok()?
}
#[cfg(test)]
mod tests {
    use super::{ShellError, XPath};

    /// Runs every example declared by the `xpath` command through the shared
    /// examples test harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        use crate::examples::test as test_examples;

        test_examples(XPath {})?;
        Ok(())
    }
}

View File

@ -0,0 +1,41 @@
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline};
/// `position()` inside a predicate selects the second child of `<a>`, so
/// counting the matches is expected to print `1.0000`.
#[test]
fn position_function_in_predicate() {
    let command = pipeline(
        r#"
echo "<?xml version="1.0" encoding="UTF-8"?><a><b/><b/></a>" | from xml | to xml | xpath "count(//a/*[position() = 2])"
"#,
    );

    let outcome = nu!(cwd: ".", command);

    assert_eq!(outcome.out, "1.0000");
}
/// `contains(., true)` is given a boolean where a string is expected; the
/// query is still expected to count one match and print `1.0000`.
#[test]
fn functions_implicitly_coerce_argument_types() {
    let command = pipeline(
        r#"
echo "<?xml version="1.0" encoding="UTF-8"?><a>true</a>" | from xml | to xml | xpath "count(//*[contains(., true)])"
"#,
    );

    let outcome = nu!(cwd: ".", command);

    assert_eq!(outcome.out, "1.0000");
}
/// Selecting the `isPermaLink` attribute of `<guid>` in the `jonathan.xml`
/// fixture is expected to print `true`.
#[test]
fn find_guid_permilink_is_true() {
    let command = pipeline(
        r#"
open jonathan.xml
| to xml
| xpath '//guid/@isPermaLink'
"#,
    );

    let outcome = nu!(cwd: "tests/fixtures/formats", command);

    assert_eq!(outcome.out, "true");
}