diff --git a/Cargo.lock b/Cargo.lock
index 8530a5d15..b133732c6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2974,6 +2974,8 @@ dependencies = [
  "sha2 0.9.1",
  "shellexpand",
  "strip-ansi-escapes",
+ "sxd-document",
+ "sxd-xpath",
  "tempfile",
  "term",
  "term_size",
@@ -3728,6 +3730,12 @@ version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
 
+[[package]]
+name = "peresil"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f658886ed52e196e850cfbbfddab9eaa7f6d90dd0929e264c31e5cec07e09e57"
+
 [[package]]
 name = "pest"
 version = "2.1.3"
@@ -5053,6 +5061,27 @@ dependencies = [
  "web-sys",
 ]
 
+[[package]]
+name = "sxd-document"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94d82f37be9faf1b10a82c4bd492b74f698e40082f0f40de38ab275f31d42078"
+dependencies = [
+ "peresil",
+ "typed-arena",
+]
+
+[[package]]
+name = "sxd-xpath"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36e39da5d30887b5690e29de4c5ebb8ddff64ebd9933f98a01daaa4fd11b36ea"
+dependencies = [
+ "peresil",
+ "quick-error",
+ "sxd-document",
+]
+
 [[package]]
 name = "syn"
 version = "1.0.39"
diff --git a/crates/nu-cli/Cargo.toml b/crates/nu-cli/Cargo.toml
index 9b55e51cf..ed15aacfc 100644
--- a/crates/nu-cli/Cargo.toml
+++ b/crates/nu-cli/Cargo.toml
@@ -79,6 +79,8 @@ serde_yaml = "0.8.13"
 sha2 = "0.9.1"
 shellexpand = "2.0.0"
 strip-ansi-escapes = "0.1.0"
+sxd-xpath = "0.4.2"
+sxd-document = "0.3.2"
 tempfile = "3.1.0"
 term = {version = "0.6.1", optional = true}
 term_size = "0.3.2"
diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs
index f38205b8e..90a90a2dc 100644
--- a/crates/nu-cli/src/cli.rs
+++ b/crates/nu-cli/src/cli.rs
@@ -245,6 +245,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
             whole_stream_command(FromURL),
             whole_stream_command(FromXLSX),
             whole_stream_command(FromXML),
+            whole_stream_command(XPath),
             whole_stream_command(FromYAML),
             whole_stream_command(FromYML),
             whole_stream_command(FromIcs),
diff --git a/crates/nu-cli/src/commands.rs b/crates/nu-cli/src/commands.rs
index 66e283a24..8ddd85812 100644
--- a/crates/nu-cli/src/commands.rs
+++ b/crates/nu-cli/src/commands.rs
@@ -126,6 +126,7 @@ pub(crate) mod where_;
 pub(crate) mod which_;
 pub(crate) mod with_env;
 pub(crate) mod wrap;
+pub(crate) mod xpath;
 
 pub(crate) use autoview::Autoview;
 pub(crate) use cd::Cd;
@@ -270,6 +271,7 @@ pub(crate) use where_::Where;
 pub(crate) use which_::Which;
 pub(crate) use with_env::WithEnv;
 pub(crate) use wrap::Wrap;
+pub(crate) use xpath::XPath;
 
 #[cfg(test)]
 mod tests {
diff --git a/crates/nu-cli/src/commands/xpath.rs b/crates/nu-cli/src/commands/xpath.rs
new file mode 100644
index 000000000..11da64e5f
--- /dev/null
+++ b/crates/nu-cli/src/commands/xpath.rs
@@ -0,0 +1,155 @@
+extern crate sxd_document;
+extern crate sxd_xpath;
+use crate::commands::WholeStreamCommand;
+use crate::prelude::*;
+use bigdecimal::FromPrimitive;
+use nu_errors::ShellError;
+use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value};
+use nu_source::Tagged;
+use sxd_document::parser;
+use sxd_xpath::{Context, Factory};
+
+/// Shell command that runs an XPath query over XML text taken from the input stream.
+pub struct XPath;
+
+/// Positional arguments accepted by `xpath`.
+#[derive(Deserialize)]
+struct XPathArgs {
+    query: Tagged<String>,
+}
+
+#[async_trait]
+impl WholeStreamCommand for XPath {
+    fn name(&self) -> &str {
+        "xpath"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("xpath").required("query", SyntaxShape::String, "xpath query")
+    }
+
+    fn usage(&self) -> &str {
+        "execute xpath query on xml"
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "find items with name attribute",
+            example: r#"echo '<?xml version="1.0" encoding="UTF-8"?><main><nushell rocks="true"/></main>' | from xml | to xml | xpath '//nushell/@rocks'"#,
+            result: None,
+        }]
+    }
+
+    async fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        let tag = args.call_info.name_tag.clone();
+        let (XPathArgs { query }, input) = args.process(&registry).await?;
+
+        let query_string = query.as_str();
+        let input_string = input.collect_string(tag.clone()).await?.item;
+        let result_string = execute_xpath_query(input_string, query_string.to_string());
+
+        // `None` covers malformed XML, an invalid query and an empty result alike.
+        match result_string {
+            Some(r) => Ok(
+                futures::stream::iter(r.into_iter().map(ReturnSuccess::value)).to_output_stream(),
+            ),
+            None => Err(ShellError::labeled_error(
+                "xpath query error",
+                "xpath query error",
+                query.tag(),
+            )),
+        }
+    }
+}
+
+/// Runs `query_string` as an XPath expression against the XML text in `input_string`.
+///
+/// Every result becomes a one-column row. The column is named after the query, cut to its
+/// first 17 characters plus "..." when the query is 20 characters or longer.
+///
+/// Returns `None` when the input is not parseable XML, when the query cannot be compiled or
+/// evaluated, or when there are no rows. Both strings come from the user, so no failure
+/// here is allowed to panic.
+pub fn execute_xpath_query(input_string: String, query_string: String) -> Option<Vec<Value>> {
+    let xpath = build_xpath(&query_string)?;
+    let package = parser::parse(&input_string).ok()?;
+    let document = package.as_document();
+    let context = Context::new();
+
+    // leaving this here for augmentation at some point
+    // build_variables(&arguments, &mut context);
+    // build_namespaces(&arguments, &mut context);
+
+    let res = xpath.evaluate(&context, document.root()).ok()?;
+
+    // Some xpath statements can be long, so let's truncate it with ellipsis.
+    // Count and cut by characters: `String::truncate` panics off a char boundary.
+    let key = if query_string.chars().count() >= 20 {
+        let mut short: String = query_string.chars().take(17).collect();
+        short.push_str("...");
+        short
+    } else {
+        query_string
+    };
+
+    let rows: Vec<Value> = match res {
+        sxd_xpath::Value::Nodeset(ns) => ns
+            .into_iter()
+            .map(|a| {
+                let mut row = TaggedDictBuilder::new(Tag::unknown());
+                row.insert_value(&key, UntaggedValue::string(a.string_value()));
+                row.into_value()
+            })
+            .collect(),
+        sxd_xpath::Value::Boolean(b) => {
+            let mut row = TaggedDictBuilder::new(Tag::unknown());
+            row.insert_value(&key, UntaggedValue::boolean(b));
+            vec![row.into_value()]
+        }
+        sxd_xpath::Value::Number(n) => {
+            // `from_f64` gives `None` for values it cannot convert (NaN, presumably the
+            // infinities too); fall back to their text form rather than panic.
+            let number = match BigDecimal::from_f64(n) {
+                Some(decimal) => UntaggedValue::decimal(decimal),
+                None => UntaggedValue::string(n.to_string()),
+            };
+            let mut row = TaggedDictBuilder::new(Tag::unknown());
+            row.insert_value(&key, number);
+            vec![row.into_value()]
+        }
+        sxd_xpath::Value::String(s) => {
+            let mut row = TaggedDictBuilder::new(Tag::unknown());
+            row.insert_value(&key, UntaggedValue::string(s));
+            vec![row.into_value()]
+        }
+    };
+
+    if rows.is_empty() {
+        None
+    } else {
+        Some(rows)
+    }
+}
+
+/// Compiles `xpath_str`. Returns `None` when the factory rejects the query or produces no
+/// expression for it.
+fn build_xpath(xpath_str: &str) -> Option<sxd_xpath::XPath> {
+    Factory::new().build(xpath_str).ok().flatten()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ShellError;
+    use super::XPath;
+
+    #[test]
+    fn examples_work_as_expected() -> Result<(), ShellError> {
+        use crate::examples::test as test_examples;
+
+        Ok(test_examples(XPath {})?)
+    }
+}
diff --git a/crates/nu-cli/tests/commands/xpath.rs b/crates/nu-cli/tests/commands/xpath.rs
new file mode 100644
index 000000000..27ebd5476
--- /dev/null
+++ b/crates/nu-cli/tests/commands/xpath.rs
@@ -0,0 +1,41 @@
+use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
+use nu_test_support::playground::Playground;
+use nu_test_support::{nu, pipeline};
+
+#[test]
+fn position_function_in_predicate() {
+    let actual = nu!(
+        cwd: ".", pipeline(
+        r#"
+            echo "<?xml version="1.0" encoding="UTF-8"?><a><b/><b/></a>" | from xml | to xml | xpath "count(//a/*[position() = 2])"
+        "#
+    ));
+
+    assert_eq!(actual.out, "1.0000");
+}
+
+#[test]
+fn functions_implicitly_coerce_argument_types() {
+    let actual = nu!(
+        cwd: ".", pipeline(
+        r#"
+            echo "<?xml version="1.0" encoding="UTF-8"?><a>true</a>" | from xml | to xml | xpath "count(//*[contains(., true)])"
+        "#
+    ));
+
+    assert_eq!(actual.out, "1.0000");
+}
+
+#[test]
+fn find_guid_permilink_is_true() {
+    let actual = nu!(
+        cwd: "tests/fixtures/formats", pipeline(
+        r#"
+            open jonathan.xml
+            | to xml
+            | xpath '//guid/@isPermaLink'
+        "#
+    ));
+
+    assert_eq!(actual.out, "true");
+}