From 778601b8217b12d1fdd5f8551d0db19a689311ef Mon Sep 17 00:00:00 2001 From: Bruce Weirdan Date: Tue, 19 Aug 2025 01:57:56 +0200 Subject: [PATCH] Extend nodeset output formats for `query xml` (#16465) Refs https://discord.com/channels/601130461678272522/615329862395101194/1406413134402551869 ## Release notes summary - What our users need to know `query xml` now can output additional information when it returns Nodesets: * `--output-type` enables `type` column. It includes the node type found, e.g. `element` or `comment` * `--output-names` adds `local_name`, `prefixed_name` and `namespace` columns * `--output-string-value` (re)adds `string_value` column If you're using any of the `--output-*` switches, and want `string_value` column to show up, pass `--output-string-value` explicitly. In the absence of any `--output-*` attributes, `--output-string-value` is assumed to be on. ## Tasks after submitting - [ ] Update the [documentation](https://github.com/nushell/nushell.github.io) --- crates/nu_plugin_query/src/query_xml.rs | 270 +++++++++++++++++++++++- 1 file changed, 263 insertions(+), 7 deletions(-) diff --git a/crates/nu_plugin_query/src/query_xml.rs b/crates/nu_plugin_query/src/query_xml.rs index 75332764f4..ffe0caa92c 100644 --- a/crates/nu_plugin_query/src/query_xml.rs +++ b/crates/nu_plugin_query/src/query_xml.rs @@ -1,7 +1,8 @@ use crate::Query; use nu_plugin::{EngineInterface, EvaluatedCall, SimplePluginCommand}; use nu_protocol::{ - Category, LabeledError, Record, Signature, Span, Spanned, SyntaxShape, Value, record, + Category, Example, LabeledError, Record, Signature, Span, Spanned, SyntaxShape, Type, Value, + record, }; use sxd_document::parser; use sxd_xpath::{Context, Factory}; @@ -16,7 +17,17 @@ impl SimplePluginCommand for QueryXml { } fn description(&self) -> &str { - "execute xpath query on xml" + "Execute XPath 1.0 query on XML input" + } + + fn extra_description(&self) -> &str { + r#"Scalar results (Number, String, Boolean) are returned as nu scalars. +Output of the nodeset results depends on the flags used: + - No flags: returns a table with `string_value` column. + - You have to specify `--output-string-value` to include `string_value` in the output when using any other `--output-*` flags. + - `--output-type` includes `type` column with node type. + - `--output-names` includes `local_name`, `prefixed_name`, and `namespace` columns. + "# } fn signature(&self) -> Signature { @@ -28,9 +39,62 @@ impl SimplePluginCommand for QueryXml { "map of prefixes to namespace URIs", Some('n'), ) + .switch( + "output-string-value", + "Include `string_value` in the nodeset output. On by default.", + None, + ) + .switch( + "output-type", + "Include `type` in the nodeset output. Off by default.", + None, + ) + .switch( + "output-names", + "Include `local_name`, `prefixed_name`, and `namespace` in the nodeset output. Off by default.", + None, + ) + .input_output_types(vec![ + (Type::String, Type::Any), + ]) .category(Category::Filters) } + fn examples(&self) -> Vec { + vec![ + // full output + Example { + description: "Query namespaces on the root element of an SVG file", + example: r#"http get --raw https://www.w3.org/TR/SVG/images/conform/smiley.svg + | query xml '/svg:svg/namespace::*' --output-string-value --output-names --namespaces {svg: "http://www.w3.org/2000/svg"}"#, + result: None, + }, + // scalar output + Example { + description: "Query number of stylesheets SVG file has", + example: r#"http get --raw https://www.w3.org/TR/SVG/images/conform/smiley.svg + | query xml 'count(//svg:style)' --namespaces {svg: "http://www.w3.org/2000/svg"}"#, + result: None, + }, + // query attributes + Example { + description: "Query all XLink targets in SVG document", + example: r#"http get --raw https://www.w3.org/TR/SVG/images/conform/smiley.svg + | query xml '//*/@xlink:href' --namespaces {xlink: "http://www.w3.org/1999/xlink"}"#, + result: None, + }, + // default output + Example { + description: "Get recent Debian news", + example: r#"http get --raw https://www.debian.org/News/news + | query xml '//item/title|//item/link' + | window 2 --stride 2 + | each { {title: $in.0.string_value, link: $in.1.string_value} }"#, + result: None, + }, + ] + } + fn run( &self, _plugin: &Query, @@ -60,6 +124,8 @@ pub fn execute_xpath_query( } }; + let node_output_options = NodeOutputOptions::from_call(call); + let xpath = build_xpath(query_string, span)?; let input_string = input.coerce_str()?; let package = parser::parse(&input_string); @@ -91,10 +157,7 @@ pub fn execute_xpath_query( Ok(sxd_xpath::Value::Nodeset(ns)) => { let mut records: Vec = vec![]; for n in ns.document_order() { - records.push(Value::record( - record! {"string_value" => Value::string(n.string_value(), call.head)}, - call.head, - )); + records.push(node_to_record(n, &node_output_options, call.head)); } Ok(Value::list(records, call.head)) } @@ -104,6 +167,64 @@ pub fn execute_xpath_query( } } +fn node_to_record( + n: sxd_xpath::nodeset::Node<'_>, + options: &NodeOutputOptions, + span: Span, +) -> Value { + use sxd_xpath::nodeset::Node; + + let mut record = record! {}; + + if options.string_value { + record.push("string_value", Value::string(n.string_value(), span)); + } + + if options.type_ { + record.push( + "type", + match n { + Node::Element(..) => Value::string("element", span), + Node::Attribute(..) => Value::string("attribute", span), + Node::Text(..) => Value::string("text", span), + Node::Comment(..) => Value::string("comment", span), + Node::ProcessingInstruction(..) => Value::string("processing_instruction", span), + Node::Root(..) => Value::string("root", span), + Node::Namespace(..) => Value::string("namespace", span), + }, + ); + } + + if options.names { + record.push( + "local_name", + match n.expanded_name() { + Some(name) => Value::string(name.local_part(), span), + None => Value::nothing(span), + }, + ); + record.push( + "namespace", + match n.expanded_name() { + Some(name) => match name.namespace_uri() { + Some(uri) => Value::string(uri, span), + None => Value::nothing(span), + }, + None => Value::nothing(span), + }, + ); + record.push( + "prefixed_name", + match n.prefixed_name() { + Some(name) => Value::string(name, span), + None => Value::nothing(span), + }, + ); + } + + Value::record(record, span) +} + fn build_xpath(xpath_str: &str, span: Span) -> Result { let factory = Factory::new(); @@ -115,11 +236,40 @@ fn build_xpath(xpath_str: &str, span: Span) -> Result Self { + match ( + call.has_flag("output-string-value") + .expect("output-string-value flag"), + call.has_flag("output-type").expect("output-type flag"), + call.has_flag("output-names").expect("output-names flag"), + ) { + // no flags - old behavior - single column + (false, false, false) => NodeOutputOptions { + string_value: true, + type_: false, + names: false, + }, + (string_value, type_, names) => NodeOutputOptions { + string_value, + type_, + names, + }, + } + } +} + #[cfg(test)] mod tests { use super::execute_xpath_query as query; use nu_plugin::EvaluatedCall; - use nu_protocol::{Span, Spanned, Value, record}; + use nu_protocol::{IntoSpanned, Span, Spanned, Value, record}; #[test] fn position_function_in_predicate() { @@ -291,4 +441,110 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn have_to_specify_output_string_value_explicitly_with_other_output_flags() { + let call = EvaluatedCall { + head: Span::test_data(), + positional: vec![], + named: vec![( + "output-type".to_string().into_spanned(Span::test_data()), + Some(Value::test_bool(true)), + )], + }; + + let text = Value::test_string(r#"hello"#); + + let spanned_str: Spanned = Spanned { + item: "/elt".to_string(), + span: Span::test_data(), + }; + + let actual = query(&call, &text, Some(spanned_str), None).expect("test should not fail"); + + let expected = Value::test_list(vec![Value::test_record(record! { + "type" => Value::test_string("element"), + })]); + + assert_eq!(actual, expected); + } + + #[test] + fn output_string_value_adds_string_value_column() { + let call = EvaluatedCall { + head: Span::test_data(), + positional: vec![], + named: vec![ + ( + "output-string-value" + .to_string() + .into_spanned(Span::test_data()), + Some(Value::test_bool(true)), + ), + ( + "output-type".to_string().into_spanned(Span::test_data()), + Some(Value::test_bool(true)), + ), + ], + }; + + let text = Value::test_string(r#"hello"#); + let spanned_str: Spanned = Spanned { + item: "/elt".to_string(), + span: Span::test_data(), + }; + + let actual = query(&call, &text, Some(spanned_str), None).expect("test should not fail"); + + let expected = Value::test_list(vec![Value::test_record(record! { + "string_value" => Value::test_string("hello"), + "type" => Value::test_string("element"), + })]); + + assert_eq!(actual, expected); + } + + #[test] + fn output_names_adds_names_columns() { + let call = EvaluatedCall { + head: Span::test_data(), + positional: vec![], + named: vec![ + ( + "output-names".to_string().into_spanned(Span::test_data()), + Some(Value::test_bool(true)), + ), + ( + "output-string-value" + .to_string() + .into_spanned(Span::test_data()), + Some(Value::test_bool(true)), + ), + ], + }; + + let text = Value::test_string( + r#"hello"#, + ); + let spanned_str: Spanned = Spanned { + item: "/svg:elt".to_string(), + span: Span::test_data(), + }; + + let namespaces = record! { + "svg" => Value::test_string("http://www.w3.org/2000/svg"), + }; + + let actual = + query(&call, &text, Some(spanned_str), Some(namespaces)).expect("test should not fail"); + + let expected = Value::test_list(vec![Value::test_record(record! { + "string_value" => Value::test_string("hello"), + "local_name" => Value::test_string("elt"), + "namespace" => Value::test_string("http://www.w3.org/2000/svg"), + "prefixed_name" => Value::test_string("elt"), + })]); + + assert_eq!(actual, expected); + } }