New xml format (#7947)

# Description

Changes the data format used by the `from xml` and `to xml` commands.
See #7682 for the reasoning behind this change.
Output is now a series of records with `tag`, `attributes` and `content`
fields.

Old:

![image](https://user-images.githubusercontent.com/17511668/224508728-92d37c1f-ebac-4d5c-924d-bebd60f5cf85.png)
New:

![image](https://user-images.githubusercontent.com/17511668/224508753-a2de338a-ff2a-41e0-bbc1-ccc07a1d00ce.png)


# User-Facing Changes

New output/input format and better error handling for the `from xml` and
`to xml` commands.

# Tests + Formatting

Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass

# After Submitting

If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
This commit is contained in:
Artemiy 2023-03-12 02:35:42 +03:00 committed by GitHub
parent 2e01bf9cba
commit a13946e3ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 663 additions and 301 deletions

View File

@ -1,3 +1,4 @@
use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME};
use indexmap::map::IndexMap;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
@ -5,6 +6,7 @@ use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, Type,
Value,
};
use roxmltree::NodeType;
#[derive(Clone)]
pub struct FromXml;
@ -17,6 +19,12 @@ impl Command for FromXml {
/// Declares the `from xml` command: it takes a string, produces a record, and
/// offers two switches that opt in to comment and processing-instruction nodes.
fn signature(&self) -> Signature {
    let io_types = vec![(Type::String, Type::Record(vec![]))];
    let keep_pi_help = "add processing instruction nodes to result";

    Signature::build("from xml")
        .input_output_types(io_types)
        .switch("keep-comments", "add comment nodes to result", None)
        .switch("keep-pi", keep_pi_help, None)
        .category(Category::Formats)
}
@ -24,6 +32,18 @@ impl Command for FromXml {
"Parse text as .xml and create record."
}
/// Long-form help text describing the record layout produced by `from xml`.
///
/// The field names shown here must match the actual column names
/// (`tag`, `attributes`, `content`) so users can copy them verbatim.
fn extra_usage(&self) -> &str {
    r#"Every XML entry is represented via a record with tag, attributes and content fields.
To represent different types of entries different values are written to these fields:
1. Tag entry: {tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]}
2. Comment entry: {tag: '!' attributes: null content: "<comment string>"}
3. Processing instruction (PI): {tag: '?<pi name>' attributes: null content: "<pi content string>"}
4. Text: {tag: null attributes: null content: "<text>"}.
Unlike to xml command all null values are always present and text is never represented via plain
string. This way content of every tag is always a table and is easier to parse"#
}
fn run(
&self,
_engine_state: &EngineState,
@ -32,7 +52,14 @@ impl Command for FromXml {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
from_xml(input, head)
let keep_comments = call.has_flag("keep-comments");
let keep_processing_instructions = call.has_flag("keep-pi");
let info = ParsingInfo {
span: head,
keep_comments,
keep_processing_instructions,
};
from_xml(input, &info)
}
fn examples(&self) -> Vec<Example> {
@ -42,51 +69,52 @@ impl Command for FromXml {
<remember>Event</remember>
</note>' | from xml"#,
description: "Converts xml formatted string to record",
result: Some(Value::Record {
cols: vec!["note".to_string()],
vals: vec![Value::Record {
cols: vec!["children".to_string(), "attributes".to_string()],
vals: vec![
Value::List {
vals: vec![Value::Record {
cols: vec!["remember".to_string()],
vals: vec![Value::Record {
cols: vec!["children".to_string(), "attributes".to_string()],
vals: vec![
Value::List {
vals: vec![Value::test_string("Event")],
span: Span::test_data(),
},
Value::Record {
cols: vec![],
vals: vec![],
span: Span::test_data(),
},
],
span: Span::test_data(),
}],
span: Span::test_data(),
}],
span: Span::test_data(),
},
Value::Record {
cols: vec![],
vals: vec![],
span: Span::test_data(),
},
],
span: Span::test_data(),
}],
span: Span::test_data(),
}),
result: Some(Value::test_record(
vec![COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME],
vec![
Value::test_string("note"),
Value::test_record(Vec::<&str>::new(), vec![]),
Value::list(
vec![Value::test_record(
vec![COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME],
vec![
Value::test_string("remember"),
Value::test_record(Vec::<&str>::new(), vec![]),
Value::list(
vec![Value::test_record(
vec![
COLUMN_TAG_NAME,
COLUMN_ATTRS_NAME,
COLUMN_CONTENT_NAME,
],
vec![
Value::test_nothing(),
Value::test_nothing(),
Value::test_string("Event"),
],
)],
Span::test_data(),
),
],
)],
Span::test_data(),
),
],
)),
}]
}
}
fn from_attributes_to_value(attributes: &[roxmltree::Attribute], span: Span) -> Value {
/// Options threaded through every step of the XML-to-value conversion.
struct ParsingInfo {
/// Span attached to every value produced by the conversion.
span: Span,
/// When `true`, comment nodes are converted instead of being dropped.
keep_comments: bool,
/// When `true`, processing instructions are converted instead of being dropped.
keep_processing_instructions: bool,
}
fn from_attributes_to_value(attributes: &[roxmltree::Attribute], info: &ParsingInfo) -> Value {
let mut collected = IndexMap::new();
for a in attributes {
collected.insert(String::from(a.name()), Value::string(a.value(), span));
collected.insert(String::from(a.name()), Value::string(a.value(), info.span));
}
let (cols, vals) = collected
@ -97,97 +125,205 @@ fn from_attributes_to_value(attributes: &[roxmltree::Attribute], span: Span) ->
acc
});
Value::Record { cols, vals, span }
}
fn from_node_to_value(n: &roxmltree::Node, span: Span) -> Value {
if n.is_element() {
let name = n.tag_name().name().trim().to_string();
let mut children_values = vec![];
for c in n.children() {
children_values.push(from_node_to_value(&c, span));
}
let children_values: Vec<Value> = children_values
.into_iter()
.filter(|x| match x {
Value::String { val: f, .. } => {
!f.trim().is_empty() // non-whitespace characters?
}
_ => true,
})
.collect();
let mut collected = IndexMap::new();
let attribute_value: Value =
from_attributes_to_value(&n.attributes().collect::<Vec<_>>(), span);
let mut row = IndexMap::new();
row.insert(
String::from("children"),
Value::List {
vals: children_values,
span,
},
);
row.insert(String::from("attributes"), attribute_value);
collected.insert(name, Value::from(Spanned { item: row, span }));
Value::from(Spanned {
item: collected,
span,
})
} else if n.is_comment() {
Value::String {
val: "<comment>".to_string(),
span,
}
} else if n.is_pi() {
Value::String {
val: "<processing_instruction>".to_string(),
span,
}
} else if n.is_text() {
match n.text() {
Some(text) => Value::String {
val: text.to_string(),
span,
},
None => Value::String {
val: "<error>".to_string(),
span,
},
}
} else {
Value::String {
val: "<unknown>".to_string(),
span,
}
Value::Record {
cols,
vals,
span: info.span,
}
}
fn from_document_to_value(d: &roxmltree::Document, span: Span) -> Value {
from_node_to_value(&d.root_element(), span)
/// Converts an element node into a record with tag, attributes and content columns.
///
/// Children are converted recursively; any child for which
/// `from_node_to_value` returns `None` is left out of the content list.
fn element_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Value {
    let span = info.span;

    let tag_name = Value::string(n.tag_name().name().trim().to_string(), span);

    let mut children: Vec<Value> = Vec::new();
    for child in n.children() {
        if let Some(converted) = from_node_to_value(&child, info) {
            children.push(converted);
        }
    }

    let raw_attributes: Vec<_> = n.attributes().collect();
    let attribute_record = from_attributes_to_value(&raw_attributes, info);

    let mut record = IndexMap::new();
    record.insert(String::from(COLUMN_TAG_NAME), tag_name);
    record.insert(String::from(COLUMN_ATTRS_NAME), attribute_record);
    record.insert(
        String::from(COLUMN_CONTENT_NAME),
        Value::list(children, span),
    );

    Value::from(Spanned { item: record, span })
}
pub fn from_xml_string_to_value(s: String, span: Span) -> Result<Value, roxmltree::Error> {
/// Converts a text node into a record whose tag and attributes are null and
/// whose content is the trimmed text.
///
/// Returns `None` when the text is empty after trimming.
///
/// # Panics
///
/// Panics if `n.text()` returns `None`.
fn text_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option<Value> {
    let span = info.span;
    let trimmed = n
        .text()
        .expect("Non-text node supplied to text_to_value")
        .trim();

    if trimmed.is_empty() {
        return None;
    }

    let mut record = IndexMap::new();
    record.insert(String::from(COLUMN_TAG_NAME), Value::nothing(span));
    record.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span));
    record.insert(
        String::from(COLUMN_CONTENT_NAME),
        Value::string(trimmed.to_owned(), span),
    );

    Some(Value::from(Spanned { item: record, span }))
}
/// Converts a comment node into a record tagged `!` with null attributes and
/// the comment text as content.
///
/// Returns `None` unless `info.keep_comments` is set.
///
/// # Panics
///
/// Panics if comments are kept and `n.text()` returns `None`.
fn comment_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option<Value> {
    if !info.keep_comments {
        return None;
    }

    let span = info.span;
    let body = n
        .text()
        .expect("Non-comment node supplied to comment_to_value");

    let mut record = IndexMap::new();
    record.insert(String::from(COLUMN_TAG_NAME), Value::string("!", span));
    record.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span));
    record.insert(
        String::from(COLUMN_CONTENT_NAME),
        Value::string(body.to_owned(), span),
    );

    Some(Value::from(Spanned { item: record, span }))
}
/// Converts a processing-instruction node into a record tagged `?<target>`.
///
/// Attributes are always null; content is the PI value, or null when the PI
/// has none. Returns `None` unless `info.keep_processing_instructions` is set,
/// or when `n.pi()` yields nothing.
fn processing_instruction_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option<Value> {
    if !info.keep_processing_instructions {
        return None;
    }

    let span = info.span;
    let pi = n.pi()?;

    // The leading '?' keeps PI targets distinguishable from ordinary tag names.
    let target = Value::string(format!("?{}", pi.target), span);
    let body = match pi.value {
        Some(text) => Value::string(text, span),
        None => Value::nothing(span),
    };

    let mut record = IndexMap::new();
    record.insert(String::from(COLUMN_TAG_NAME), target);
    record.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span));
    record.insert(String::from(COLUMN_CONTENT_NAME), body);

    Some(Value::from(Spanned { item: record, span }))
}
/// Dispatches a node to the converter for its node type.
///
/// Elements always produce a value; text, comments and processing
/// instructions may be dropped by their converters; every other node type
/// yields `None`.
fn from_node_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option<Value> {
    let kind = n.node_type();
    match kind {
        NodeType::Text => text_to_value(n, info),
        NodeType::Comment => comment_to_value(n, info),
        NodeType::PI => processing_instruction_to_value(n, info),
        NodeType::Element => Some(element_to_value(n, info)),
        _ => None,
    }
}
/// Converts a parsed document by converting its root element.
fn from_document_to_value(d: &roxmltree::Document, info: &ParsingInfo) -> Value {
    let root = d.root_element();
    element_to_value(&root, info)
}
fn from_xml_string_to_value(s: String, info: &ParsingInfo) -> Result<Value, roxmltree::Error> {
let parsed = roxmltree::Document::parse(&s)?;
Ok(from_document_to_value(&parsed, span))
Ok(from_document_to_value(&parsed, info))
}
fn from_xml(input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
let (concat_string, span, metadata) = input.collect_string_strict(head)?;
fn from_xml(input: PipelineData, info: &ParsingInfo) -> Result<PipelineData, ShellError> {
let (concat_string, span, metadata) = input.collect_string_strict(info.span)?;
match from_xml_string_to_value(concat_string, head) {
match from_xml_string_to_value(concat_string, info) {
Ok(x) => Ok(x.into_pipeline_data_with_metadata(metadata)),
_ => Err(ShellError::UnsupportedInput(
"Could not parse string as XML".to_string(),
"value originates from here".into(),
head,
Err(err) => Err(process_xml_parse_error(err, span)),
}
}
fn process_xml_parse_error(err: roxmltree::Error, span: Span) -> ShellError {
match err {
roxmltree::Error::InvalidXmlPrefixUri(_) => make_cant_convert_error(
"The `xmlns:xml` attribute must have an <http://www.w3.org/XML/1998/namespace> URI.",
span,
)),
),
roxmltree::Error::UnexpectedXmlUri(_) => make_cant_convert_error(
"Only the xmlns:xml attribute can have the http://www.w3.org/XML/1998/namespace URI.",
span,
),
roxmltree::Error::UnexpectedXmlnsUri(_) => make_cant_convert_error(
"The http://www.w3.org/2000/xmlns/ URI must not be declared.",
span,
),
roxmltree::Error::InvalidElementNamePrefix(_) => {
make_cant_convert_error("xmlns can't be used as an element prefix.", span)
}
roxmltree::Error::DuplicatedNamespace(_, _) => {
make_cant_convert_error("A namespace was already defined on this element.", span)
}
roxmltree::Error::UnknownNamespace(prefix, _) => {
make_cant_convert_error(format!("Unknown prefix {}", prefix), span)
}
roxmltree::Error::UnexpectedCloseTag { .. } => {
make_cant_convert_error("Unexpected close tag", span)
}
roxmltree::Error::UnexpectedEntityCloseTag(_) => {
make_cant_convert_error("Entity value starts with a close tag.", span)
}
roxmltree::Error::UnknownEntityReference(_, _) => make_cant_convert_error(
"A reference to an entity that was not defined in the DTD.",
span,
),
roxmltree::Error::MalformedEntityReference(_) => {
make_cant_convert_error("A malformed entity reference.", span)
}
roxmltree::Error::EntityReferenceLoop(_) => {
make_cant_convert_error("A possible entity reference loop.", span)
}
roxmltree::Error::InvalidAttributeValue(_) => {
make_cant_convert_error("Attribute value cannot have a < character.", span)
}
roxmltree::Error::DuplicatedAttribute(_, _) => {
make_cant_convert_error("An element has a duplicated attributes.", span)
}
roxmltree::Error::NoRootNode => {
make_cant_convert_error("The XML document must have at least one element.", span)
}
roxmltree::Error::UnclosedRootNode => {
make_cant_convert_error("The root node was opened but never closed.", span)
}
roxmltree::Error::DtdDetected => make_cant_convert_error(
"An XML with DTD detected. DTDs are currently disabled due to security reasons.",
span,
),
roxmltree::Error::NodesLimitReached => {
make_cant_convert_error("Node limit was reached.", span)
}
roxmltree::Error::AttributesLimitReached => {
make_cant_convert_error("Attribute limit reached", span)
}
roxmltree::Error::NamespacesLimitReached => {
make_cant_convert_error("Namespace limit reached", span)
}
roxmltree::Error::ParserError(_) => make_cant_convert_error("Parser error", span),
}
}
/// Builds the string-to-XML `CantConvert` error, carrying `help` as the hint
/// and pointing at `span`.
fn make_cant_convert_error(help: impl Into<String>, span: Span) -> ShellError {
    let source_type = Type::String.to_string();
    let hint: String = help.into();

    ShellError::CantConvert {
        to_type: String::from("XML"),
        from_type: source_type,
        span,
        help: Some(hint),
    }
}
@ -203,9 +339,12 @@ mod tests {
Value::test_string(input)
}
fn row(entries: IndexMap<String, Value>) -> Value {
fn attributes(entries: IndexMap<&str, &str>) -> Value {
Value::from(Spanned {
item: entries,
item: entries
.into_iter()
.map(|(k, v)| (k.into(), string(v)))
.collect::<IndexMap<String, Value>>(),
span: Span::test_data(),
})
}
@ -217,23 +356,46 @@ mod tests {
}
}
fn content_tag(
tag: impl Into<String>,
attrs: IndexMap<&str, &str>,
content: &[Value],
) -> Value {
Value::from(Spanned {
item: indexmap! {
COLUMN_TAG_NAME.into() => string(tag),
COLUMN_ATTRS_NAME.into() => attributes(attrs),
COLUMN_CONTENT_NAME.into() => table(content),
},
span: Span::test_data(),
})
}
fn content_string(value: impl Into<String>) -> Value {
Value::from(Spanned {
item: indexmap! {
COLUMN_TAG_NAME.into() => Value::nothing(Span::test_data()),
COLUMN_ATTRS_NAME.into() => Value::nothing(Span::test_data()),
COLUMN_CONTENT_NAME.into() => string(value),
},
span: Span::test_data(),
})
}
fn parse(xml: &str) -> Result<Value, roxmltree::Error> {
from_xml_string_to_value(xml.to_string(), Span::test_data())
let info = ParsingInfo {
span: Span::test_data(),
keep_comments: false,
keep_processing_instructions: false,
};
from_xml_string_to_value(xml.to_string(), &info)
}
#[test]
fn parses_empty_element() -> Result<(), roxmltree::Error> {
let source = "<nu></nu>";
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[]),
"attributes".into() => row(indexmap! {})
})
})
);
assert_eq!(parse(source)?, content_tag("nu", indexmap! {}, &vec![]));
Ok(())
}
@ -244,12 +406,11 @@ mod tests {
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[string("La era de los tres caballeros")]),
"attributes".into() => row(indexmap! {})
})
})
content_tag(
"nu",
indexmap! {},
&vec![content_string("La era de los tres caballeros")]
)
);
Ok(())
@ -266,31 +427,15 @@ mod tests {
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[
row(indexmap! {
"dev".into() => row(indexmap! {
"children".into() => table(&[string("Andrés")]),
"attributes".into() => row(indexmap! {})
})
}),
row(indexmap! {
"dev".into() => row(indexmap! {
"children".into() => table(&[string("Jonathan")]),
"attributes".into() => row(indexmap! {})
})
}),
row(indexmap! {
"dev".into() => row(indexmap! {
"children".into() => table(&[string("Yehuda")]),
"attributes".into() => row(indexmap! {})
})
})
]),
"attributes".into() => row(indexmap! {})
})
})
content_tag(
"nu",
indexmap! {},
&vec![
content_tag("dev", indexmap! {}, &vec![content_string("Andrés")]),
content_tag("dev", indexmap! {}, &vec![content_string("Jonathan")]),
content_tag("dev", indexmap! {}, &vec![content_string("Yehuda")])
]
)
);
Ok(())
@ -304,14 +449,7 @@ mod tests {
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[]),
"attributes".into() => row(indexmap! {
"version".into() => string("2.0")
})
})
})
content_tag("nu", indexmap! {"version" => "2.0"}, &vec![])
);
Ok(())
@ -326,21 +464,15 @@ mod tests {
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[
row(indexmap! {
"version".into() => row(indexmap! {
"children".into() => table(&[string("2.0")]),
"attributes".into() => row(indexmap! {})
})
})
]),
"attributes".into() => row(indexmap! {
"version".into() => string("2.0")
})
})
})
content_tag(
"nu",
indexmap! {"version" => "2.0"},
&vec![content_tag(
"version",
indexmap! {},
&vec![content_string("2.0")]
)]
)
);
Ok(())
@ -354,15 +486,7 @@ mod tests {
assert_eq!(
parse(source)?,
row(indexmap! {
"nu".into() => row(indexmap! {
"children".into() => table(&[]),
"attributes".into() => row(indexmap! {
"version".into() => string("2.0"),
"age".into() => string("25")
})
})
})
content_tag("nu", indexmap! {"version" => "2.0", "age" => "25"}, &vec![])
);
Ok(())

View File

@ -1,4 +1,5 @@
mod from;
mod nu_xml_format;
mod to;
pub use from::*;

View File

@ -0,0 +1,3 @@
/// Name of the record column holding an XML entry's tag.
pub const COLUMN_TAG_NAME: &str = "tag";
/// Name of the record column holding an XML entry's attributes.
pub const COLUMN_ATTRS_NAME: &str = "attributes";
/// Name of the record column holding an XML entry's content.
pub const COLUMN_CONTENT_NAME: &str = "content";

View File

@ -1,13 +1,13 @@
use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME};
use indexmap::IndexMap;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Config, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span,
Spanned, SyntaxShape, Type, Value,
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
use std::collections::HashSet;
use std::io::Cursor;
use std::io::Write;
@ -31,18 +31,36 @@ impl Command for ToXml {
.category(Category::Formats)
}
/// Long-form help text describing the record layout accepted by `to xml`.
///
/// The field names shown here must match the actual column names
/// (`tag`, `attributes`, `content`): an unknown key such as `attrs` is not
/// looked up by the converter, so documenting it would mislead users.
fn extra_usage(&self) -> &str {
    r#"Every XML entry is represented via a record with tag, attributes and content fields.
To represent different types of entries different values must be written to these fields:
1. Tag entry: {tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]}
2. Comment entry: {tag: '!' attributes: null content: "<comment string>"}
3. Processing instruction (PI): {tag: '?<pi name>' attributes: null content: "<pi content string>"}
4. Text: {tag: null attributes: null content: "<text>"}. Or as plain "<text>" instead of record.
Additionally any field which is: empty record, empty list or null, can be omitted."#
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Outputs an XML string representing the contents of this table",
example: r#"{ "note": { "children": [{ "remember": {"attributes" : {}, "children": [Event]}}], "attributes": {} } } | to xml"#,
example: r#"{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attrs: null content : Event}]}]} | to xml"#,
result: Some(Value::test_string(
"<note><remember>Event</remember></note>",
)),
},
Example {
description: "When formatting xml null and empty record fields can be omitted and strings can be written without a wrapping record",
example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml"#,
result: Some(Value::test_string(
"<note><remember>Event</remember></note>",
)),
},
Example {
description: "Optionally, formats the text with a custom indentation setting",
example: r#"{ "note": { "children": [{ "remember": {"attributes" : {}, "children": [Event]}}], "attributes": {} } } | to xml -p 3"#,
example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml -p 3"#,
result: Some(Value::test_string(
"<note>\n <remember>Event</remember>\n</note>",
)),
@ -51,7 +69,7 @@ impl Command for ToXml {
}
fn usage(&self) -> &str {
"Convert table into .xml text."
"Convert special record structure into .xml text."
}
fn run(
@ -62,110 +80,316 @@ impl Command for ToXml {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let config = engine_state.get_config();
let pretty: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "pretty")?;
let input = input.try_expand_range()?;
to_xml(input, head, pretty, config)
to_xml(input, head, pretty)
}
}
pub fn add_attributes<'a>(
element: &mut quick_xml::events::BytesStart<'a>,
attributes: &'a IndexMap<String, String>,
) {
pub fn add_attributes<'a>(element: &mut BytesStart<'a>, attributes: &'a IndexMap<String, String>) {
for (k, v) in attributes {
element.push_attribute((k.as_str(), v.as_str()));
}
}
pub fn get_attributes(row: &Value, config: &Config) -> Option<IndexMap<String, String>> {
if let Value::Record { .. } = row {
if let Some(Value::Record { cols, vals, .. }) = row.get_data_by_key("attributes") {
let mut h = IndexMap::new();
for (k, v) in cols.iter().zip(vals.iter()) {
h.insert(k.clone(), v.clone().into_abbreviated_string(config));
}
return Some(h);
}
}
None
}
pub fn get_children(row: &Value) -> Option<Vec<Value>> {
if let Value::Record { .. } = row {
if let Some(Value::List { vals, .. }) = row.get_data_by_key("children") {
return Some(vals);
}
}
None
}
pub fn is_xml_row(row: &Value) -> bool {
if let Value::Record { cols, .. } = &row {
let keys: HashSet<&String> = cols.iter().collect();
let children: String = "children".to_string();
let attributes: String = "attributes".to_string();
return keys.contains(&children) && keys.contains(&attributes) && keys.len() == 2;
}
false
}
pub fn write_xml_events<W: Write>(
current: Value,
fn to_xml_entry<W: Write>(
entry: Value,
top_level: bool,
writer: &mut quick_xml::Writer<W>,
config: &Config,
) -> Result<(), ShellError> {
match current {
Value::Record { cols, vals, span } => {
for (k, v) in cols.iter().zip(vals.iter()) {
let mut e = BytesStart::new(k);
if !is_xml_row(v) {
return Err(ShellError::GenericError(
"Expected a row with 'children' and 'attributes' columns".to_string(),
"missing 'children' and 'attributes' columns ".to_string(),
Some(span),
None,
Vec::new(),
));
}
let a = get_attributes(v, config);
if let Some(ref a) = a {
add_attributes(&mut e, a);
}
writer
.write_event(Event::Start(e))
.expect("Couldn't open XML node");
let c = get_children(v);
if let Some(c) = c {
for v in c {
write_xml_events(v, writer, config)?;
}
}
writer
.write_event(Event::End(BytesEnd::new(k)))
.expect("Couldn't close XML node");
let entry_span = entry.span()?;
// Allow using strings directly as content.
// So user can write
// {tag: a content: ['qwe']}
// instead of longer
// {tag: a content: [{content: 'qwe'}]}
if let (Value::String { val, span }, false) = (&entry, top_level) {
return to_xml_text(val.as_str(), *span, writer);
}
if !matches!(entry, Value::Record { .. }) {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: entry.get_type().to_string(),
span: entry_span,
help: Some("Xml entry expected to be a record".into()),
});
};
// If key is not found it is assumed to be nothing. This way
// user can write a tag like {tag: a content: [...]} instead
// of longer {tag: a attributes: {} content: [...]}
let tag = entry
.get_data_by_key(COLUMN_TAG_NAME)
.unwrap_or_else(|| Value::nothing(Span::unknown()));
let attrs = entry
.get_data_by_key(COLUMN_ATTRS_NAME)
.unwrap_or_else(|| Value::nothing(Span::unknown()));
let content = entry
.get_data_by_key(COLUMN_CONTENT_NAME)
.unwrap_or_else(|| Value::nothing(Span::unknown()));
match (tag, attrs, content) {
(Value::Nothing { .. }, Value::Nothing { .. }, Value::String { val, span }) => {
// Strings can not appear on top level of document
if top_level {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: entry.get_type().to_string(),
span: entry_span,
help: Some("Strings can not be a root element of document".into()),
});
}
to_xml_text(val.as_str(), span, writer)
}
Value::List { vals, .. } => {
for v in vals {
write_xml_events(v, writer, config)?;
(
Value::String {
val: tag_name,
span: tag_span,
},
attrs,
children,
) => to_tag_like(
entry_span, tag_name, tag_span, attrs, children, top_level, writer,
),
_ => Ok(()),
}
}
/// Convert record to tag-like entry: tag, PI, comment.
fn to_tag_like<W: Write>(
entry_span: Span,
tag: String,
tag_span: Span,
attrs: Value,
content: Value,
top_level: bool,
writer: &mut quick_xml::Writer<W>,
) -> Result<(), ShellError> {
if tag == "!" {
// Comments can not appear on top level of document
if top_level {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: "record".into(),
span: entry_span,
help: Some("Comments can not be a root element of document".into()),
});
}
to_comment(entry_span, attrs, content, writer)
} else if let Some(tag) = tag.strip_prefix('?') {
// PIs can not appear on top level of document
if top_level {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: Type::Record(vec![]).to_string(),
span: entry_span,
help: Some("PIs can not be a root element of document".into()),
});
}
let content: String = match content {
Value::String { val, .. } => val,
Value::Nothing { .. } => "".into(),
_ => {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: Type::Record(vec![]).to_string(),
span: content.span()?,
help: Some("PI content expected to be a string".into()),
});
}
}
_ => {
let s = current.into_abbreviated_string(config);
};
to_processing_instruction(entry_span, tag, attrs, content, writer)
} else {
// Allow tag to have no attributes or content for short hand input
// alternatives like {tag: a attributes: {} content: []}, {tag: a attribbutes: null
// content: null}, {tag: a}. See to_xml_entry for more
let (attr_cols, attr_values) = match attrs {
Value::Record { cols, vals, .. } => (cols, vals),
Value::Nothing { .. } => (Vec::new(), Vec::new()),
_ => {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: attrs.get_type().to_string(),
span: attrs.span()?,
help: Some("Tag attributes expected to be a record".into()),
});
}
};
let content = match content {
Value::List { vals, .. } => vals,
Value::Nothing { .. } => Vec::new(),
_ => {
return Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: content.get_type().to_string(),
span: content.span()?,
help: Some("Tag content expected to be a list".into()),
});
}
};
to_tag(
entry_span,
tag,
tag_span,
attr_cols,
attr_values,
content,
writer,
)
}
}
fn to_comment<W: Write>(
entry_span: Span,
attrs: Value,
content: Value,
writer: &mut quick_xml::Writer<W>,
) -> Result<(), ShellError> {
match (attrs, content) {
(Value::Nothing { .. }, Value::String { val, .. }) => {
let comment_content = BytesText::new(val.as_str());
writer
.write_event(Event::Text(BytesText::from_escaped(s.as_str())))
.expect("Couldn't write XML text");
.write_event(Event::Comment(comment_content))
.map_err(|_| ShellError::CantConvert {
to_type: "XML".to_string(),
from_type: Type::Record(vec![]).to_string(),
span: entry_span,
help: Some("Failure writing comment to xml".into()),
})
}
(_, content) => Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: content.get_type().to_string(),
span: entry_span,
help: Some("Comment expected to have string content and no attributes".into()),
}),
}
}
/// Writes a processing instruction (`<?tag content?>`) to `writer`.
///
/// `tag` is the PI target without the leading `?`, and `content` is the raw
/// instruction body (may be empty).
///
/// # Errors
///
/// Returns `ShellError::CantConvert` pointing at `entry_span` when `attrs` is
/// anything other than nothing, or when the writer fails.
fn to_processing_instruction<W: Write>(
    entry_span: Span,
    tag: &str,
    attrs: Value,
    content: String,
    writer: &mut quick_xml::Writer<W>,
) -> Result<(), ShellError> {
    if !matches!(attrs, Value::Nothing { .. }) {
        return Err(ShellError::CantConvert {
            to_type: "XML".into(),
            from_type: Type::Record(vec![]).to_string(),
            span: entry_span,
            help: Some("PIs do not have attributes".into()),
        });
    }

    // Only add the separating space when there is a body, so a content-less
    // PI is written as `<?tag?>` rather than `<?tag ?>`.
    let content_text = if content.is_empty() {
        tag.to_string()
    } else {
        format!("{} {}", tag, content)
    };

    // PI content must be written verbatim. `BytesText::new` would XML-escape
    // it (e.g. `a & b` -> `a &amp; b`), which corrupts the instruction because
    // entity references are not expanded inside a PI.
    let pi_content = BytesText::from_escaped(content_text.as_str());

    writer
        .write_event(Event::PI(pi_content))
        .map_err(|_| ShellError::CantConvert {
            to_type: "XML".to_string(),
            from_type: Type::Record(vec![]).to_string(),
            span: entry_span,
            help: Some("Failure writing PI to xml".into()),
        })
}
/// Writes an element: opening tag with attributes, every child entry, then
/// the closing tag.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` when the tag name starts with `!` or `?`
/// (pointing at `tag_span`), when attribute parsing fails, when a child entry
/// fails to convert, or when the writer rejects the opening or closing tag
/// (pointing at `entry_span`).
fn to_tag<W: Write>(
    entry_span: Span,
    tag: String,
    tag_span: Span,
    attr_cols: Vec<String>,
    attr_vals: Vec<Value>,
    children: Vec<Value>,
    writer: &mut quick_xml::Writer<W>,
) -> Result<(), ShellError> {
    /// Error reported when the writer rejects an opening or closing tag event.
    fn write_failure(span: Span) -> ShellError {
        ShellError::CantConvert {
            to_type: "XML".to_string(),
            from_type: Type::Record(vec![]).to_string(),
            span,
            help: Some("Failure writing tag to xml".into()),
        }
    }

    // `!` and `?` prefixes are reserved for comment and PI entries.
    if matches!(tag.chars().next(), Some('!' | '?')) {
        return Err(ShellError::CantConvert {
            to_type: "XML".to_string(),
            from_type: Type::Record(vec![]).to_string(),
            span: tag_span,
            help: Some(format!(
                "Incorrect tag name {}, tag name can not start with ! or ?",
                tag
            )),
        });
    }

    let attributes = parse_attributes(attr_cols, attr_vals)?;

    let mut start = BytesStart::new(tag.as_str());
    add_attributes(&mut start, &attributes);
    writer
        .write_event(Event::Start(start))
        .map_err(|_| write_failure(entry_span))?;

    for child in children {
        to_xml_entry(child, false, writer)?;
    }

    writer
        .write_event(Event::End(BytesEnd::new(tag.as_str())))
        .map_err(|_| write_failure(entry_span))
}
fn parse_attributes(
cols: Vec<String>,
vals: Vec<Value>,
) -> Result<IndexMap<String, String>, ShellError> {
let mut h = IndexMap::new();
for (k, v) in cols.into_iter().zip(vals.into_iter()) {
if let Value::String { val, .. } = v {
h.insert(k, val);
} else {
return Err(ShellError::CantConvert {
to_type: "XML".to_string(),
from_type: v.get_type().to_string(),
span: v.span()?,
help: Some("Attribute value expected to be a string".into()),
});
}
}
Ok(())
Ok(h)
}
/// Writes `val` as a text event built with `BytesText::new`.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` pointing at `span` when the writer fails.
fn to_xml_text<W: Write>(
    val: &str,
    span: Span,
    writer: &mut quick_xml::Writer<W>,
) -> Result<(), ShellError> {
    let event = Event::Text(BytesText::new(val));
    match writer.write_event(event) {
        Ok(()) => Ok(()),
        Err(_) => Err(ShellError::CantConvert {
            to_type: "XML".to_string(),
            from_type: Type::String.to_string(),
            span,
            help: Some("Failure writing string to xml".into()),
        }),
    }
}
fn to_xml(
input: PipelineData,
head: Span,
pretty: Option<Spanned<i64>>,
config: &Config,
) -> Result<PipelineData, ShellError> {
let mut w = pretty.as_ref().map_or_else(
|| quick_xml::Writer::new(Cursor::new(Vec::new())),
@ -173,25 +397,16 @@ fn to_xml(
);
let value = input.into_value(head);
let value_type = value.get_type();
match write_xml_events(value, &mut w, config) {
Ok(_) => {
let b = w.into_inner().into_inner();
let s = if let Ok(s) = String::from_utf8(b) {
s
} else {
return Err(ShellError::NonUtf8(head));
};
Ok(Value::string(s, head).into_pipeline_data())
}
Err(_) => Err(ShellError::CantConvert {
to_type: "XML".into(),
from_type: value_type.to_string(),
span: head,
help: None,
}),
}
to_xml_entry(value, true, &mut w).and_then(|_| {
let b = w.into_inner().into_inner();
let s = if let Ok(s) = String::from_utf8(b) {
s
} else {
return Err(ShellError::NonUtf8(head));
};
Ok(Value::string(s, head).into_pipeline_data())
})
}
#[cfg(test)]

View File

@ -179,7 +179,18 @@ fn parses_json() {
fn parses_xml() {
let actual = nu!(
cwd: "tests/fixtures/formats",
"open jonathan.xml | get rss.children.channel.children | get 0.3.item.children | get 3.link.children.0"
pipeline(r#"
open jonathan.xml
| get content
| where tag == channel
| get content
| flatten
| where tag == item
| get content
| flatten
| where tag == guid
| get content.0.content.0
"#)
);
assert_eq!(

View File

@ -8,7 +8,15 @@ fn table_to_xml_text_and_from_xml_text_back_into_table() {
open jonathan.xml
| to xml
| from xml
| get rss.children.channel.children.0.3.item.children.guid.4.attributes.isPermaLink
| get content
| where tag == channel
| get content
| flatten
| where tag == item
| get content
| flatten
| where tag == guid
| get 0.attributes.isPermaLink
"#
));