Added support for XML formatting (#1129)

As a side effect, XHTML responses will be pretty-printed too.
This commit is contained in:
Mickaël Schoentgen 2021-08-31 22:49:53 +02:00 committed by GitHub
parent 8618f12fce
commit d10e108b5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 387 additions and 2 deletions

View File

@ -40,5 +40,4 @@ Patches and ideas
* `Jeff Byrnes <https://github.com/jeffbyrnes>`_
* `Denis Belavin <https://github.com/LuckyDenis>`_
* `Mickaël Schoentgen <https://github.com/BoboTiG>`_
* `Rohit Sehgal <https://github.com/r0hi7>`_

View File

@ -11,6 +11,7 @@ This project adheres to `Semantic Versioning <https://semver.org/>`_.
-------------------------
* Added ``--raw`` to allow specifying the raw request body without extra processing as
an alternative to ``stdin``. (`#534`_)
* Added support for XML formatting. (`#1129`_)
* Fixed ``--continue --download`` with a single byte to be downloaded left. (`#1032`_)
* Fixed ``--verbose`` HTTP 307 redirects with streamed request body. (`#1088`_)
* Fixed handling of session files with ``Cookie:`` followed by other headers. (`#1126`_)
@ -507,3 +508,4 @@ This project adheres to `Semantic Versioning <https://semver.org/>`_.
.. _#1088: https://github.com/httpie/httpie/issues/1088
.. _#1094: https://github.com/httpie/httpie/issues/1094
.. _#1126: https://github.com/httpie/httpie/issues/1126
.. _#1129: https://github.com/httpie/httpie/issues/1129

View File

@ -1580,6 +1580,10 @@ The following options are available:
+--------------------+----------+---------------+------------------------------+
| ``json.sort_keys`` | ``true`` | ``--sorted``, ``--unsorted`` |
+--------------------+----------+---------------+------------------------------+
| ``xml.format`` | ``true`` | N/A |
+--------------------+----------+---------------+------------------------------+
| ``xml.indent`` | ``4`` | N/A |
+--------------------+----------+---------------+------------------------------+
For example, this is how you would disable the default header and JSON key
sorting, and specify a custom JSON indent size:

View File

@ -29,6 +29,7 @@ PACKAGES = [
'idna',
'chardet',
'PySocks',
'defusedxml',
]

View File

@ -90,6 +90,8 @@ DEFAULT_FORMAT_OPTIONS = [
'json.format:true',
'json.indent:4',
'json.sort_keys:true',
'xml.format:true',
'xml.indent:4',
]
SORTED_FORMAT_OPTIONS = [
'headers.sort:true',

View File

@ -0,0 +1,59 @@
import sys
from typing import TYPE_CHECKING, Optional
from ...constants import UTF8
from ...plugins import FormatterPlugin
if TYPE_CHECKING:
from xml.dom.minidom import Document
def parse_xml(data: str) -> 'Document':
    """Build a :class:`~xml.dom.minidom.Document` from the XML text `data`.

    Parsing is delegated to ``defusedxml.minidom.parseString``.
    """
    # Function-scope import: nothing from defusedxml is imported at module load.
    import defusedxml.minidom

    return defusedxml.minidom.parseString(data)
def pretty_xml(document: 'Document',
               encoding: Optional[str] = UTF8,
               indent: int = 4,
               standalone: Optional[bool] = None) -> str:
    """Render the given :class:`~xml.dom.minidom.Document` `document` into a prettified string.

    :param document: parsed XML document to serialize.
    :param encoding: encoding written into the XML declaration and used to
        serialize the document; falls back to UTF-8 when empty or ``None``.
    :param indent: number of spaces used per nesting level.
    :param standalone: value for the ``standalone`` declaration attribute;
        only forwarded to ``toprettyxml()`` on Python 3.9+.
    :return: the indented XML text with whitespace-only lines removed.
    """
    effective_encoding = encoding or UTF8
    kwargs = {
        'encoding': effective_encoding,
        'indent': ' ' * indent,
    }
    if standalone is not None and sys.version_info >= (3, 9):
        kwargs['standalone'] = standalone
    # With an explicit `encoding`, `toprettyxml()` returns bytes in that
    # encoding, so decode with the same codec instead of the UTF-8 default
    # (which fails or garbles text for e.g. ISO-8859-1 documents).
    body = document.toprettyxml(**kwargs).decode(effective_encoding)
    # Remove blank lines automatically added by `toprettyxml()`.
    return '\n'.join(line for line in body.splitlines() if line.strip())
class XMLFormatter(FormatterPlugin):
    """Formatter plugin that pretty-prints bodies whose MIME type contains ``xml``."""

    def __init__(self, **kwargs):
        """Initialize the plugin and set `enabled` from the ``xml.format`` option."""
        super().__init__(**kwargs)
        xml_options = self.format_options['xml']
        self.enabled = xml_options['format']

    def format_body(self, body: str, mime: str):
        """Return `body` re-indented as XML, or unchanged when it cannot be formatted.

        The body is returned as-is when `mime` does not contain ``xml``, or when
        parsing raises ``ExpatError`` (invalid XML) or ``DefusedXmlException``
        (XML rejected as unsafe).
        """
        if 'xml' not in mime:
            return body

        from xml.parsers.expat import ExpatError
        from defusedxml.common import DefusedXmlException

        try:
            document = parse_xml(body)
        except (ExpatError, DefusedXmlException):
            # Invalid or unsafe XML: leave the body untouched.
            return body

        return pretty_xml(
            document,
            encoding=document.encoding,
            indent=self.format_options['xml']['indent'],
            standalone=document.standalone,
        )

View File

@ -2,6 +2,7 @@ from .manager import PluginManager
from .builtin import BasicAuthPlugin, DigestAuthPlugin
from ..output.formatters.headers import HeadersFormatter
from ..output.formatters.json import JSONFormatter
from ..output.formatters.xml import XMLFormatter
from ..output.formatters.colors import ColorFormatter
@ -14,5 +15,6 @@ plugin_manager.register(
DigestAuthPlugin,
HeadersFormatter,
JSONFormatter,
XMLFormatter,
ColorFormatter,
)

View File

@ -25,6 +25,7 @@ dev_require = [
'wheel',
]
install_requires = [
'defusedxml>=0.6.0',
'requests[socks]>=2.22.0',
'Pygments>=2.5.2',
'requests-toolbelt>=0.9.1',

View File

@ -17,6 +17,9 @@ FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
# Directory holding the XML fixture files.
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
# Every `*_raw.xml` file found directly under `xmldata/valid`.
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))
# Every `*.xml` file found directly under `xmldata/invalid`.
XML_FILES_INVALID = list((XML_FILES_PATH / 'invalid').glob('*.xml'))
FILE_PATH_ARG = patharg(FILE_PATH)
BIN_FILE_PATH_ARG = patharg(BIN_FILE_PATH)

View File

@ -0,0 +1,5 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "123 &b;" >
<!ENTITY b "&a;">
]>
<bomb>&a;</bomb>

View File

@ -0,0 +1,4 @@
<!DOCTYPE external [
<!ENTITY ee SYSTEM "http://www.w3schools.com/xml/note.xml">
]>
<root>&ee;</root>

View File

@ -0,0 +1,5 @@
<!DOCTYPE external [
<!ENTITY ee SYSTEM "file:///PATH/TO/xmltestdata/simple.xml">
]>
<root>&ee;</root>

View File

@ -0,0 +1 @@
some string

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,20 @@
<!-- Tested with xalan-j_2_7_1-bin.zip, Xerces-J-bin.2.11.0.tar.gz on
OpenJDK 1.7.0_15
$ LC_ALL=C java -cp xalan.jar:serializer.jar:xercesImpl.jar:xml-apis.jar \
org.apache.xalan.xslt.Process -in simple.xml -xsl xalan_exec.xsl
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:rt="http://xml.apache.org/xalan/java/java.lang.Runtime"
xmlns:ob="http://xml.apache.org/xalan/java/java.lang.Object"
exclude-result-prefixes="rt ob">
<xsl:template match="/">
<xsl:variable name="runtimeObject" select="rt:getRuntime()"/>
<xsl:variable name="command"
select="rt:exec($runtimeObject, &apos;/usr/bin/notify-send SomethingBadHappensHere&apos;)"/>
<xsl:variable name="commandAsString" select="ob:toString($command)"/>
<xsl:value-of select="$commandAsString"/>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,18 @@
<!-- Tested with xalan-j_2_7_1-bin.zip, Xerces-J-bin.2.11.0.tar.gz on
OpenJDK 1.7.0_15
$ LC_ALL=C java -cp xalan.jar:serializer.jar:xercesImpl.jar:xml-apis.jar \
org.apache.xalan.xslt.Process -in simple.xml -xsl xalan_write.xsl
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:redirect="http://xml.apache.org/xalan/redirect"
extension-element-prefixes="redirect">
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<xsl:template match="/">
<redirect:write file="xalan_redirect.txt" method="text">
<xsl:text>Something bad happens here!&#13;</xsl:text>
</redirect:write>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,7 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "1234567890" >
<!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;">
<!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY d "&c;&c;&c;&c;&c;&c;&c;&c;">
]>
<bomb>&c;</bomb>

View File

@ -0,0 +1,4 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "1234567890">
]>
<root>text<bomb>&a;</bomb><tag/></root>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html>
<head/>
<body>text</body>
</html>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html><head/><body>text</body></html>

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<?pi data?>
<!-- comment -->
<root xmlns="namespace">
<element key="value">text</element>
<element>text</element>
tail
<empty-element/>
</root>

View File

@ -0,0 +1 @@
<?pi data?><!-- comment --><root xmlns='namespace'><element key='value'>text</element><element>text</element>tail<empty-element/></root>

View File

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- comment -->
<root>
<element key="value">text</element>
<element>text</element>
tail
<empty-element/>
</root>

View File

@ -0,0 +1 @@
<!-- comment --><root><element key='value'>text</element><element>text</element>tail<empty-element/></root>

View File

@ -0,0 +1,29 @@
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//
<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>
This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr>
1.0 Strict document.
<br/>
<img id="validation-icon" src="http://www.w3.org/Icons/valid-xhtml10" alt="Valid XHTML 1.0 Strict"/>
<br/>
<object id="pdf-object" name="pdf-object" type="application/pdf" data="http://www.w3.org/TR/xhtml1/xhtml1.pdf" width="100%" height="500">
</object>
</p>
</body>
</html>

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//
<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>
This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr>
1.0 Strict document.
<br/>
<img alt="Valid XHTML 1.0 Strict" id="validation-icon" src="http://www.w3.org/Icons/valid-xhtml10"/>
<br/>
<object data="http://www.w3.org/TR/xhtml1/xhtml1.pdf" height="500" id="pdf-object" name="pdf-object" type="application/pdf" width="100%">
</object>
</p>
</body>
</html>

View File

@ -0,0 +1,30 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr> 1.0 Strict document.<br />
<img id="validation-icon"
src="http://www.w3.org/Icons/valid-xhtml10"
alt="Valid XHTML 1.0 Strict"/><br />
<object id="pdf-object"
name="pdf-object"
type="application/pdf"
data="http://www.w3.org/TR/xhtml1/xhtml1.pdf"
width="100%"
height="500">
</object>
</p>
</body>
</html>

View File

@ -377,6 +377,10 @@ class TestFormatOptions:
'indent': 10,
'format': True
},
'xml': {
'format': True,
'indent': 4,
},
}
),
(
@ -392,6 +396,10 @@ class TestFormatOptions:
'indent': 4,
'format': True
},
'xml': {
'format': True,
'indent': 4,
},
}
),
(
@ -409,6 +417,10 @@ class TestFormatOptions:
'indent': 4,
'format': True
},
'xml': {
'format': True,
'indent': 4,
},
}
),
(
@ -423,6 +435,8 @@ class TestFormatOptions:
(
[
'--format-options=json.indent:2',
'--format-options=xml.format:false',
'--format-options=xml.indent:2',
'--unsorted',
'--no-unsorted',
],
@ -435,6 +449,10 @@ class TestFormatOptions:
'indent': 2,
'format': True
},
'xml': {
'format': False,
'indent': 2,
},
}
),
(
@ -452,6 +470,10 @@ class TestFormatOptions:
'indent': 2,
'format': True
},
'xml': {
'format': True,
'indent': 4,
},
}
),
(
@ -470,6 +492,10 @@ class TestFormatOptions:
'indent': 2,
'format': True
},
'xml': {
'format': True,
'indent': 4,
},
}
),
],

90
tests/test_xml.py Normal file
View File

@ -0,0 +1,90 @@
import sys
import pytest
import responses
from httpie.constants import UTF8
from httpie.output.formatters.xml import parse_xml, pretty_xml
from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID
from .utils import http
# Compact single-line XML document served as the mocked response body in
# `test_xml_format_options`.
SAMPLE_XML_DATA = '<?xml version="1.0" encoding="utf-8"?><root><e>text</e></root>'
@pytest.mark.parametrize(
    'options, expected_xml',
    [
        ('xml.format:false', SAMPLE_XML_DATA),
        ('xml.indent:2', pretty_xml(parse_xml(SAMPLE_XML_DATA), indent=2)),
        ('xml.indent:4', pretty_xml(parse_xml(SAMPLE_XML_DATA))),
    ]
)
@responses.activate
def test_xml_format_options(options, expected_xml):
    """Each ``--format-options`` value yields the matching XML rendering."""
    endpoint = 'https://example.org'
    responses.add(
        responses.GET,
        endpoint,
        body=SAMPLE_XML_DATA,
        content_type='application/xml',
    )

    output = http('--format-options', options, endpoint)

    assert expected_xml in output
@pytest.mark.parametrize('file', XML_FILES_VALID)
@responses.activate
def test_valid_xml(file):
    """Test XML formatter limits with data containing comments, doctypes
    and other XML-specific subtleties.
    """
    is_standalone_case = 'standalone' in file.stem
    if is_standalone_case and sys.version_info < (3, 9):
        pytest.skip('Standalone XML requires Python 3.9+')

    endpoint = 'https://example.org'
    raw_xml = file.read_text(encoding=UTF8)
    # The expected output sits next to the input, with `_raw` swapped for `_formatted`.
    formatted_name = file.name.replace('_raw', '_formatted')
    expected_output = file.with_name(formatted_name).read_text(encoding=UTF8)
    responses.add(
        responses.GET,
        endpoint,
        body=raw_xml,
        content_type='application/xml',
    )

    output = http(endpoint)

    assert expected_output in output
@responses.activate
def test_xml_xhtml():
    """XHTML responses are handled by the XML formatter."""
    url = 'https://example.org'
    file = XML_FILES_PATH / 'xhtml' / 'xhtml_raw.xml'
    xml_data = file.read_text(encoding=UTF8)

    # Python < 3.8 was sorting attributes (https://bugs.python.org/issue34160)
    # so we have 2 different output expected given the Python version.
    if sys.version_info < (3, 8):
        expected_file_name = 'xhtml_formatted_python_less_than_3.8.xml'
    else:
        expected_file_name = 'xhtml_formatted.xml'
    expected_xml_file = file.with_name(expected_file_name)
    expected_xml_output = expected_xml_file.read_text(encoding=UTF8)

    responses.add(responses.GET, url, body=xml_data,
                  content_type='application/xhtml+xml')

    # The leftover debugging `print(r)` was dropped here.
    r = http(url)
    assert expected_xml_output in r
@pytest.mark.parametrize('file', XML_FILES_INVALID)
@responses.activate
def test_invalid_xml(file):
    """Test several problematic XML files: none should be formatted
    and none should make HTTPie crash.
    """
    endpoint = 'https://example.org'
    raw_xml = file.read_text(encoding=UTF8)
    responses.add(
        responses.GET,
        endpoint,
        body=raw_xml,
        content_type='application/xml',
    )

    output = http(endpoint)

    # No formatting done, data is simply printed as-is
    assert raw_xml in output