Added support for XML formatting (#1129)

As a side effect, XHTML responses will be pretty-printed too.
This commit is contained in:
Mickaël Schoentgen 2021-08-31 22:49:53 +02:00 committed by GitHub
parent 8618f12fce
commit d10e108b5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 387 additions and 2 deletions

View File

@ -40,5 +40,4 @@ Patches and ideas
* `Jeff Byrnes <https://github.com/jeffbyrnes>`_ * `Jeff Byrnes <https://github.com/jeffbyrnes>`_
* `Denis Belavin <https://github.com/LuckyDenis>`_ * `Denis Belavin <https://github.com/LuckyDenis>`_
* `Mickaël Schoentgen <https://github.com/BoboTiG>`_ * `Mickaël Schoentgen <https://github.com/BoboTiG>`_
* `Rohit Sehgal <https://github.com/r0hi7>`_

View File

@ -11,6 +11,7 @@ This project adheres to `Semantic Versioning <https://semver.org/>`_.
------------------------- -------------------------
* Added ``--raw`` to allow specifying the raw request body without extra processing as * Added ``--raw`` to allow specifying the raw request body without extra processing as
an alternative to ``stdin``. (`#534`_) an alternative to ``stdin``. (`#534`_)
* Added support for XML formatting. (`#1129`_)
* Fixed ``--continue --download`` with a single byte to be downloaded left. (`#1032`_) * Fixed ``--continue --download`` with a single byte to be downloaded left. (`#1032`_)
* Fixed ``--verbose`` HTTP 307 redirects with streamed request body. (`#1088`_) * Fixed ``--verbose`` HTTP 307 redirects with streamed request body. (`#1088`_)
* Fixed handling of session files with ``Cookie:`` followed by other headers. (`#1126`_) * Fixed handling of session files with ``Cookie:`` followed by other headers. (`#1126`_)
@ -507,3 +508,4 @@ This project adheres to `Semantic Versioning <https://semver.org/>`_.
.. _#1088: https://github.com/httpie/httpie/issues/1088 .. _#1088: https://github.com/httpie/httpie/issues/1088
.. _#1094: https://github.com/httpie/httpie/issues/1094 .. _#1094: https://github.com/httpie/httpie/issues/1094
.. _#1126: https://github.com/httpie/httpie/issues/1126 .. _#1126: https://github.com/httpie/httpie/issues/1126
.. _#1129: https://github.com/httpie/httpie/issues/1129

View File

@ -1580,6 +1580,10 @@ The following options are available:
+--------------------+----------+---------------+------------------------------+ +--------------------+----------+---------------+------------------------------+
| ``json.sort_keys`` | ``true`` | ``--sorted``, ``--unsorted`` | | ``json.sort_keys`` | ``true`` | ``--sorted``, ``--unsorted`` |
+--------------------+----------+---------------+------------------------------+ +--------------------+----------+---------------+------------------------------+
| ``xml.format`` | ``true`` | N/A |
+-------------------------------+---------------+------------------------------+
| ``xml.indent`` | ``4`` | N/A |
+--------------------+----------+---------------+------------------------------+
For example, this is how you would disable the default header and JSON key For example, this is how you would disable the default header and JSON key
sorting, and specify a custom JSON indent size: sorting, and specify a custom JSON indent size:

View File

@ -29,6 +29,7 @@ PACKAGES = [
'idna', 'idna',
'chardet', 'chardet',
'PySocks', 'PySocks',
'defusedxml',
] ]

View File

@ -90,6 +90,8 @@ DEFAULT_FORMAT_OPTIONS = [
'json.format:true', 'json.format:true',
'json.indent:4', 'json.indent:4',
'json.sort_keys:true', 'json.sort_keys:true',
'xml.format:true',
'xml.indent:4',
] ]
SORTED_FORMAT_OPTIONS = [ SORTED_FORMAT_OPTIONS = [
'headers.sort:true', 'headers.sort:true',

View File

@ -0,0 +1,59 @@
import sys
from typing import TYPE_CHECKING, Optional
from ...constants import UTF8
from ...plugins import FormatterPlugin
if TYPE_CHECKING:
from xml.dom.minidom import Document
def parse_xml(data: str) -> 'Document':
    """Build a :class:`~xml.dom.minidom.Document` from the XML text in `data`.

    Parsing is delegated to ``defusedxml.minidom.parseString``; whatever that
    call raises is propagated to the caller unchanged.
    """
    # Function-scope import: the dependency is only loaded when XML is parsed.
    import defusedxml.minidom

    return defusedxml.minidom.parseString(data)
def pretty_xml(document: 'Document',
               encoding: Optional[str] = UTF8,
               indent: int = 4,
               standalone: Optional[bool] = None) -> str:
    """Render the given :class:`~xml.dom.minidom.Document` `document` into a prettified string.

    :param document: parsed document to serialize.
    :param encoding: encoding used for serialization and written to the XML
        declaration; falls back to ``UTF8`` when ``None`` or empty.
    :param indent: number of spaces used per nesting level.
    :param standalone: value for the ``standalone`` attribute of the XML
        declaration; only forwarded to ``toprettyxml()`` on Python 3.9+.
    :return: the serialized document with whitespace-only lines removed.
    """
    output_encoding = encoding or UTF8
    kwargs = {
        'encoding': output_encoding,
        'indent': ' ' * indent,
    }
    if standalone is not None and sys.version_info >= (3, 9):
        kwargs['standalone'] = standalone
    # With an explicit encoding, `toprettyxml()` returns bytes in that
    # encoding, so decode with the very same codec. A bare `.decode()` assumes
    # UTF-8 and fails (or garbles text) for documents declared otherwise.
    body = document.toprettyxml(**kwargs).decode(output_encoding)
    # Remove blank lines automatically added by `toprettyxml()`.
    return '\n'.join(line for line in body.splitlines() if line.strip())
class XMLFormatter(FormatterPlugin):
    """Formatter plugin that pretty-prints bodies whose MIME type contains ``xml``."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The enabled flag mirrors the `xml.format` format option.
        xml_options = self.format_options['xml']
        self.enabled = xml_options['format']

    def format_body(self, body: str, mime: str):
        """Return `body` prettified, or untouched when it is not usable XML.

        The body is returned as-is when `mime` does not contain ``xml``, or
        when parsing raises ``ExpatError`` or ``DefusedXmlException``.
        """
        if 'xml' not in mime:
            return body

        from xml.parsers.expat import ExpatError
        from defusedxml.common import DefusedXmlException

        try:
            document = parse_xml(body)
        except (ExpatError, DefusedXmlException):
            # Invalid XML (ExpatError) or unsafe XML (DefusedXmlException):
            # ignore the error and leave the body unformatted.
            return body

        return pretty_xml(
            document,
            encoding=document.encoding,
            indent=self.format_options['xml']['indent'],
            standalone=document.standalone,
        )

View File

@ -2,6 +2,7 @@ from .manager import PluginManager
from .builtin import BasicAuthPlugin, DigestAuthPlugin from .builtin import BasicAuthPlugin, DigestAuthPlugin
from ..output.formatters.headers import HeadersFormatter from ..output.formatters.headers import HeadersFormatter
from ..output.formatters.json import JSONFormatter from ..output.formatters.json import JSONFormatter
from ..output.formatters.xml import XMLFormatter
from ..output.formatters.colors import ColorFormatter from ..output.formatters.colors import ColorFormatter
@ -14,5 +15,6 @@ plugin_manager.register(
DigestAuthPlugin, DigestAuthPlugin,
HeadersFormatter, HeadersFormatter,
JSONFormatter, JSONFormatter,
XMLFormatter,
ColorFormatter, ColorFormatter,
) )

View File

@ -25,6 +25,7 @@ dev_require = [
'wheel', 'wheel',
] ]
install_requires = [ install_requires = [
'defusedxml>=0.6.0',
'requests[socks]>=2.22.0', 'requests[socks]>=2.22.0',
'Pygments>=2.5.2', 'Pygments>=2.5.2',
'requests-toolbelt>=0.9.1', 'requests-toolbelt>=0.9.1',

View File

@ -17,6 +17,9 @@ FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt' FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json' JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin' BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))
XML_FILES_INVALID = list((XML_FILES_PATH / 'invalid').glob('*.xml'))
FILE_PATH_ARG = patharg(FILE_PATH) FILE_PATH_ARG = patharg(FILE_PATH)
BIN_FILE_PATH_ARG = patharg(BIN_FILE_PATH) BIN_FILE_PATH_ARG = patharg(BIN_FILE_PATH)

View File

@ -0,0 +1,5 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "123 &b;" >
<!ENTITY b "&a;">
]>
<bomb>&a;</bomb>

View File

@ -0,0 +1,4 @@
<!DOCTYPE external [
<!ENTITY ee SYSTEM "http://www.w3schools.com/xml/note.xml">
]>
<root>&ee;</root>

View File

@ -0,0 +1,5 @@
<!DOCTYPE external [
<!ENTITY ee SYSTEM "file:///PATH/TO/xmltestdata/simple.xml">
]>
<root>&ee;</root>

View File

@ -0,0 +1 @@
some string

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,20 @@
<!-- Tested with xalan-j_2_7_1-bin.zip, Xerces-J-bin.2.11.0.tar.gz on
OpenJDK 1.7.0_15
$ LC_ALL=C java -cp xalan.jar:serializer.jar:xercesImpl.jar:xml-apis.jar \
org.apache.xalan.xslt.Process -in simple.xml -xsl xalan_exec.xsl
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:rt="http://xml.apache.org/xalan/java/java.lang.Runtime"
xmlns:ob="http://xml.apache.org/xalan/java/java.lang.Object"
exclude-result-prefixes="rt ob">
<xsl:template match="/">
<xsl:variable name="runtimeObject" select="rt:getRuntime()"/>
<xsl:variable name="command"
select="rt:exec($runtimeObject, &apos;/usr/bin/notify-send SomethingBadHappensHere&apos;)"/>
<xsl:variable name="commandAsString" select="ob:toString($command)"/>
<xsl:value-of select="$commandAsString"/>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,18 @@
<!-- Tested with xalan-j_2_7_1-bin.zip, Xerces-J-bin.2.11.0.tar.gz on
OpenJDK 1.7.0_15
$ LC_ALL=C java -cp xalan.jar:serializer.jar:xercesImpl.jar:xml-apis.jar \
org.apache.xalan.xslt.Process -in simple.xml -xsl xalan_write.xsl
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:redirect="http://xml.apache.org/xalan/redirect"
extension-element-prefixes="redirect">
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<xsl:template match="/">
<redirect:write file="xalan_redirect.txt" method="text">
<xsl:text>Something bad happens here!&#13;</xsl:text>
</redirect:write>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,7 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "1234567890" >
<!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;">
<!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY d "&c;&c;&c;&c;&c;&c;&c;&c;">
]>
<bomb>&c;</bomb>

View File

@ -0,0 +1,4 @@
<!DOCTYPE xmlbomb [
<!ENTITY a "1234567890">
]>
<root>text<bomb>&a;</bomb><tag/></root>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html>
<head/>
<body>text</body>
</html>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html><head/><body>text</body></html>

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<?pi data?>
<!-- comment -->
<root xmlns="namespace">
<element key="value">text</element>
<element>text</element>
tail
<empty-element/>
</root>

View File

@ -0,0 +1 @@
<?pi data?><!-- comment --><root xmlns='namespace'><element key='value'>text</element><element>text</element>tail<empty-element/></root>

View File

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE s1>
<s1>........</s1>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- comment -->
<root>
<element key="value">text</element>
<element>text</element>
tail
<empty-element/>
</root>

View File

@ -0,0 +1 @@
<!-- comment --><root><element key='value'>text</element><element>text</element>tail<empty-element/></root>

View File

@ -0,0 +1,29 @@
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//
<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>
This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr>
1.0 Strict document.
<br/>
<img id="validation-icon" src="http://www.w3.org/Icons/valid-xhtml10" alt="Valid XHTML 1.0 Strict"/>
<br/>
<object id="pdf-object" name="pdf-object" type="application/pdf" data="http://www.w3.org/TR/xhtml1/xhtml1.pdf" width="100%" height="500">
</object>
</p>
</body>
</html>

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//
<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>
This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr>
1.0 Strict document.
<br/>
<img alt="Valid XHTML 1.0 Strict" id="validation-icon" src="http://www.w3.org/Icons/valid-xhtml10"/>
<br/>
<object data="http://www.w3.org/TR/xhtml1/xhtml1.pdf" height="500" id="pdf-object" name="pdf-object" type="application/pdf" width="100%">
</object>
</p>
</body>
</html>

View File

@ -0,0 +1,30 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>XHTML 1.0 Strict Example</title>
<script type="text/javascript">
//<![CDATA[
function loadpdf() {
document.getElementById("pdf-object").src="http://www.w3.org/TR/xhtml1/xhtml1.pdf";
}
//]]>
</script>
</head>
<body onload="loadpdf()">
<p>This is an example of an
<abbr title="Extensible HyperText Markup Language">XHTML</abbr> 1.0 Strict document.<br />
<img id="validation-icon"
src="http://www.w3.org/Icons/valid-xhtml10"
alt="Valid XHTML 1.0 Strict"/><br />
<object id="pdf-object"
name="pdf-object"
type="application/pdf"
data="http://www.w3.org/TR/xhtml1/xhtml1.pdf"
width="100%"
height="500">
</object>
</p>
</body>
</html>

View File

@ -377,6 +377,10 @@ class TestFormatOptions:
'indent': 10, 'indent': 10,
'format': True 'format': True
}, },
'xml': {
'format': True,
'indent': 4,
},
} }
), ),
( (
@ -392,6 +396,10 @@ class TestFormatOptions:
'indent': 4, 'indent': 4,
'format': True 'format': True
}, },
'xml': {
'format': True,
'indent': 4,
},
} }
), ),
( (
@ -409,6 +417,10 @@ class TestFormatOptions:
'indent': 4, 'indent': 4,
'format': True 'format': True
}, },
'xml': {
'format': True,
'indent': 4,
},
} }
), ),
( (
@ -423,6 +435,8 @@ class TestFormatOptions:
( (
[ [
'--format-options=json.indent:2', '--format-options=json.indent:2',
'--format-options=xml.format:false',
'--format-options=xml.indent:2',
'--unsorted', '--unsorted',
'--no-unsorted', '--no-unsorted',
], ],
@ -435,6 +449,10 @@ class TestFormatOptions:
'indent': 2, 'indent': 2,
'format': True 'format': True
}, },
'xml': {
'format': False,
'indent': 2,
},
} }
), ),
( (
@ -452,6 +470,10 @@ class TestFormatOptions:
'indent': 2, 'indent': 2,
'format': True 'format': True
}, },
'xml': {
'format': True,
'indent': 4,
},
} }
), ),
( (
@ -470,6 +492,10 @@ class TestFormatOptions:
'indent': 2, 'indent': 2,
'format': True 'format': True
}, },
'xml': {
'format': True,
'indent': 4,
},
} }
), ),
], ],

90
tests/test_xml.py Normal file
View File

@ -0,0 +1,90 @@
import sys
import pytest
import responses
from httpie.constants import UTF8
from httpie.output.formatters.xml import parse_xml, pretty_xml
from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID
from .utils import http
# Single-line XML document used as the mocked response body (and as the input
# for the expected outputs) in `test_xml_format_options`.
SAMPLE_XML_DATA = '<?xml version="1.0" encoding="utf-8"?><root><e>text</e></root>'
@pytest.mark.parametrize(
    'options, expected_xml',
    [
        ('xml.format:false', SAMPLE_XML_DATA),
        ('xml.indent:2', pretty_xml(parse_xml(SAMPLE_XML_DATA), indent=2)),
        ('xml.indent:4', pretty_xml(parse_xml(SAMPLE_XML_DATA))),
    ]
)
@responses.activate
def test_xml_format_options(options, expected_xml):
    """Each `--format-options` value must produce the matching XML output."""
    endpoint = 'https://example.org'
    responses.add(
        responses.GET,
        endpoint,
        body=SAMPLE_XML_DATA,
        content_type='application/xml',
    )

    output = http('--format-options', options, endpoint)

    assert expected_xml in output
@pytest.mark.parametrize('file', XML_FILES_VALID)
@responses.activate
def test_valid_xml(file):
    """Exercise the XML formatter with fixtures containing comments, doctypes
    and other XML-specific subtleties.
    """
    is_standalone_fixture = 'standalone' in file.stem
    if is_standalone_fixture and sys.version_info < (3, 9):
        pytest.skip('Standalone XML requires Python 3.9+')

    # Every `*_raw.xml` fixture has a sibling `*_formatted.xml` with the
    # expected output.
    formatted_file = file.with_name(file.name.replace('_raw', '_formatted'))
    raw_xml = file.read_text(encoding=UTF8)
    formatted_xml = formatted_file.read_text(encoding=UTF8)

    endpoint = 'https://example.org'
    responses.add(
        responses.GET,
        endpoint,
        body=raw_xml,
        content_type='application/xml',
    )

    output = http(endpoint)

    assert formatted_xml in output
@responses.activate
def test_xml_xhtml():
    """XHTML responses go through the XML formatter as well."""
    raw_file = XML_FILES_PATH / 'xhtml' / 'xhtml_raw.xml'
    raw_xml = raw_file.read_text(encoding=UTF8)

    # Python < 3.8 was sorting attributes (https://bugs.python.org/issue34160)
    # so the expected output depends on the running Python version.
    if sys.version_info < (3, 8):
        formatted_name = 'xhtml_formatted_python_less_than_3.8.xml'
    else:
        formatted_name = 'xhtml_formatted.xml'
    formatted_xml = raw_file.with_name(formatted_name).read_text(encoding=UTF8)

    endpoint = 'https://example.org'
    responses.add(
        responses.GET,
        endpoint,
        body=raw_xml,
        content_type='application/xhtml+xml',
    )

    output = http(endpoint)
    print(output)

    assert formatted_xml in output
@pytest.mark.parametrize('file', XML_FILES_INVALID)
@responses.activate
def test_invalid_xml(file):
    """Problematic XML files must be left unformatted and must not make
    HTTPie crash.
    """
    raw_xml = file.read_text(encoding=UTF8)
    endpoint = 'https://example.org'
    responses.add(
        responses.GET,
        endpoint,
        body=raw_xml,
        content_type='application/xml',
    )

    output = http(endpoint)

    # No formatting done, data is simply printed as-is.
    assert raw_xml in output