2021-08-31 22:49:53 +02:00
|
|
|
from typing import TYPE_CHECKING, Optional
|
|
|
|
|
2021-10-06 17:27:07 +02:00
|
|
|
from ...encoding import UTF8
|
2021-08-31 22:49:53 +02:00
|
|
|
from ...plugins import FormatterPlugin
|
|
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
|
|
from xml.dom.minidom import Document
|
|
|
|
|
|
|
|
|
2021-12-14 16:15:19 +01:00
|
|
|
# Opening marker of an XML declaration; `parse_declaration()` checks that a body starts with it.
XML_DECLARATION_OPEN = '<?xml'
|
|
|
|
# Closing marker of an XML declaration; `parse_declaration()` cuts the body right after it.
XML_DECLARATION_CLOSE = '?>'
|
|
|
|
|
|
|
|
|
2021-08-31 22:49:53 +02:00
|
|
|
def parse_xml(data: str) -> 'Document':
    """Turn the XML text `data` into a :class:`~xml.dom.minidom.Document`.

    Parsing is delegated to ``defusedxml``'s drop-in ``minidom`` module
    rather than the standard-library parser.
    """
    # The import is performed on each call, inside the function body.
    from defusedxml import minidom as defused_minidom

    document = defused_minidom.parseString(data)
    return document
|
|
|
|
|
|
|
|
|
2021-12-14 16:15:19 +01:00
|
|
|
def parse_declaration(raw_body: str) -> Optional[str]:
    """Extract the XML declaration that opens `raw_body`, if there is one.

    Leading and trailing whitespace of `raw_body` is ignored.

    :param raw_body: the (possibly XML) text to inspect.
    :return: the text from ``<?xml`` up to and including the first ``?>``,
        or ``None`` when the body does not start with a declaration or the
        declaration is never closed.
    """
    body = raw_body.strip()
    # XMLDecl ::= '<?xml' DECL_CONTENT '?>'
    if not body.startswith(XML_DECLARATION_OPEN):
        return None

    # The declaration grammar requires whitespace right after '<?xml'
    # (it precedes the mandatory `version` attribute). Checking for it keeps
    # processing instructions whose target merely begins with "xml", such as
    # '<?xml-stylesheet ...?>', from being mistaken for the declaration.
    content_start = len(XML_DECLARATION_OPEN)
    if not body[content_start:content_start + 1].isspace():
        return None

    end = body.find(XML_DECLARATION_CLOSE, content_start)
    if end == -1:
        # Opened but never closed: not a usable declaration.
        return None
    return body[:end + len(XML_DECLARATION_CLOSE)]
|
|
|
|
|
|
|
|
|
2021-08-31 22:49:53 +02:00
|
|
|
def pretty_xml(document: 'Document',
               declaration: Optional[str] = None,
               encoding: Optional[str] = UTF8,
               indent: int = 2) -> str:
    """Serialize `document` as indented, human-readable XML text.

    :param document: the parsed :class:`~xml.dom.minidom.Document` to render.
    :param declaration: declaration line to put first in the output; when
        falsy, the output carries no declaration at all.
    :param encoding: codec used to serialize and decode the document;
        a falsy value falls back to ``UTF8``.
    :param indent: number of spaces used per nesting level.
    :return: the rendered lines joined with ``'\\n'``.
    """
    codec = encoding or UTF8
    rendered = document.toprettyxml(encoding=codec, indent=' ' * indent)
    text = rendered.decode(codec)

    # `toprettyxml()` emits blank lines on its own; keep only lines
    # that hold something other than whitespace.
    kept = [row for row in text.splitlines() if row.strip()]

    # xml.dom always writes a declaration, whether or not the original body
    # had one. Drop the generated line and, if the caller supplied a
    # declaration, put that one in its place.
    if kept and parse_declaration(kept[0]):
        del kept[0]
        if declaration:
            kept.insert(0, declaration)

    return '\n'.join(kept)
|
2021-08-31 22:49:53 +02:00
|
|
|
|
|
|
|
|
|
|
|
class XMLFormatter(FormatterPlugin):
    """Formatter plugin that re-indents XML bodies."""

    def __init__(self, **kwargs):
        """Forward `kwargs` to the base plugin and read the XML on/off flag."""
        super().__init__(**kwargs)
        xml_options = self.format_options['xml']
        self.enabled = xml_options['format']

    def format_body(self, body: str, mime: str):
        """Return `body` pretty-printed when `mime` mentions XML.

        The body is returned untouched when the MIME type does not contain
        ``'xml'``, or when parsing fails because the XML is invalid or is
        rejected as unsafe by ``defusedxml``.
        """
        if 'xml' not in mime:
            return body

        from xml.parsers.expat import ExpatError
        from defusedxml.common import DefusedXmlException

        # Capture the declaration of the original body up front so that it
        # can be restored verbatim in the prettified output.
        declaration = parse_declaration(body)
        try:
            document = parse_xml(body)
        except (ExpatError, DefusedXmlException):
            # Invalid XML (ExpatError) or unsafe XML (DefusedXmlException):
            # hand the body back as received.
            return body

        return pretty_xml(
            document,
            encoding=document.encoding,
            indent=self.format_options['xml']['indent'],
            declaration=declaration,
        )
|