Handle Unicode when formatting XML.

This commit is contained in:
Jakub Roztocil 2013-06-02 20:25:36 +02:00
parent 8d302f91f9
commit 1bad62ab0e

View File

@ -268,8 +268,9 @@ class PrettyStream(EncodedStream):
def _process_body(self, chunk):
    """Decode a body chunk, run it through the processor, and re-encode it.

    :param chunk: An encoded piece of the message body.
    :return: The processed text encoded with ``self.output_encoding``.

    """
    # Undecodable bytes are substituted ('replace') instead of raising.
    text = chunk.decode(self.msg.encoding, 'replace')
    processed = self.processor.process_body(
        content=text,
        content_type=self.msg.content_type,
        # Forward the message's own encoding so that processors can
        # turn the text back into bytes when they need to.
        encoding=self.msg.encoding
    )
    # Characters the output encoding cannot represent are substituted too.
    return processed.encode(self.output_encoding, 'replace')
@ -371,12 +372,13 @@ class BaseProcessor(object):
""" """
return headers return headers
def process_body(self, content, content_type, subtype, encoding):
    """Return processed `content`.

    This implementation is a no-op: `content` is returned unchanged and
    the remaining arguments are ignored.

    :param content: The body content as text
    :param content_type: Full content type, e.g., 'application/atom+xml'.
    :param subtype: E.g. 'xml'.
    :param encoding: The original content encoding.

    """
    return content
@ -385,7 +387,7 @@ class BaseProcessor(object):
class JSONProcessor(BaseProcessor): class JSONProcessor(BaseProcessor):
"""JSON body processor.""" """JSON body processor."""
def process_body(self, content, content_type, subtype): def process_body(self, content, content_type, subtype, encoding):
if subtype == 'json': if subtype == 'json':
try: try:
# Indent the JSON data, sort keys by name, and # Indent the JSON data, sort keys by name, and
@ -403,11 +405,11 @@ class JSONProcessor(BaseProcessor):
class XMLProcessor(BaseProcessor): class XMLProcessor(BaseProcessor):
"""XML body processor.""" """XML body processor."""
def process_body(self, content, content_type, subtype): def process_body(self, content, content_type, subtype, encoding):
if subtype == 'xml': if subtype == 'xml':
try: try:
# Pretty print the XML # Pretty print the XML
doc = xml.dom.minidom.parseString(content) doc = xml.dom.minidom.parseString(content.encode(encoding))
content = doc.toprettyxml(indent=' ' * DEFAULT_INDENT) content = doc.toprettyxml(indent=' ' * DEFAULT_INDENT)
except xml.parsers.expat.ExpatError: except xml.parsers.expat.ExpatError:
# Ignore invalid XML errors (skips attempting to pretty print) # Ignore invalid XML errors (skips attempting to pretty print)
@ -446,7 +448,7 @@ class PygmentsProcessor(BaseProcessor):
return pygments.highlight( return pygments.highlight(
headers, HTTPLexer(), self.formatter).strip() headers, HTTPLexer(), self.formatter).strip()
def process_body(self, content, content_type, subtype): def process_body(self, content, content_type, subtype, encoding):
try: try:
lexer = self.lexers_by_type.get(content_type) lexer = self.lexers_by_type.get(content_type)
if not lexer: if not lexer:
@ -506,13 +508,18 @@ class OutputProcessor(object):
headers = processor.process_headers(headers) headers = processor.process_headers(headers)
return headers return headers
def process_body(self, content, content_type, encoding):
    """Run `content` through each of ``self.processors`` in order.

    :param content: The body content as text.
    :param content_type: The Content-Type value, optionally followed by
        parameters, e.g., 'application/atom+xml; charset=utf8'.
    :param encoding: The original content encoding; handed to every
        processor unchanged.
    :return: The content as returned by the last processor, or the
        input `content` when there are no processors.

    """
    # e.g., 'application/atom+xml'
    # Whitespace is permitted around ';', so strip it; otherwise the
    # subtype below would carry a trailing space and never match.
    content_type = content_type.split(';')[0].strip()
    # e.g., 'xml'
    subtype = content_type.split('/')[-1].split('+')[-1]
    for processor in self.processors:
        content = processor.process_body(
            content,
            content_type,
            subtype,
            encoding
        )
    return content