mirror of
https://github.com/httpie/cli.git
synced 2024-11-21 23:33:12 +01:00
Improve handling of prettified responses without correct content-type encoding (#1110)
* Improve handling of responses without correct content-type charset * [skip ci] Minor tweaks in tests * [skip ci] Add documentation Co-authored-by: claudiatd <claudiatd@gmail.com> * Improve unknown encoding test [skip ci] * Review mime and options retrieval * Add full content-type example in help output * Simplify decoder * [skip ci] s/charset/encoding/ * Tweaks * [skip ci] Fix type annotation * [skip ci] s/charset/encoding/ * Tweaks * Fix type annoation * Improvement * Introduce `codec.encode()` * [skip ci] Tweak changelog Co-authored-by: claudiatd <claudiatd@gmail.com>
This commit is contained in:
parent
b50f9aa7e7
commit
71adcd97d0
@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
|
||||
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
|
||||
- Added `--format-options=response.as:CONTENT_TYPE` to allow overriding the response `Content-Type`. ([#1134](https://github.com/httpie/httpie/issues/1134))
|
||||
- Added `--response-as` shortcut for setting the response `Content-Type`-related `--format-options`. ([#1134](https://github.com/httpie/httpie/issues/1134))
|
||||
- Improved handling of prettified responses without correct `Content-Type` encoding. ([#1110](https://github.com/httpie/httpie/issues/1110))
|
||||
- Installed plugins are now listed in `--debug` output. ([#1165](https://github.com/httpie/httpie/issues/1165))
|
||||
- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
|
||||
|
||||
|
@ -1249,6 +1249,18 @@ For example, the following request will force the response to be treated as XML:
|
||||
$ http --response-as=application/xml pie.dev/get
|
||||
```
|
||||
|
||||
And the following requests will force the response to use the [big5](https://docs.python.org/3/library/codecs.html#standard-encodings) encoding:
|
||||
|
||||
```bash
|
||||
$ http --response-as='charset=big5' pie.dev/get
|
||||
```
|
||||
|
||||
```bash
|
||||
$ http --response-as='text/plain; charset=big5' pie.dev/get
|
||||
```
|
||||
|
||||
If the server does not send the encoding, HTTPie will auto-detect it.
|
||||
|
||||
### Binary data
|
||||
|
||||
Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
|
||||
|
@ -316,6 +316,8 @@ output_processing.add_argument(
|
||||
Override the response Content-Type for formatting purposes, e.g.:
|
||||
|
||||
--response-as=application/xml
|
||||
--response-as=charset=utf-8
|
||||
--response-as='application/xml; charset=utf-8'
|
||||
|
||||
It is a shortcut for:
|
||||
|
||||
|
37
httpie/codec.py
Normal file
37
httpie/codec.py
Normal file
@ -0,0 +1,37 @@
|
||||
from typing import Union
|
||||
|
||||
from charset_normalizer import from_bytes
|
||||
|
||||
from .constants import UTF8
|
||||
|
||||
Bytes = Union[bytearray, bytes]
|
||||
|
||||
|
||||
def detect_encoding(content: Bytes) -> str:
    """Detect the `content` encoding.

    The detection is delegated to `charset_normalizer.from_bytes()`;
    the encoding of its best match is returned.

    Falls back to UTF-8 when no suitable encoding is found.

    """
    # Normalize `bytearray` input to `bytes` before running the detection.
    match = from_bytes(bytes(content)).best()
    return match.encoding if match else UTF8
|
||||
|
||||
|
||||
def decode(content: Bytes, encoding: str) -> str:
    """Decode `content` using the given `encoding`.

    If no `encoding` is provided (i.e., it is empty or otherwise falsy),
    the best effort is to guess it from `content`.

    Unicode errors are replaced rather than raised. An `encoding` name
    that Python does not know still raises `LookupError`.

    """
    if not encoding:
        # No explicit encoding: fall back to content-based detection.
        encoding = detect_encoding(content)
    return content.decode(encoding, 'replace')
|
||||
|
||||
|
||||
def encode(content: str, encoding: str) -> bytes:
    """Encode `content` using the given `encoding`.

    Unicode errors are replaced: characters that cannot be represented
    in `encoding` are substituted instead of raising `UnicodeEncodeError`.

    """
    return content.encode(encoding, 'replace')
|
@ -30,11 +30,6 @@ class HTTPMessage(metaclass=ABCMeta):
|
||||
def encoding(self) -> Optional[str]:
|
||||
"""Return a `str` with the message's encoding, if known."""
|
||||
|
||||
@property
|
||||
def body(self) -> bytes:
|
||||
"""Return a `bytes` with the message's body."""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def content_type(self) -> str:
|
||||
"""Return the message content type."""
|
||||
@ -86,12 +81,6 @@ class HTTPResponse(HTTPMessage):
|
||||
def encoding(self):
|
||||
return self._orig.encoding or UTF8
|
||||
|
||||
@property
|
||||
def body(self):
|
||||
# Only now the response body is fetched.
|
||||
# Shouldn't be touched unless the body is actually needed.
|
||||
return self._orig.content
|
||||
|
||||
|
||||
class HTTPRequest(HTTPMessage):
|
||||
"""A :class:`requests.models.Request` wrapper."""
|
||||
|
@ -25,7 +25,7 @@ def pretty_xml(document: 'Document',
|
||||
}
|
||||
if standalone is not None and sys.version_info >= (3, 9):
|
||||
kwargs['standalone'] = standalone
|
||||
body = document.toprettyxml(**kwargs).decode()
|
||||
body = document.toprettyxml(**kwargs).decode(kwargs['encoding'])
|
||||
|
||||
# Remove blank lines automatically added by `toprettyxml()`.
|
||||
return '\n'.join(line for line in body.splitlines() if line.strip())
|
||||
|
@ -1,7 +1,8 @@
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from itertools import chain
|
||||
from typing import Callable, Iterable, Union
|
||||
from typing import Any, Callable, Dict, Iterable, Tuple, Union
|
||||
|
||||
from .. import codec
|
||||
from ..cli.constants import EMPTY_FORMAT_OPTION
|
||||
from ..context import Environment
|
||||
from ..constants import UTF8
|
||||
@ -114,8 +115,8 @@ class EncodedStream(BaseStream):
|
||||
for line, lf in self.msg.iter_lines(self.CHUNK_SIZE):
|
||||
if b'\0' in line:
|
||||
raise BinarySuppressedError()
|
||||
yield line.decode(self.msg.encoding) \
|
||||
.encode(self.output_encoding, 'replace') + lf
|
||||
line = codec.decode(line, self.msg.encoding)
|
||||
yield codec.encode(line, self.output_encoding) + lf
|
||||
|
||||
|
||||
class PrettyStream(EncodedStream):
|
||||
@ -137,15 +138,23 @@ class PrettyStream(EncodedStream):
|
||||
super().__init__(**kwargs)
|
||||
self.formatting = formatting
|
||||
self.conversion = conversion
|
||||
self.mime = self.get_mime()
|
||||
self.mime, mime_options = self._get_mime_and_options()
|
||||
self.encoding = mime_options.get('charset') or ''
|
||||
|
||||
def get_mime(self) -> str:
|
||||
mime = parse_header_content_type(self.msg.content_type)[0]
|
||||
if isinstance(self.msg, HTTPResponse):
|
||||
forced_content_type = self.formatting.options['response']['as']
|
||||
if forced_content_type != EMPTY_FORMAT_OPTION:
|
||||
mime = parse_header_content_type(forced_content_type)[0] or mime
|
||||
return mime
|
||||
def _get_mime_and_options(self) -> Tuple[str, Dict[str, Any]]:
|
||||
# Defaults from the `Content-Type` header.
|
||||
mime, options = parse_header_content_type(self.msg.content_type)
|
||||
|
||||
if not isinstance(self.msg, HTTPResponse):
|
||||
return mime, options
|
||||
|
||||
# Override from the `--response-as` option.
|
||||
forced_content_type = self.formatting.options['response']['as']
|
||||
if forced_content_type == EMPTY_FORMAT_OPTION:
|
||||
return mime, options
|
||||
|
||||
forced_mime, forced_options = parse_header_content_type(forced_content_type)
|
||||
return (forced_mime or mime, forced_options or options)
|
||||
|
||||
def get_headers(self) -> bytes:
|
||||
return self.formatting.format_headers(
|
||||
@ -176,9 +185,9 @@ class PrettyStream(EncodedStream):
|
||||
if not isinstance(chunk, str):
|
||||
# Text when a converter has been used,
|
||||
# otherwise it will always be bytes.
|
||||
chunk = chunk.decode(self.msg.encoding, 'replace')
|
||||
chunk = codec.decode(chunk, self.encoding)
|
||||
chunk = self.formatting.format_body(content=chunk, mime=self.mime)
|
||||
return chunk.encode(self.output_encoding, 'replace')
|
||||
return codec.encode(chunk, self.output_encoding)
|
||||
|
||||
|
||||
class BufferedPrettyStream(PrettyStream):
|
||||
|
1
setup.py
1
setup.py
@ -25,6 +25,7 @@ dev_require = [
|
||||
'wheel',
|
||||
]
|
||||
install_requires = [
|
||||
'charset_normalizer>=2.0.0',
|
||||
'defusedxml>=0.6.0',
|
||||
'requests[socks]>=2.22.0',
|
||||
'Pygments>=2.5.2',
|
||||
|
@ -39,3 +39,10 @@ def test_max_headers_limit(httpbin_both):
|
||||
|
||||
def test_max_headers_no_limit(httpbin_both):
|
||||
assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get')
|
||||
|
||||
|
||||
def test_charset_argument_unknown_encoding(httpbin_both):
|
||||
with raises(LookupError) as e:
|
||||
http('--response-as', 'charset=foobar',
|
||||
'GET', httpbin_both + '/get')
|
||||
assert 'unknown encoding: foobar' in str(e.value)
|
||||
|
@ -2,9 +2,17 @@
|
||||
Various unicode handling related tests.
|
||||
|
||||
"""
|
||||
from .utils import http, HTTP_OK
|
||||
import pytest
|
||||
import responses
|
||||
|
||||
from httpie.cli.constants import PRETTY_MAP
|
||||
from httpie.constants import UTF8
|
||||
|
||||
from .utils import http, HTTP_OK, URL_EXAMPLE
|
||||
from .fixtures import UNICODE
|
||||
|
||||
ENCODINGS = [UTF8, 'windows-1250']
|
||||
|
||||
|
||||
def test_unicode_headers(httpbin):
|
||||
# httpbin doesn't interpret UTF-8 headers
|
||||
@ -109,3 +117,95 @@ def test_unicode_digest_auth(httpbin):
|
||||
http('--auth-type=digest',
|
||||
'--auth', f'test:{UNICODE}',
|
||||
f'{httpbin.url}/digest-auth/auth/test/{UNICODE}')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@responses.activate
|
||||
def test_GET_encoding_detection_from_content_type_header(encoding):
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
|
||||
content_type=f'text/xml; charset={encoding.upper()}')
|
||||
r = http('GET', URL_EXAMPLE)
|
||||
assert 'Financiën' in r
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@responses.activate
|
||||
def test_GET_encoding_detection_from_content(encoding):
|
||||
body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body=body.encode(encoding),
|
||||
content_type='text/xml')
|
||||
r = http('GET', URL_EXAMPLE)
|
||||
assert 'Financiën' in r
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_GET_encoding_provided_by_format_options():
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body='▒▒▒'.encode('johab'),
|
||||
content_type='text/plain')
|
||||
r = http('--format-options', 'response.as:text/plain; charset=johab',
|
||||
'GET', URL_EXAMPLE)
|
||||
assert '▒▒▒' in r
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_GET_encoding_provided_by_shortcut_option():
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body='▒▒▒'.encode('johab'),
|
||||
content_type='text/plain')
|
||||
r = http('--response-as', 'text/plain; charset=johab',
|
||||
'GET', URL_EXAMPLE)
|
||||
assert '▒▒▒' in r
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@responses.activate
|
||||
def test_GET_encoding_provided_by_empty_shortcut_option_should_use_content_detection(encoding):
|
||||
body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body=body.encode(encoding),
|
||||
content_type='text/xml')
|
||||
r = http('--response-as', '', 'GET', URL_EXAMPLE)
|
||||
assert 'Financiën' in r
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@responses.activate
|
||||
def test_POST_encoding_detection_from_content_type_header(encoding):
|
||||
responses.add(responses.POST,
|
||||
URL_EXAMPLE,
|
||||
body='Všichni lidé jsou si rovni.'.encode(encoding),
|
||||
content_type=f'text/plain; charset={encoding.upper()}')
|
||||
r = http('--form', 'POST', URL_EXAMPLE)
|
||||
assert 'Všichni lidé jsou si rovni.' in r
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@responses.activate
|
||||
def test_POST_encoding_detection_from_content(encoding):
|
||||
responses.add(responses.POST,
|
||||
URL_EXAMPLE,
|
||||
body='Všichni lidé jsou si rovni.'.encode(encoding),
|
||||
content_type='text/plain')
|
||||
r = http('--form', 'POST', URL_EXAMPLE)
|
||||
assert 'Všichni lidé jsou si rovni.' in r
|
||||
|
||||
|
||||
@pytest.mark.parametrize('encoding', ENCODINGS)
|
||||
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
|
||||
@responses.activate
|
||||
def test_stream_encoding_detection_from_content_type_header(encoding, pretty):
|
||||
responses.add(responses.GET,
|
||||
URL_EXAMPLE,
|
||||
body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
|
||||
stream=True,
|
||||
content_type=f'text/xml; charset={encoding.upper()}')
|
||||
r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE)
|
||||
assert 'Financiën' in r
|
||||
|
Loading…
Reference in New Issue
Block a user