Encoding refactoring

* split --response-as into --response-mime and --response-charset
* add support for Content-Type charset for requests printed to terminal
* add support for charset detection for requests printed to terminal without a Content-Type charset
* etc.
This commit is contained in:
Jakub Roztocil 2021-10-04 20:43:34 +02:00
parent 0b5f4d6b1c
commit 491188d9d4
25 changed files with 345 additions and 254 deletions

View File

@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy
### Colors and formatting
TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type)
Syntax highlighting is applied to HTTP headers and bodies (where it makes sense).
You can choose your preferred color scheme via the `--style` option if you don't like the default one.
There are dozens of styles available, here are just a few notable ones:
@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get
Given the encoding is not sent by the server, HTTPie will auto-detect it.
### Binary data
Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary.
```bash
$ http pie.dev/bytes/2000
```
You will nearly instantly see something like this:
```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream
+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```
### Redirected output
@ -1320,6 +1302,36 @@ function httpless {
http --pretty=all --print=hb "$@" | less -R;
}
```
### Binary data
Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary.
```bash
$ http pie.dev/bytes/2000
```
You will nearly instantly see something like this:
```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream
+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```
### Display encoding
TODO:
(both request/response)
* we look at content-type
* else we detect
* short texts default to utf8
(only response)
* --response-charset allows overwriting
## Download mode

View File

@ -242,3 +242,19 @@ PARSED_DEFAULT_FORMAT_OPTIONS = parse_format_options(
s=','.join(DEFAULT_FORMAT_OPTIONS),
defaults=None,
)
def response_charset_type(encoding: str) -> str:
    """Argparse ``type`` callable validating an encoding name.

    Returns `encoding` unchanged when Python can encode text with it.

    Raises:
        argparse.ArgumentTypeError: if the codec lookup fails.
    """
    try:
        # Encoding an empty string is enough to trigger the codec lookup.
        ''.encode(encoding)
    except LookupError:
        message = f'{encoding!r} is not a supported encoding'
        raise argparse.ArgumentTypeError(message)
    else:
        return encoding
def response_mime_type(mime_type: str) -> str:
    """Argparse ``type`` callable validating a ``type/subtype`` mime type.

    The value must contain exactly one ``/`` with a non-empty part on each
    side, so inputs such as ``/``, ``text/`` or ``/xml`` are rejected
    along with values that have no slash or more than one.

    Returns `mime_type` unchanged when it passes the check.

    Raises:
        argparse.ArgumentTypeError: if the value is not ``type/subtype``.
    """
    main_type, _, subtype = mime_type.partition('/')
    # A second slash can only end up in `subtype`, since `partition`
    # splits on the first one.
    has_both_parts = bool(main_type.strip()) and bool(subtype.strip())
    if not has_both_parts or '/' in subtype:
        raise argparse.ArgumentTypeError(
            f'{mime_type!r} doesnt look like a mime type; use type/subtype')
    return mime_type

View File

@ -9,7 +9,7 @@ from .. import __doc__, __version__
from .argparser import HTTPieArgumentParser
from .argtypes import (
KeyValueArgType, SessionNameValidator,
readable_file_arg,
readable_file_arg, response_charset_type, response_mime_type,
)
from .constants import (
DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS,
@ -310,18 +310,28 @@ output_processing.add_argument(
)
output_processing.add_argument(
'--response-as',
metavar='CONTENT_TYPE',
'--response-charset',
metavar='ENCODING',
type=response_charset_type,
help='''
Override the response Content-Type for display purposes, e.g.:
--response-as=application/xml
--response-as=charset=utf-8
--response-as='application/xml; charset=utf-8'
Override the response encoding for terminal display purposes, e.g.:
--response-charset=utf8
--response-charset=big5
'''
)
output_processing.add_argument(
'--response-mime',
metavar='MIME_TYPE',
type=response_mime_type,
help='''
Override the response mime type for coloring and formatting for the terminal, e.g.:
--response-mime=application/json
--response-mime=text/xml
'''
)
output_processing.add_argument(
'--format-options',

View File

@ -12,7 +12,7 @@ import requests
import urllib3
from . import __version__
from .cli.dicts import RequestHeadersDict
from .constants import UTF8
from .encoding import UTF8
from .plugins.registry import plugin_manager
from .sessions import get_httpie_session
from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter

View File

@ -1,37 +0,0 @@
from typing import Union
from charset_normalizer import from_bytes
from .constants import UTF8
Bytes = Union[bytearray, bytes]
def detect_encoding(content: Bytes) -> str:
    """Guess the encoding of `content`.

    UTF-8 is returned when the detector reports no suitable match.
    """
    best_match = from_bytes(bytes(content)).best()
    if not best_match:
        return UTF8
    return best_match.encoding
def decode(content: Bytes, encoding: str) -> str:
    """Turn `content` into text using `encoding`.

    When `encoding` is empty, it is guessed from `content` instead.
    Undecodable bytes are replaced rather than raising.
    """
    codec_name = encoding or detect_encoding(content)
    return content.decode(codec_name, errors='replace')
def encode(content: str, encoding: str) -> bytes:
    """Turn `content` into bytes using `encoding`.

    Characters the codec cannot represent are replaced rather than raising.
    """
    encoded = content.encode(encoding, errors='replace')
    return encoded

View File

@ -2,3 +2,53 @@ import sys
is_windows = 'win32' in str(sys.platform).lower()
try:
    from functools import cached_property
except ImportError:
    # Fallback for Python < 3.8, which lacks `functools.cached_property`.
    # Can be removed once those versions are no longer supported.
    # Taken from: `django.utils.functional.cached_property`
    class cached_property:
        """
        Decorator turning a method that takes only ``self`` into a property
        whose value is computed once and then stored on the instance.

        It can also wrap an existing method directly:
        (e.g. ``url = cached_property(get_absolute_url)``).

        The optional ``name`` argument is obsolete as of Python 3.6; it is
        accepted but not used.
        """
        name = None

        @staticmethod
        def func(instance):
            # Placeholder that stays in effect until __set_name__() swaps
            # in the wrapped function.
            raise TypeError(
                'Cannot use cached_property instance without calling '
                '__set_name__() on it.'
            )

        def __init__(self, func, name=None):
            self.real_func = func
            self.__doc__ = getattr(func, '__doc__')

        def __set_name__(self, owner, name):
            if self.name is None:
                # First binding: remember the attribute name and activate
                # the wrapped function.
                self.name = name
                self.func = self.real_func
                return
            if name != self.name:
                raise TypeError(
                    "Cannot assign the same cached_property to two different names "
                    "(%r and %r)." % (self.name, name)
                )

        def __get__(self, instance, cls=None):
            """
            Compute the value and store it in ``instance.__dict__`` so that
            later attribute lookups find the stored value directly and no
            longer go through ``cached_property.__get__()``.
            """
            if instance is None:
                return self
            value = self.func(instance)
            instance.__dict__[self.name] = value
            return value

View File

@ -5,7 +5,7 @@ from typing import Union
from . import __version__
from .compat import is_windows
from .constants import UTF8
from .encoding import UTF8
ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME'

View File

@ -1,2 +0,0 @@
# UTF-8 encoding name
UTF8 = 'utf-8'

View File

@ -11,7 +11,7 @@ except ImportError:
from .compat import is_windows
from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError
from .constants import UTF8
from .encoding import UTF8
from .utils import repr_dict

50
httpie/encoding.py Normal file
View File

@ -0,0 +1,50 @@
from typing import Union
from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
UTF8 = 'utf-8'
ContentBytes = Union[bytearray, bytes]
def detect_encoding(content: ContentBytes) -> str:
    """
    Guess the encoding of `content`, falling back to UTF-8.

    Content no longer than `TOO_SMALL_SEQUENCE` is not analysed at all and
    is reported as UTF-8, because detection on very short input can pick
    an arbitrary encoding and produce confusing output:

    >>> too_short = ']"foo"'
    >>> detected = from_bytes(too_short.encode()).best().encoding
    >>> detected
    'utf_16_be'
    >>> too_short.encode().decode(detected)
    '崢景漢'

    UTF-8 is also returned when the detector finds no match.
    """
    if len(content) <= TOO_SMALL_SEQUENCE:
        return UTF8
    best_match = from_bytes(bytes(content)).best()
    return best_match.encoding if best_match else UTF8
def smart_decode(content: ContentBytes, encoding: str) -> str:
    """Turn `content` into text using `encoding`.

    When `encoding` is empty, it is guessed from `content` instead.
    Undecodable bytes are replaced rather than raising.
    """
    codec_name = encoding or detect_encoding(content)
    return content.decode(codec_name, errors='replace')
def smart_encode(content: str, encoding: str) -> bytes:
    """Turn `content` into bytes using `encoding`.

    Characters the codec cannot represent are replaced rather than raising.
    """
    encoded = content.encode(encoding, errors='replace')
    return encoded

View File

@ -1,34 +1,33 @@
from abc import ABCMeta, abstractmethod
from typing import Iterable, Optional
from typing import Iterable
from urllib.parse import urlsplit
from .constants import UTF8
from .utils import split_cookies
from .utils import split_cookies, parse_content_type_header
from .compat import cached_property
class HTTPMessage(metaclass=ABCMeta):
class HTTPMessage:
"""Abstract class for HTTP messages."""
def __init__(self, orig):
self._orig = orig
@abstractmethod
def iter_body(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body."""
raise NotImplementedError
@abstractmethod
def iter_lines(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body yielding (`line`, `line_feed`)."""
raise NotImplementedError
@property
@abstractmethod
def headers(self) -> str:
"""Return a `str` with the message's headers."""
raise NotImplementedError
@property
@abstractmethod
def encoding(self) -> Optional[str]:
"""Return a `str` with the message's encoding, if known."""
@cached_property
def encoding(self) -> str:
ct, params = parse_content_type_header(self.content_type)
return params.get('charset', '')
@property
def content_type(self) -> str:
@ -77,10 +76,6 @@ class HTTPResponse(HTTPMessage):
)
return '\r\n'.join(headers)
@property
def encoding(self):
return self._orig.encoding or UTF8
class HTTPRequest(HTTPMessage):
"""A :class:`requests.models.Request` wrapper."""
@ -114,10 +109,6 @@ class HTTPRequest(HTTPMessage):
headers = '\r\n'.join(headers).strip()
return headers
@property
def encoding(self):
return UTF8
@property
def body(self):
body = self._orig.body

View File

@ -1,7 +1,7 @@
import sys
from typing import TYPE_CHECKING, Optional
from ...constants import UTF8
from ...encoding import UTF8
from ...plugins import FormatterPlugin
if TYPE_CHECKING:

View File

@ -1,13 +1,13 @@
from abc import ABCMeta, abstractmethod
from itertools import chain
from typing import Callable, Dict, Iterable, Tuple, Union
from typing import Callable, Iterable, Union
from .. import codec
from ..context import Environment
from ..constants import UTF8
from ..models import HTTPMessage, HTTPRequest
from .processing import Conversion, Formatting
from .utils import parse_header_content_type
from .. import encoding
from ..context import Environment
from ..encoding import smart_decode, smart_encode, UTF8
from ..models import HTTPMessage
BINARY_SUPPRESSED_NOTICE = (
b'\n'
@ -99,11 +99,16 @@ class EncodedStream(BaseStream):
"""
CHUNK_SIZE = 1
def __init__(self, env=Environment(), response_as: str = None, **kwargs):
def __init__(
self,
env=Environment(),
mime_overwrite: str = None,
encoding_overwrite: str = None,
**kwargs
):
super().__init__(**kwargs)
self.response_as = response_as
self.mime, self.encoding = self._get_mime_and_encoding()
self.mime = mime_overwrite or self.msg.content_type
self.encoding = encoding_overwrite or self.msg.encoding
if env.stdout_isatty:
# Use the encoding supported by the terminal.
output_encoding = env.stdout_encoding
@ -113,32 +118,12 @@ class EncodedStream(BaseStream):
# Default to UTF-8 when unsure.
self.output_encoding = output_encoding or UTF8
def _get_mime_and_encoding(self) -> Tuple[str, Dict[str, str]]:
"""Parse `Content-Type` header or `--response-as` value to guess
correct mime type and encoding.
"""
# Defaults from the `Content-Type` header.
mime, options = parse_header_content_type(self.msg.content_type)
if isinstance(self.msg, HTTPRequest):
encoding = self.msg.encoding
elif self.response_as is None:
encoding = options.get('charset')
else:
# Override from the `--response-as` option.
forced_mime, forced_options = parse_header_content_type(self.response_as)
mime = forced_mime or mime
encoding = forced_options.get('charset') or options.get('charset')
return mime, encoding or ''
def iter_body(self) -> Iterable[bytes]:
for line, lf in self.msg.iter_lines(self.CHUNK_SIZE):
if b'\0' in line:
raise BinarySuppressedError()
line = codec.decode(line, self.encoding)
yield codec.encode(line, self.output_encoding) + lf
line = smart_decode(line, self.encoding)
yield smart_encode(line, self.output_encoding) + lf
class PrettyStream(EncodedStream):
@ -190,9 +175,9 @@ class PrettyStream(EncodedStream):
if not isinstance(chunk, str):
# Text when a converter has been used,
# otherwise it will always be bytes.
chunk = codec.decode(chunk, self.encoding)
chunk = encoding.smart_decode(chunk, self.encoding)
chunk = self.formatting.format_body(content=chunk, mime=self.mime)
return codec.encode(chunk, self.output_encoding)
return encoding.smart_encode(chunk, self.output_encoding)
class BufferedPrettyStream(PrettyStream):

View File

@ -35,57 +35,3 @@ def parse_prefixed_json(data: str) -> Tuple[str, str]:
data_prefix = matches[0] if matches else ''
body = data[len(data_prefix):]
return data_prefix, body
def parse_header_content_type(line):
    """Parse a Content-Type like header.

    Return the main Content-Type and a dictionary of options.
    Option names are lower-cased. A value wrapped in double quotes has the
    quotes removed and its backslash escapes (``\\\\`` and ``\\"``) undone.

    >>> parse_header_content_type('application/xml; charset=utf-8')
    ('application/xml', {'charset': 'utf-8'})
    >>> parse_header_content_type('application/xml; charset = utf-8')
    ('application/xml', {'charset': 'utf-8'})
    >>> parse_header_content_type('application/html+xml;ChArSeT="UTF-8"')
    ('application/html+xml', {'charset': 'UTF-8'})
    >>> parse_header_content_type('application/xml')
    ('application/xml', {})
    >>> parse_header_content_type(';charset=utf-8')
    ('', {'charset': 'utf-8'})
    >>> parse_header_content_type('charset=utf-8')
    ('', {'charset': 'utf-8'})
    >>> parse_header_content_type('multipart/mixed; boundary="gc0pJq0M:08jU534c0p"')
    ('multipart/mixed', {'boundary': 'gc0pJq0M:08jU534c0p'})
    >>> parse_header_content_type('Message/Partial; number=3; total=3; id="oc=jpbe0M2Yt4s@foo.com"')
    ('Message/Partial', {'number': '3', 'total': '3', 'id': 'oc=jpbe0M2Yt4s@foo.com'})
    """
    # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L230
    def _parseparam(s: str):
        # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L218
        # Generator yielding the stripped ';'-separated fields of `s`.
        while s[:1] == ';':
            s = s[1:]
            end = s.find(';')
            # An odd number of unescaped double quotes before `end` means
            # this ';' sits inside a quoted value, so try the next ';'.
            while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
                end = s.find(';', end + 1)
            if end < 0:
                # No further separator: the field runs to the end.
                end = len(s)
            f = s[:end]
            yield f.strip()
            s = s[end:]
    # Special case: 'key=value' only (without starting with ';').
    if ';' not in line and '=' in line:
        line = ';' + line
    # The first yielded field is the main content type (possibly empty);
    # every following field is a candidate 'name=value' option.
    parts = _parseparam(';' + line)
    key = parts.__next__()
    pdict = {}
    for p in parts:
        i = p.find('=')
        # Fields without '=' are ignored.
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1:].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                # Drop the surrounding quotes, then unescape \\ and \".
                value = value[1:-1]
                value = value.replace('\\\\', '\\').replace('\\"', '"')
            pdict[name] = value
    return key, pdict

View File

@ -5,7 +5,7 @@ from typing import IO, TextIO, Tuple, Type, Union
import requests
from ..context import Environment
from ..models import HTTPRequest, HTTPResponse
from ..models import HTTPRequest, HTTPResponse, HTTPMessage
from .processing import Conversion, Formatting
from .streams import (
BaseStream, BufferedPrettyStream, EncodedStream, PrettyStream, RawStream,
@ -97,16 +97,17 @@ def build_output_stream_for_message(
with_headers: bool,
with_body: bool,
):
stream_class, stream_kwargs = get_stream_type_and_kwargs(
env=env,
args=args,
)
message_class = {
message_type = {
requests.PreparedRequest: HTTPRequest,
requests.Response: HTTPResponse,
}[type(requests_message)]
stream_class, stream_kwargs = get_stream_type_and_kwargs(
env=env,
args=args,
message_type=message_type,
)
yield from stream_class(
msg=message_class(requests_message),
msg=message_type(requests_message),
with_headers=with_headers,
with_body=with_body,
**stream_kwargs,
@ -120,7 +121,8 @@ def build_output_stream_for_message(
def get_stream_type_and_kwargs(
env: Environment,
args: argparse.Namespace
args: argparse.Namespace,
message_type: Type[HTTPMessage],
) -> Tuple[Type['BaseStream'], dict]:
"""Pick the right stream type and kwargs for it based on `env` and `args`.
@ -138,8 +140,12 @@ def get_stream_type_and_kwargs(
stream_class = EncodedStream
stream_kwargs = {
'env': env,
'response_as': args.response_as,
}
if message_type is HTTPResponse:
stream_kwargs.update({
'mime_overwrite': args.response_mime,
'encoding_overwrite': args.response_charset,
})
if args.prettify:
stream_class = PrettyStream if args.stream else BufferedPrettyStream
stream_kwargs.update({

View File

@ -191,3 +191,21 @@ def _max_age_to_expires(cookies, now):
max_age = cookie.get('max-age')
if max_age and max_age.isdigit():
cookie['expires'] = now + float(max_age)
def parse_content_type_header(header):
    """Split a Content-Type header value into its type and parameters.

    Borrowed from requests.

    Returns a ``(content_type, params)`` tuple. `content_type` is the
    whitespace-stripped text before the first ``;``. `params` maps each
    lower-cased parameter name to its value, with surrounding quotes and
    spaces removed; a parameter written without ``=`` maps to ``True``.
    """
    content_type, *raw_params = header.split(';')
    strip_chars = "\"' "
    params_dict = {}
    for raw in raw_params:
        raw = raw.strip()
        if not raw:
            # Skip empty segments such as those from a trailing ';'.
            continue
        name, separator, raw_value = raw.partition('=')
        if separator:
            key = name.strip(strip_chars)
            value = raw_value.strip(strip_chars)
        else:
            key, value = raw, True
        params_dict[key.lower()] = value
    return content_type.strip(), params_dict

View File

@ -1,7 +1,7 @@
"""Test data"""
from pathlib import Path
from httpie.constants import UTF8
from httpie.encoding import UTF8
def patharg(path):

View File

@ -4,7 +4,7 @@ import pytest
from _pytest.monkeypatch import MonkeyPatch
from httpie.compat import is_windows
from httpie.constants import UTF8
from httpie.encoding import UTF8
from httpie.config import (
Config, DEFAULT_CONFIG_DIRNAME, DEFAULT_RELATIVE_LEGACY_CONFIG_DIR,
DEFAULT_RELATIVE_XDG_CONFIG_HOME, DEFAULT_WINDOWS_CONFIG_DIR,

View File

@ -41,8 +41,19 @@ def test_max_headers_no_limit(httpbin_both):
assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get')
def test_charset_argument_unknown_encoding(httpbin_both):
    """An unknown charset given via ``--response-as`` raises LookupError."""
    url = httpbin_both + '/get'
    with raises(LookupError) as exc_info:
        http('--response-as', 'charset=foobar', 'GET', url)
    message = str(exc_info.value)
    assert 'unknown encoding: foobar' in message
def test_response_charset_option_unknown_encoding(httpbin_both):
    """An unsupported ``--response-charset`` value is reported on stderr."""
    url = httpbin_both + '/get'
    result = http(
        '--response-charset=foobar',
        url,
        tolerate_error_exit_status=True,
    )
    expected_error = "'foobar' is not a supported encoding"
    assert expected_error in result.stderr
def test_response_mime_option_unknown_encoding(httpbin_both):
    """A ``--response-mime`` value without a slash is reported on stderr."""
    url = httpbin_both + '/get'
    result = http(
        '--response-mime=foobar',
        url,
        tolerate_error_exit_status=True,
    )
    expected_error = "'foobar' doesnt look like a mime type"
    assert expected_error in result.stderr

View File

@ -9,7 +9,7 @@ import httpie.__main__
from .fixtures import FILE_CONTENT, FILE_PATH
from httpie.cli.exceptions import ParseError
from httpie.context import Environment
from httpie.constants import UTF8
from httpie.encoding import UTF8
from httpie.status import ExitStatus
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http

View File

@ -11,8 +11,25 @@ from httpie.utils import JsonDictPreservingDuplicateKeys
from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
TEST_JSON_XXSI_PREFIXES = [
r")]}',\n", ")]}',",
'while(1);',
'for(;;)',
')',
']',
'}'
]
TEST_JSON_VALUES = [
# FIXME: missing int & float
{},
{'a': 0, 'b': 0},
[],
['a', 'b'],
'foo',
True,
False,
None
]
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
@ -37,15 +54,19 @@ JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
"""Test JSON bodies preceded by non-JSON data."""
body = data_prefix + json.dumps(json_data)
content_type = 'application/json'
responses.add(responses.GET, URL_EXAMPLE, body=body,
content_type=content_type)
content_type = 'application/json;charset=utf8'
responses.add(
responses.GET,
URL_EXAMPLE,
body=body,
content_type=content_type
)
colored_output = pretty in ('all', 'colors')
colored_output = pretty in {'all', 'colors'}
env = MockEnvironment(colors=256) if colored_output else None
r = http('--pretty=' + pretty, URL_EXAMPLE, env=env)
r = http('--pretty', pretty, URL_EXAMPLE, env=env)
indent = None if pretty in ('none', 'colors') else 4
indent = None if pretty in {'none', 'colors'} else 4
expected_body = data_prefix + json.dumps(json_data, indent=indent)
if colored_output:
fmt = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})

View File

@ -15,7 +15,7 @@ from httpie.cli.argtypes import (
parse_format_options,
)
from httpie.cli.definition import parser
from httpie.constants import UTF8
from httpie.encoding import UTF8
from httpie.output.formatters.colors import get_lexer
from httpie.status import ExitStatus
from .utils import COLOR, CRLF, HTTP_OK, MockEnvironment, http

View File

@ -7,7 +7,7 @@ from unittest import mock
import pytest
from .fixtures import FILE_PATH_ARG, UNICODE
from httpie.constants import UTF8
from httpie.encoding import UTF8
from httpie.plugins import AuthPlugin
from httpie.plugins.builtin import HTTPBasicAuth
from httpie.plugins.registry import plugin_manager

View File

@ -4,14 +4,19 @@ Various unicode handling related tests.
"""
import pytest
import responses
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
from httpie.cli.constants import PRETTY_MAP
from httpie.constants import UTF8
from httpie.encoding import UTF8
from .utils import http, HTTP_OK, URL_EXAMPLE
from .utils import http, HTTP_OK, URL_EXAMPLE, MockEnvironment, StdinBytesIO
from .fixtures import UNICODE
ENCODINGS = [UTF8, 'windows-1250']
CZECH_TEXT = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'
assert len(CZECH_TEXT) > TOO_SMALL_SEQUENCE
CZECH_TEXT_SPECIFIC_CHARSET = 'windows-1250'
ENCODINGS = [UTF8, CZECH_TEXT_SPECIFIC_CHARSET]
def test_unicode_headers(httpbin):
@ -122,24 +127,28 @@ def test_unicode_digest_auth(httpbin):
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content_type_header(encoding):
responses.add(responses.GET,
URL_EXAMPLE,
body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
content_type=f'text/xml; charset={encoding.upper()}')
responses.add(
responses.GET,
URL_EXAMPLE,
body=f'<?xml version="1.0"?>\n<c>{CZECH_TEXT}</c>'.encode(encoding),
content_type=f'text/xml; charset={encoding.upper()}'
)
r = http('GET', URL_EXAMPLE)
assert 'Financiën' in r
assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content(encoding):
body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
responses.add(responses.GET,
URL_EXAMPLE,
body=body.encode(encoding),
content_type='text/xml')
r = http('GET', URL_EXAMPLE)
assert 'Financiën' in r
def test_encoding_detection_from_content(encoding):
body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>{CZECH_TEXT}</c>'
responses.add(
responses.GET,
URL_EXAMPLE,
body=body.encode(encoding),
content_type='text/xml'
)
r = http(URL_EXAMPLE)
assert CZECH_TEXT in r
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@ -149,40 +158,45 @@ def test_GET_encoding_provided_by_option(pretty):
URL_EXAMPLE,
body='卷首'.encode('big5'),
content_type='text/plain; charset=utf-8')
args = ('--pretty=' + pretty, 'GET', URL_EXAMPLE)
args = ('--pretty', pretty, URL_EXAMPLE)
# Encoding provided by Content-Type is incorrect, thus it should print something unreadable.
r = http(*args)
assert '卷首' not in r
# Specifying the correct encoding, both in short & long versions, should fix it.
r = http('--response-as', 'charset=big5', *args)
assert '卷首' in r
r = http('--response-as', 'text/plain; charset=big5', *args)
r = http('--response-charset=big5', *args)
assert '卷首' in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_provided_by_empty_option_should_use_content_detection(encoding):
body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
responses.add(responses.GET,
URL_EXAMPLE,
body=body.encode(encoding),
content_type='text/xml')
r = http('--response-as', '', 'GET', URL_EXAMPLE)
assert 'Financiën' in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_POST_encoding_detection_from_content_type_header(encoding):
responses.add(responses.POST,
URL_EXAMPLE,
body='Všichni lidé jsou si rovni.'.encode(encoding),
content_type=f'text/plain; charset={encoding.upper()}')
def test_encoding_detection_from_content_type_header(encoding):
responses.add(
responses.POST,
URL_EXAMPLE,
body=CZECH_TEXT.encode(encoding),
content_type=f'text/plain; charset={encoding.upper()}'
)
r = http('--form', 'POST', URL_EXAMPLE)
assert 'Všichni lidé jsou si rovni.' in r
assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
def test_request_body_content_type_charset_used(encoding):
    """The request body sent on stdin shows up correctly in the output when
    the request's Content-Type header names the body's charset."""
    text = CZECH_TEXT
    raw = text.encode(encoding)
    if encoding != UTF8:
        # Sanity check: these bytes cannot be decoded with the default codec.
        with pytest.raises(UnicodeDecodeError):
            assert text != raw.decode()

    content_type_header = f'Content-Type: text/plain; charset={encoding.upper()}'
    env = MockEnvironment(
        stdin=StdinBytesIO(raw),
        stdin_isatty=False,
    )
    r = http('--offline', URL_EXAMPLE, content_type_header, env=env)
    assert text in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@ -190,10 +204,10 @@ def test_POST_encoding_detection_from_content_type_header(encoding):
def test_POST_encoding_detection_from_content(encoding):
responses.add(responses.POST,
URL_EXAMPLE,
body='Všichni lidé jsou si rovni.'.encode(encoding),
body=CZECH_TEXT.encode(encoding),
content_type='text/plain')
r = http('--form', 'POST', URL_EXAMPLE)
assert 'Všichni lidé jsou si rovni.' in r
assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@ -202,8 +216,8 @@ def test_POST_encoding_detection_from_content(encoding):
def test_stream_encoding_detection_from_content_type_header(encoding, pretty):
responses.add(responses.GET,
URL_EXAMPLE,
body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
body=f'<?xml version="1.0"?>\n<c>{CZECH_TEXT}</c>'.encode(encoding),
stream=True,
content_type=f'text/xml; charset={encoding.upper()}')
r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE)
assert 'Financiën' in r
assert CZECH_TEXT in r

View File

@ -3,7 +3,7 @@ import sys
import pytest
import responses
from httpie.constants import UTF8
from httpie.encoding import UTF8
from httpie.output.formatters.xml import parse_xml, pretty_xml
from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID
@ -93,7 +93,7 @@ def test_content_type_from_option():
"""
responses.add(responses.GET, URL_EXAMPLE, body=XML_DATA_RAW,
content_type='text/plain')
args = ('--response-as', 'application/xml', URL_EXAMPLE)
args = ('--response-mime', 'application/xml', URL_EXAMPLE)
# Ensure the option is taken into account only for responses.
# Request
@ -114,5 +114,5 @@ def test_content_type_from_option_incomplete():
content_type='text/plain')
# The provided Content-Type is simply ignored, and so no formatting is done.
r = http('--response-as', 'charset=utf-8', URL_EXAMPLE)
r = http('--response-charset', 'utf-8', URL_EXAMPLE)
assert XML_DATA_RAW in r