Encoding refactoring

* split --response-as into --response-mime and --response-charset
* add support for Content-Type charset for requests printed to terminal
* add support for charset detection for requests printed to terminal without a Content-Type charset
* etc.
Jakub Roztocil 2021-10-04 20:43:34 +02:00
parent 0b5f4d6b1c
commit 491188d9d4
25 changed files with 345 additions and 254 deletions

View File

@@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy
### Colors and formatting
TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type)
Syntax highlighting is applied to HTTP headers and bodies (where it makes sense).
You can choose your preferred color scheme via the `--style` option if you don't like the default one.
There are dozens of styles available, here are just a few notable ones:
@@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get
Given the encoding is not sent by the server, HTTPie will auto-detect it.
### Binary data
Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary,
```bash
$ http pie.dev/bytes/2000
```
You will nearly instantly see something like this:
```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream
+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```
### Redirected output
@@ -1320,6 +1302,36 @@ function httpless {
http --pretty=all --print=hb "$@" | less -R;
}
```
### Binary data
Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary,
```bash
$ http pie.dev/bytes/2000
```
You will nearly instantly see something like this:
```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream
+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```
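The suppression relies on a simple heuristic, visible in the `streams.py` changes further down in this diff: as soon as a NUL byte shows up in the body stream, the data is treated as binary and a `BinarySuppressedError` is raised. A minimal sketch of that check (the helper name below is illustrative, not HTTPie's API):

```python
def looks_binary(chunk: bytes) -> bool:
    """Treat any chunk containing a NUL byte as binary data (illustrative helper)."""
    return b'\0' in chunk


assert not looks_binary(b'{"hello": "world"}')
assert looks_binary(b'\x89PNG\r\n\x1a\n\x00')  # PNG signature followed by a NUL byte
```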
### Display encoding
TODO: document the following behaviour.
Both request and response:
* we look at the Content-Type charset first
* else we detect the encoding from the content
* short texts default to UTF-8
Response only:
* --response-charset allows overriding the charset (see the sketch below)
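A rough sketch of the decision order described in the TODO above, reusing the `detect_encoding` helper that this commit adds in `httpie/encoding.py` (the wrapper function itself is illustrative, not part of HTTPie's API):

```python
from typing import Optional

from httpie.encoding import detect_encoding  # added by this commit


def charset_for_display(
    body: bytes,
    content_type_charset: Optional[str],
    response_charset_option: Optional[str] = None,
) -> str:
    # --response-charset wins, and only exists for responses.
    if response_charset_option:
        return response_charset_option
    # Otherwise trust the Content-Type charset, if the header carried one.
    if content_type_charset:
        return content_type_charset
    # Finally, detect from the content; short bodies fall back to UTF-8.
    return detect_encoding(body)
```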
## Download mode

View File

@@ -242,3 +242,19 @@ PARSED_DEFAULT_FORMAT_OPTIONS = parse_format_options(
s=','.join(DEFAULT_FORMAT_OPTIONS),
defaults=None,
)
def response_charset_type(encoding: str) -> str:
try:
''.encode(encoding)
except LookupError:
raise argparse.ArgumentTypeError(
f'{encoding!r} is not a supported encoding')
return encoding
def response_mime_type(mime_type: str) -> str:
if mime_type.count('/') != 1:
raise argparse.ArgumentTypeError(
f'{mime_type!r} doesn’t look like a mime type; use type/subtype')
return mime_type
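For reference, exercising the two validators directly shows the behaviour the tests later in this diff rely on (argparse calls them automatically during option parsing):

```python
import argparse

from httpie.cli.argtypes import response_charset_type, response_mime_type

assert response_charset_type('utf8') == 'utf8'  # any codec known to Python passes
assert response_mime_type('application/json') == 'application/json'

try:
    response_charset_type('foobar')
except argparse.ArgumentTypeError as e:
    print(e)  # rejected: not a supported encoding

try:
    response_mime_type('foobar')
except argparse.ArgumentTypeError as e:
    print(e)  # rejected: doesn't look like a mime type
```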

View File

@@ -9,7 +9,7 @@ from .. import __doc__, __version__
from .argparser import HTTPieArgumentParser
from .argtypes import (
KeyValueArgType, SessionNameValidator,
- readable_file_arg,
+ readable_file_arg, response_charset_type, response_mime_type,
)
from .constants import (
DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS,
@@ -310,18 +310,28 @@ output_processing.add_argument(
)
output_processing.add_argument(
- '--response-as',
+ '--response-charset',
- metavar='CONTENT_TYPE',
+ metavar='ENCODING',
type=response_charset_type,
help='''
- Override the response Content-Type for display purposes, e.g.:
+ Override the response encoding for terminal display purposes, e.g.:
--response-charset=utf8
- --response-as=application/xml
+ --response-charset=big5
--response-as=charset=utf-8
--response-as='application/xml; charset=utf-8'
'''
)
output_processing.add_argument(
'--response-mime',
metavar='MIME_TYPE',
type=response_mime_type,
help='''
Override the response mime type for coloring and formatting for the terminal, e.g.:
--response-mime=application/json
--response-mime=text/xml
'''
)
output_processing.add_argument(
'--format-options',

View File

@@ -12,7 +12,7 @@ import requests
import urllib3
from . import __version__
from .cli.dicts import RequestHeadersDict
- from .constants import UTF8
+ from .encoding import UTF8
from .plugins.registry import plugin_manager
from .sessions import get_httpie_session
from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter

View File

@@ -1,37 +0,0 @@
from typing import Union
from charset_normalizer import from_bytes
from .constants import UTF8
Bytes = Union[bytearray, bytes]
def detect_encoding(content: Bytes) -> str:
"""Detect the `content` encoding.
Fallback to UTF-8 when no suitable encoding found.
"""
match = from_bytes(bytes(content)).best()
return match.encoding if match else UTF8
def decode(content: Bytes, encoding: str) -> str:
"""Decode `content` using the given `encoding`.
If no `encoding` is provided, the best effort is to guess it from `content`.
Unicode errors are replaced.
"""
if not encoding:
encoding = detect_encoding(content)
return content.decode(encoding, 'replace')
def encode(content: str, encoding: str) -> bytes:
"""Encode `content` using the given `encoding`.
Unicode errors are replaced.
"""
return content.encode(encoding, 'replace')

View File

@@ -2,3 +2,53 @@ import sys
is_windows = 'win32' in str(sys.platform).lower()
try:
from functools import cached_property
except ImportError:
# Can be removed once we drop Python <3.8 support
# Taken from: `django.utils.functional.cached_property`
class cached_property:
"""
Decorator that converts a method with a single self argument into a
property cached on the instance.
A cached property can be made out of an existing method:
(e.g. ``url = cached_property(get_absolute_url)``).
The optional ``name`` argument is obsolete as of Python 3.6 and will be
deprecated in Django 4.0 (#30127).
"""
name = None
@staticmethod
def func(instance):
raise TypeError(
'Cannot use cached_property instance without calling '
'__set_name__() on it.'
)
def __init__(self, func, name=None):
self.real_func = func
self.__doc__ = getattr(func, '__doc__')
def __set_name__(self, owner, name):
if self.name is None:
self.name = name
self.func = self.real_func
elif name != self.name:
raise TypeError(
"Cannot assign the same cached_property to two different names "
"(%r and %r)." % (self.name, name)
)
def __get__(self, instance, cls=None):
"""
Call the function and put the return value in instance.__dict__ so that
subsequent attribute access on the instance returns the cached value
instead of calling cached_property.__get__().
"""
if instance is None:
return self
res = instance.__dict__[self.name] = self.func(instance)
return res

View File

@@ -5,7 +5,7 @@ from typing import Union
from . import __version__
from .compat import is_windows
- from .constants import UTF8
+ from .encoding import UTF8
ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME'

View File

@@ -1,2 +0,0 @@
# UTF-8 encoding name
UTF8 = 'utf-8'

View File

@@ -11,7 +11,7 @@ except ImportError:
from .compat import is_windows
from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError
- from .constants import UTF8
+ from .encoding import UTF8
from .utils import repr_dict

httpie/encoding.py (new file, 50 lines added)
View File

@@ -0,0 +1,50 @@
from typing import Union
from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
UTF8 = 'utf-8'
ContentBytes = Union[bytearray, bytes]
def detect_encoding(content: ContentBytes) -> str:
"""
We default to UTF-8 if the text is too short, because the detection
can return a random encoding, leading to confusing results:
>>> too_short = ']"foo"'
>>> detected = from_bytes(too_short.encode()).best().encoding
>>> detected
'utf_16_be'
>>> too_short.encode().decode(detected)
'崢景漢'
"""
encoding = UTF8
if len(content) > TOO_SMALL_SEQUENCE:
match = from_bytes(bytes(content)).best()
if match:
encoding = match.encoding
return encoding
def smart_decode(content: ContentBytes, encoding: str) -> str:
"""Decode `content` using the given `encoding`.
If no `encoding` is provided, the best effort is to guess it from `content`.
Unicode errors are replaced.
"""
if not encoding:
encoding = detect_encoding(content)
return content.decode(encoding, 'replace')
def smart_encode(content: str, encoding: str) -> bytes:
"""Encode `content` using the given `encoding`.
Unicode errors are replaced.
"""
return content.encode(encoding, 'replace')
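A usage sketch of the new module: detection is skipped for inputs at or below `TOO_SMALL_SEQUENCE` (they default to UTF-8), and `smart_decode`/`smart_encode` replace undecodable characters instead of raising (the windows-125x guess in the comment is what we'd expect, not a guarantee):

```python
from httpie.encoding import detect_encoding, smart_decode, smart_encode

short = ']"foo"'.encode()
czech = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'

print(detect_encoding(short))  # 'utf-8' -- too short, detection is skipped
print(detect_encoding(czech.encode('windows-1250')))  # a windows-125x-style guess

print(smart_decode(czech.encode('windows-1250'), encoding=''))  # '' -> detect first
print(smart_decode(b'\xff\xfe', 'ascii'))  # undecodable bytes are replaced, no exception
print(smart_encode('卷首', 'ascii'))        # b'??' -- replacement on encode as well
```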

View File

@@ -1,34 +1,33 @@
- from abc import ABCMeta, abstractmethod
+ from typing import Iterable
from typing import Iterable, Optional
from urllib.parse import urlsplit
- from .constants import UTF8
+ from .utils import split_cookies, parse_content_type_header
- from .utils import split_cookies
+ from .compat import cached_property
- class HTTPMessage(metaclass=ABCMeta):
+ class HTTPMessage:
"""Abstract class for HTTP messages."""
def __init__(self, orig):
self._orig = orig
@abstractmethod
def iter_body(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body."""
raise NotImplementedError
@abstractmethod
def iter_lines(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body yielding (`line`, `line_feed`)."""
raise NotImplementedError
@property
@abstractmethod
def headers(self) -> str:
"""Return a `str` with the message's headers."""
raise NotImplementedError
- @property
+ @cached_property
- @abstractmethod
+ def encoding(self) -> str:
- def encoding(self) -> Optional[str]:
+ ct, params = parse_content_type_header(self.content_type)
- """Return a `str` with the message's encoding, if known."""
+ return params.get('charset', '')
@property
def content_type(self) -> str:
@@ -77,10 +76,6 @@ class HTTPResponse(HTTPMessage):
)
return '\r\n'.join(headers)
@property
def encoding(self):
return self._orig.encoding or UTF8
class HTTPRequest(HTTPMessage):
"""A :class:`requests.models.Request` wrapper."""
@@ -114,10 +109,6 @@ class HTTPRequest(HTTPMessage):
headers = '\r\n'.join(headers).strip()
return headers
@property
def encoding(self):
return UTF8
@property
def body(self):
body = self._orig.body

View File

@@ -1,7 +1,7 @@
import sys
from typing import TYPE_CHECKING, Optional
- from ...constants import UTF8
+ from ...encoding import UTF8
from ...plugins import FormatterPlugin
if TYPE_CHECKING:

View File

@@ -1,13 +1,13 @@
from abc import ABCMeta, abstractmethod
from itertools import chain
- from typing import Callable, Dict, Iterable, Tuple, Union
+ from typing import Callable, Iterable, Union
from .. import codec
from ..context import Environment
from ..constants import UTF8
from ..models import HTTPMessage, HTTPRequest
from .processing import Conversion, Formatting
- from .utils import parse_header_content_type
+ from .. import encoding
from ..context import Environment
from ..encoding import smart_decode, smart_encode, UTF8
from ..models import HTTPMessage
BINARY_SUPPRESSED_NOTICE = (
b'\n'
@@ -99,11 +99,16 @@ class EncodedStream(BaseStream):
"""
CHUNK_SIZE = 1
- def __init__(self, env=Environment(), response_as: str = None, **kwargs):
+ def __init__(
self,
env=Environment(),
mime_overwrite: str = None,
encoding_overwrite: str = None,
**kwargs
):
super().__init__(**kwargs)
- self.response_as = response_as
+ self.mime = mime_overwrite or self.msg.content_type
- self.mime, self.encoding = self._get_mime_and_encoding()
+ self.encoding = encoding_overwrite or self.msg.encoding
if env.stdout_isatty:
# Use the encoding supported by the terminal.
output_encoding = env.stdout_encoding
@@ -113,32 +118,12 @@
# Default to UTF-8 when unsure.
self.output_encoding = output_encoding or UTF8
def _get_mime_and_encoding(self) -> Tuple[str, Dict[str, str]]:
"""Parse `Content-Type` header or `--response-as` value to guess
correct mime type and encoding.
"""
# Defaults from the `Content-Type` header.
mime, options = parse_header_content_type(self.msg.content_type)
if isinstance(self.msg, HTTPRequest):
encoding = self.msg.encoding
elif self.response_as is None:
encoding = options.get('charset')
else:
# Override from the `--response-as` option.
forced_mime, forced_options = parse_header_content_type(self.response_as)
mime = forced_mime or mime
encoding = forced_options.get('charset') or options.get('charset')
return mime, encoding or ''
def iter_body(self) -> Iterable[bytes]:
for line, lf in self.msg.iter_lines(self.CHUNK_SIZE):
if b'\0' in line:
raise BinarySuppressedError()
- line = codec.decode(line, self.encoding)
+ line = smart_decode(line, self.encoding)
- yield codec.encode(line, self.output_encoding) + lf
+ yield smart_encode(line, self.output_encoding) + lf
class PrettyStream(EncodedStream):
@@ -190,9 +175,9 @@ class PrettyStream(EncodedStream):
if not isinstance(chunk, str):
# Text when a converter has been used,
# otherwise it will always be bytes.
- chunk = codec.decode(chunk, self.encoding)
+ chunk = encoding.smart_decode(chunk, self.encoding)
chunk = self.formatting.format_body(content=chunk, mime=self.mime)
- return codec.encode(chunk, self.output_encoding)
+ return encoding.smart_encode(chunk, self.output_encoding)
class BufferedPrettyStream(PrettyStream):
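The decode/re-encode round trip in `EncodedStream.iter_body` above is the heart of the change: each line is decoded with the message's charset (auto-detected when empty) and re-encoded for the terminal. A standalone sketch of that round trip (the function name is illustrative):

```python
from httpie.encoding import smart_decode, smart_encode


def reencode_line(line: bytes, message_charset: str, terminal_charset: str) -> bytes:
    """Illustrative equivalent of one EncodedStream.iter_body step."""
    if b'\0' in line:
        raise ValueError('binary data suppressed')  # HTTPie raises BinarySuppressedError here
    text = smart_decode(line, message_charset)      # '' -> auto-detect the charset
    return smart_encode(text, terminal_charset)


body = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'.encode('windows-1250')
print(reencode_line(body, 'windows-1250', 'utf-8').decode())
print(reencode_line(body, '', 'utf-8').decode())  # relies on detection instead
```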

View File

@@ -35,57 +35,3 @@ def parse_prefixed_json(data: str) -> Tuple[str, str]:
data_prefix = matches[0] if matches else ''
body = data[len(data_prefix):]
return data_prefix, body
def parse_header_content_type(line):
"""Parse a Content-Type like header.
Return the main Content-Type and a dictionary of options.
>>> parse_header_content_type('application/xml; charset=utf-8')
('application/xml', {'charset': 'utf-8'})
>>> parse_header_content_type('application/xml; charset = utf-8')
('application/xml', {'charset': 'utf-8'})
>>> parse_header_content_type('application/html+xml;ChArSeT="UTF-8"')
('application/html+xml', {'charset': 'UTF-8'})
>>> parse_header_content_type('application/xml')
('application/xml', {})
>>> parse_header_content_type(';charset=utf-8')
('', {'charset': 'utf-8'})
>>> parse_header_content_type('charset=utf-8')
('', {'charset': 'utf-8'})
>>> parse_header_content_type('multipart/mixed; boundary="gc0pJq0M:08jU534c0p"')
('multipart/mixed', {'boundary': 'gc0pJq0M:08jU534c0p'})
>>> parse_header_content_type('Message/Partial; number=3; total=3; id="oc=jpbe0M2Yt4s@foo.com"')
('Message/Partial', {'number': '3', 'total': '3', 'id': 'oc=jpbe0M2Yt4s@foo.com'})
"""
# Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L230
def _parseparam(s: str):
# Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L218
while s[:1] == ';':
s = s[1:]
end = s.find(';')
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]
# Special case: 'key=value' only (without starting with ';').
if ';' not in line and '=' in line:
line = ';' + line
parts = _parseparam(';' + line)
key = parts.__next__()
pdict = {}
for p in parts:
i = p.find('=')
if i >= 0:
name = p[:i].strip().lower()
value = p[i + 1:].strip()
if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1]
value = value.replace('\\\\', '\\').replace('\\"', '"')
pdict[name] = value
return key, pdict

View File

@@ -5,7 +5,7 @@ from typing import IO, TextIO, Tuple, Type, Union
import requests
from ..context import Environment
- from ..models import HTTPRequest, HTTPResponse
+ from ..models import HTTPRequest, HTTPResponse, HTTPMessage
from .processing import Conversion, Formatting
from .streams import (
BaseStream, BufferedPrettyStream, EncodedStream, PrettyStream, RawStream,
@@ -97,16 +97,17 @@ def build_output_stream_for_message(
with_headers: bool,
with_body: bool,
):
- stream_class, stream_kwargs = get_stream_type_and_kwargs(
+ message_type = {
env=env,
args=args,
)
message_class = {
requests.PreparedRequest: HTTPRequest,
requests.Response: HTTPResponse,
}[type(requests_message)]
stream_class, stream_kwargs = get_stream_type_and_kwargs(
env=env,
args=args,
message_type=message_type,
)
yield from stream_class(
- msg=message_class(requests_message),
+ msg=message_type(requests_message),
with_headers=with_headers,
with_body=with_body,
**stream_kwargs,
@@ -120,7 +121,8 @@ def build_output_stream_for_message(
def get_stream_type_and_kwargs(
env: Environment,
- args: argparse.Namespace
+ args: argparse.Namespace,
message_type: Type[HTTPMessage],
) -> Tuple[Type['BaseStream'], dict]:
"""Pick the right stream type and kwargs for it based on `env` and `args`.
@@ -138,8 +140,12 @@ def get_stream_type_and_kwargs(
stream_class = EncodedStream
stream_kwargs = {
'env': env,
'response_as': args.response_as,
}
if message_type is HTTPResponse:
stream_kwargs.update({
'mime_overwrite': args.response_mime,
'encoding_overwrite': args.response_charset,
})
if args.prettify:
stream_class = PrettyStream if args.stream else BufferedPrettyStream
stream_kwargs.update({
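The dispatch above is what keeps the new flags response-only: the `mime_overwrite`/`encoding_overwrite` kwargs are attached only when the stream is built for a response. A condensed sketch of that selection (standalone, not the full `get_stream_type_and_kwargs`):

```python
import argparse

from httpie.models import HTTPRequest, HTTPResponse


def override_kwargs(message_type, args: argparse.Namespace) -> dict:
    kwargs = {}
    if message_type is HTTPResponse:
        kwargs.update({
            'mime_overwrite': args.response_mime,
            'encoding_overwrite': args.response_charset,
        })
    return kwargs


ns = argparse.Namespace(response_mime='application/xml', response_charset='big5')
assert override_kwargs(HTTPResponse, ns) == {
    'mime_overwrite': 'application/xml',
    'encoding_overwrite': 'big5',
}
assert override_kwargs(HTTPRequest, ns) == {}  # requests keep their own Content-Type charset
```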

View File

@@ -191,3 +191,21 @@ def _max_age_to_expires(cookies, now):
max_age = cookie.get('max-age')
if max_age and max_age.isdigit():
cookie['expires'] = now + float(max_age)
def parse_content_type_header(header):
"""Borrowed from requests."""
tokens = header.split(';')
content_type, params = tokens[0].strip(), tokens[1:]
params_dict = {}
items_to_strip = "\"' "
for param in params:
param = param.strip()
if param:
key, value = param, True
index_of_equals = param.find("=")
if index_of_equals != -1:
key = param[:index_of_equals].strip(items_to_strip)
value = param[index_of_equals + 1:].strip(items_to_strip)
params_dict[key.lower()] = value
return content_type, params_dict
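The helper is small but takes over the Content-Type parsing previously done by the now-removed `parse_header_content_type`; quotes and surrounding whitespace are stripped and parameter names are lower-cased:

```python
from httpie.utils import parse_content_type_header

print(parse_content_type_header('text/xml; charset=UTF-8'))
# ('text/xml', {'charset': 'UTF-8'})

print(parse_content_type_header('multipart/form-data; boundary="abc 123"'))
# ('multipart/form-data', {'boundary': 'abc 123'})

print(parse_content_type_header('application/octet-stream'))
# ('application/octet-stream', {})
```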

View File

@@ -1,7 +1,7 @@
"""Test data"""
from pathlib import Path
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
def patharg(path):

View File

@@ -4,7 +4,7 @@ import pytest
from _pytest.monkeypatch import MonkeyPatch
from httpie.compat import is_windows
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
from httpie.config import (
Config, DEFAULT_CONFIG_DIRNAME, DEFAULT_RELATIVE_LEGACY_CONFIG_DIR,
DEFAULT_RELATIVE_XDG_CONFIG_HOME, DEFAULT_WINDOWS_CONFIG_DIR,

View File

@@ -41,8 +41,19 @@ def test_max_headers_no_limit(httpbin_both):
assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get')
- def test_charset_argument_unknown_encoding(httpbin_both):
+ def test_response_charset_option_unknown_encoding(httpbin_both):
- with raises(LookupError) as e:
+ r = http(
- http('--response-as', 'charset=foobar',
+ '--response-charset=foobar',
- 'GET', httpbin_both + '/get')
+ httpbin_both + '/get',
- assert 'unknown encoding: foobar' in str(e.value)
+ tolerate_error_exit_status=True
)
assert "'foobar' is not a supported encoding" in r.stderr
def test_response_mime_option_unknown_encoding(httpbin_both):
r = http(
'--response-mime=foobar',
httpbin_both + '/get',
tolerate_error_exit_status=True
)
assert "'foobar' doesnt look like a mime type" in r.stderr

View File

@@ -9,7 +9,7 @@ import httpie.__main__
from .fixtures import FILE_CONTENT, FILE_PATH
from httpie.cli.exceptions import ParseError
from httpie.context import Environment
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
from httpie.status import ExitStatus
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http

View File

@@ -11,8 +11,25 @@ from httpie.utils import JsonDictPreservingDuplicateKeys
from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE
- TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
+ TEST_JSON_XXSI_PREFIXES = [
- TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
+ r")]}',\n", ")]}',",
'while(1);',
'for(;;)',
')',
']',
'}'
]
TEST_JSON_VALUES = [
# FIXME: missing int & float
{},
{'a': 0, 'b': 0},
[],
['a', 'b'],
'foo',
True,
False,
None
]
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
@@ -37,15 +54,19 @@ JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
"""Test JSON bodies preceded by non-JSON data."""
body = data_prefix + json.dumps(json_data)
- content_type = 'application/json'
+ content_type = 'application/json;charset=utf8'
- responses.add(responses.GET, URL_EXAMPLE, body=body,
+ responses.add(
- content_type=content_type)
+ responses.GET,
URL_EXAMPLE,
body=body,
content_type=content_type
)
- colored_output = pretty in ('all', 'colors')
+ colored_output = pretty in {'all', 'colors'}
env = MockEnvironment(colors=256) if colored_output else None
- r = http('--pretty=' + pretty, URL_EXAMPLE, env=env)
+ r = http('--pretty', pretty, URL_EXAMPLE, env=env)
- indent = None if pretty in ('none', 'colors') else 4
+ indent = None if pretty in {'none', 'colors'} else 4
expected_body = data_prefix + json.dumps(json_data, indent=indent)
if colored_output:
fmt = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})

View File

@@ -15,7 +15,7 @@ from httpie.cli.argtypes import (
parse_format_options,
)
from httpie.cli.definition import parser
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
from httpie.output.formatters.colors import get_lexer
from httpie.status import ExitStatus
from .utils import COLOR, CRLF, HTTP_OK, MockEnvironment, http

View File

@@ -7,7 +7,7 @@ from unittest import mock
import pytest
from .fixtures import FILE_PATH_ARG, UNICODE
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
from httpie.plugins import AuthPlugin
from httpie.plugins.builtin import HTTPBasicAuth
from httpie.plugins.registry import plugin_manager

View File

@@ -4,14 +4,19 @@ Various unicode handling related tests.
"""
import pytest
import responses
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
from httpie.cli.constants import PRETTY_MAP
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
- from .utils import http, HTTP_OK, URL_EXAMPLE
+ from .utils import http, HTTP_OK, URL_EXAMPLE, MockEnvironment, StdinBytesIO
from .fixtures import UNICODE
ENCODINGS = [UTF8, 'windows-1250']
CZECH_TEXT = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'
assert len(CZECH_TEXT) > TOO_SMALL_SEQUENCE
CZECH_TEXT_SPECIFIC_CHARSET = 'windows-1250'
ENCODINGS = [UTF8, CZECH_TEXT_SPECIFIC_CHARSET]
def test_unicode_headers(httpbin):
@@ -122,24 +127,28 @@ def test_unicode_digest_auth(httpbin):
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content_type_header(encoding):
- responses.add(responses.GET,
+ responses.add(
responses.GET,
URL_EXAMPLE,
- body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
+ body=f'<?xml version="1.0"?>\n<c>{CZECH_TEXT}</c>'.encode(encoding),
- content_type=f'text/xml; charset={encoding.upper()}')
+ content_type=f'text/xml; charset={encoding.upper()}'
)
r = http('GET', URL_EXAMPLE)
- assert 'Financiën' in r
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
- def test_GET_encoding_detection_from_content(encoding):
+ def test_encoding_detection_from_content(encoding):
- body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
+ body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>{CZECH_TEXT}</c>'
- responses.add(responses.GET,
+ responses.add(
responses.GET,
URL_EXAMPLE,
body=body.encode(encoding),
- content_type='text/xml')
+ content_type='text/xml'
- r = http('GET', URL_EXAMPLE)
+ )
- assert 'Financiën' in r
+ r = http(URL_EXAMPLE)
assert CZECH_TEXT in r
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@@ -149,40 +158,45 @@ def test_GET_encoding_provided_by_option(pretty):
URL_EXAMPLE,
body='卷首'.encode('big5'),
content_type='text/plain; charset=utf-8')
- args = ('--pretty=' + pretty, 'GET', URL_EXAMPLE)
+ args = ('--pretty', pretty, URL_EXAMPLE)
# Encoding provided by Content-Type is incorrect, thus it should print something unreadable.
r = http(*args)
assert '卷首' not in r
r = http('--response-charset=big5', *args)
# Specifying the correct encoding, both in short & long versions, should fix it.
r = http('--response-as', 'charset=big5', *args)
assert '卷首' in r
r = http('--response-as', 'text/plain; charset=big5', *args)
assert '卷首' in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
- def test_GET_encoding_provided_by_empty_option_should_use_content_detection(encoding):
+ def test_encoding_detection_from_content_type_header(encoding):
- body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
+ responses.add(
- responses.add(responses.GET,
+ responses.POST,
URL_EXAMPLE,
- body=body.encode(encoding),
+ body=CZECH_TEXT.encode(encoding),
- content_type='text/xml')
+ content_type=f'text/plain; charset={encoding.upper()}'
- r = http('--response-as', '', 'GET', URL_EXAMPLE)
+ )
assert 'Financiën' in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_POST_encoding_detection_from_content_type_header(encoding):
responses.add(responses.POST,
URL_EXAMPLE,
body='Všichni lidé jsou si rovni.'.encode(encoding),
content_type=f'text/plain; charset={encoding.upper()}')
r = http('--form', 'POST', URL_EXAMPLE)
- assert 'Všichni lidé jsou si rovni.' in r
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
def test_request_body_content_type_charset_used(encoding):
body_str = CZECH_TEXT
body_bytes = body_str.encode(encoding)
if encoding != UTF8:
with pytest.raises(UnicodeDecodeError):
assert body_str != body_bytes.decode()
r = http(
'--offline',
URL_EXAMPLE,
f'Content-Type: text/plain; charset={encoding.upper()}',
env=MockEnvironment(
stdin=StdinBytesIO(body_bytes),
stdin_isatty=False,
)
)
assert body_str in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@@ -190,10 +204,10 @@ def test_POST_encoding_detection_from_content_type_header(encoding):
def test_POST_encoding_detection_from_content(encoding):
responses.add(responses.POST,
URL_EXAMPLE,
- body='Všichni lidé jsou si rovni.'.encode(encoding),
+ body=CZECH_TEXT.encode(encoding),
content_type='text/plain')
r = http('--form', 'POST', URL_EXAMPLE)
- assert 'Všichni lidé jsou si rovni.' in r
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@@ -202,8 +216,8 @@ def test_POST_encoding_detection_from_content(encoding):
def test_stream_encoding_detection_from_content_type_header(encoding, pretty):
responses.add(responses.GET,
URL_EXAMPLE,
- body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding),
+ body=f'<?xml version="1.0"?>\n<c>{CZECH_TEXT}</c>'.encode(encoding),
stream=True,
content_type=f'text/xml; charset={encoding.upper()}')
r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE)
- assert 'Financiën' in r
+ assert CZECH_TEXT in r

View File

@@ -3,7 +3,7 @@ import sys
import pytest
import responses
- from httpie.constants import UTF8
+ from httpie.encoding import UTF8
from httpie.output.formatters.xml import parse_xml, pretty_xml
from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID
@@ -93,7 +93,7 @@ def test_content_type_from_option():
"""
responses.add(responses.GET, URL_EXAMPLE, body=XML_DATA_RAW,
content_type='text/plain')
- args = ('--response-as', 'application/xml', URL_EXAMPLE)
+ args = ('--response-mime', 'application/xml', URL_EXAMPLE)
# Ensure the option is taken into account only for responses.
# Request
@@ -114,5 +114,5 @@ def test_content_type_from_option_incomplete():
content_type='text/plain')
# The provided Content-Type is simply ignored, and so no formatting is done.
- r = http('--response-as', 'charset=utf-8', URL_EXAMPLE)
+ r = http('--response-charset', 'utf-8', URL_EXAMPLE)
assert XML_DATA_RAW in r