diff --git a/docs/README.md b/docs/README.md index 0d9ff39f..ecfaffdd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy ### Colors and formatting +TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type) + Syntax highlighting is applied to HTTP headers and bodies (where it makes sense). You can choose your preferred color scheme via the `--style` option if you don’t like the default one. There are dozens of styles available, here are just a few notable ones: @@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get Given the encoding is not sent by the server, HTTPie will auto-detect it. -### Binary data - -Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data. -Binary data is also suppressed in redirected but prettified output. -The connection is closed as soon as we know that the response body is binary, - -```bash -$ http pie.dev/bytes/2000 -``` - -You will nearly instantly see something like this: - -```http -HTTP/1.1 200 OK -Content-Type: application/octet-stream - -+-----------------------------------------+ -| NOTE: binary data not shown in terminal | -+-----------------------------------------+ -``` ### Redirected output @@ -1320,6 +1302,36 @@ function httpless { http --pretty=all --print=hb "$@" | less -R; } ``` +### Binary data + +Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data. +Binary data is also suppressed in redirected but prettified output. 
+The connection is closed as soon as we know that the response body is binary. + +```bash +$ http pie.dev/bytes/2000 +``` + +You will nearly instantly see something like this: + +```http +HTTP/1.1 200 OK +Content-Type: application/octet-stream + ++-----------------------------------------+ +| NOTE: binary data not shown in terminal | ++-----------------------------------------+ +``` + +### Display encoding + +TODO: +(both request/response) +* we look at content-type +* else we detect +* short texts default to utf8 +(only response) +* --response-charset allows overriding ## Download mode diff --git a/httpie/cli/argtypes.py b/httpie/cli/argtypes.py index f77d5e0a..b5069b03 100644 --- a/httpie/cli/argtypes.py +++ b/httpie/cli/argtypes.py @@ -242,3 +242,19 @@ PARSED_DEFAULT_FORMAT_OPTIONS = parse_format_options( s=','.join(DEFAULT_FORMAT_OPTIONS), defaults=None, ) + + +def response_charset_type(encoding: str) -> str: + try: + ''.encode(encoding) + except LookupError: + raise argparse.ArgumentTypeError( + f'{encoding!r} is not a supported encoding') + return encoding + + +def response_mime_type(mime_type: str) -> str: + if mime_type.count('/') != 1: + raise argparse.ArgumentTypeError( + f'{mime_type!r} doesn’t look like a mime type; use type/subtype') + return mime_type diff --git a/httpie/cli/definition.py b/httpie/cli/definition.py index eccfc44b..3df4c127 100644 --- a/httpie/cli/definition.py +++ b/httpie/cli/definition.py @@ -9,7 +9,7 @@ from .. 
import __doc__, __version__ from .argparser import HTTPieArgumentParser from .argtypes import ( KeyValueArgType, SessionNameValidator, - readable_file_arg, + readable_file_arg, response_charset_type, response_mime_type, ) from .constants import ( DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS, @@ -310,18 +310,28 @@ output_processing.add_argument( ) output_processing.add_argument( - '--response-as', - metavar='CONTENT_TYPE', + '--response-charset', + metavar='ENCODING', + type=response_charset_type, help=''' - Override the response Content-Type for display purposes, e.g.: - - --response-as=application/xml - --response-as=charset=utf-8 - --response-as='application/xml; charset=utf-8' - + Override the response encoding for terminal display purposes, e.g.: + --response-charset=utf8 + --response-charset=big5 ''' ) +output_processing.add_argument( + '--response-mime', + metavar='MIME_TYPE', + type=response_mime_type, + help=''' + Override the response mime type for coloring and formatting for the terminal, e.g.: + + --response-mime=application/json + --response-mime=text/xml + + ''' +) output_processing.add_argument( '--format-options', diff --git a/httpie/client.py b/httpie/client.py index 788a56c2..5feaf483 100644 --- a/httpie/client.py +++ b/httpie/client.py @@ -12,7 +12,7 @@ import requests import urllib3 from . import __version__ from .cli.dicts import RequestHeadersDict -from .constants import UTF8 +from .encoding import UTF8 from .plugins.registry import plugin_manager from .sessions import get_httpie_session from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter diff --git a/httpie/codec.py b/httpie/codec.py deleted file mode 100644 index 61057166..00000000 --- a/httpie/codec.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Union - -from charset_normalizer import from_bytes - -from .constants import UTF8 - -Bytes = Union[bytearray, bytes] - - -def detect_encoding(content: Bytes) -> str: - """Detect the `content` encoding. 
- Fallback to UTF-8 when no suitable encoding found. - - """ - match = from_bytes(bytes(content)).best() - return match.encoding if match else UTF8 - - -def decode(content: Bytes, encoding: str) -> str: - """Decode `content` using the given `encoding`. - If no `encoding` is provided, the best effort is to guess it from `content`. - - Unicode errors are replaced. - - """ - if not encoding: - encoding = detect_encoding(content) - return content.decode(encoding, 'replace') - - -def encode(content: str, encoding: str) -> bytes: - """Encode `content` using the given `encoding`. - - Unicode errors are replaced. - - """ - return content.encode(encoding, 'replace') diff --git a/httpie/compat.py b/httpie/compat.py index f508bbb3..be84b3cf 100644 --- a/httpie/compat.py +++ b/httpie/compat.py @@ -2,3 +2,53 @@ import sys is_windows = 'win32' in str(sys.platform).lower() + + +try: + from functools import cached_property +except ImportError: + # Can be removed once we drop Python <3.8 support + # Taken from: `django.utils.functional.cached_property` + class cached_property: + """ + Decorator that converts a method with a single self argument into a + property cached on the instance. + + A cached property can be made out of an existing method: + (e.g. ``url = cached_property(get_absolute_url)``). + The optional ``name`` argument is obsolete as of Python 3.6 and will be + deprecated in Django 4.0 (#30127). + """ + name = None + + @staticmethod + def func(instance): + raise TypeError( + 'Cannot use cached_property instance without calling ' + '__set_name__() on it.' + ) + + def __init__(self, func, name=None): + self.real_func = func + self.__doc__ = getattr(func, '__doc__') + + def __set_name__(self, owner, name): + if self.name is None: + self.name = name + self.func = self.real_func + elif name != self.name: + raise TypeError( + "Cannot assign the same cached_property to two different names " + "(%r and %r)." 
% (self.name, name) + ) + + def __get__(self, instance, cls=None): + """ + Call the function and put the return value in instance.__dict__ so that + subsequent attribute access on the instance returns the cached value + instead of calling cached_property.__get__(). + """ + if instance is None: + return self + res = instance.__dict__[self.name] = self.func(instance) + return res diff --git a/httpie/config.py b/httpie/config.py index 61f1accc..e2cc5e0e 100644 --- a/httpie/config.py +++ b/httpie/config.py @@ -5,7 +5,7 @@ from typing import Union from . import __version__ from .compat import is_windows -from .constants import UTF8 +from .encoding import UTF8 ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME' diff --git a/httpie/constants.py b/httpie/constants.py deleted file mode 100644 index 8b13f5dc..00000000 --- a/httpie/constants.py +++ /dev/null @@ -1,2 +0,0 @@ -# UTF-8 encoding name -UTF8 = 'utf-8' diff --git a/httpie/context.py b/httpie/context.py index a0b87b8e..be2e0565 100644 --- a/httpie/context.py +++ b/httpie/context.py @@ -11,7 +11,7 @@ except ImportError: from .compat import is_windows from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError -from .constants import UTF8 +from .encoding import UTF8 from .utils import repr_dict diff --git a/httpie/encoding.py b/httpie/encoding.py new file mode 100644 index 00000000..67bd9472 --- /dev/null +++ b/httpie/encoding.py @@ -0,0 +1,50 @@ +from typing import Union + +from charset_normalizer import from_bytes +from charset_normalizer.constant import TOO_SMALL_SEQUENCE + +UTF8 = 'utf-8' + +ContentBytes = Union[bytearray, bytes] + + +def detect_encoding(content: ContentBytes) -> str: + """ + We default to utf8 if text too short, because the detection + can return a random encoding leading to confusing results: + + >>> too_short = ']"foo"' + >>> detected = from_bytes(too_short.encode()).best().encoding + >>> detected + 'utf_16_be' + >>> too_short.encode().decode(detected) + '崢景漢' + + """ + encoding = UTF8 + if len(content) 
> TOO_SMALL_SEQUENCE: + match = from_bytes(bytes(content)).best() + if match: + encoding = match.encoding + return encoding + + +def smart_decode(content: ContentBytes, encoding: str) -> str: + """Decode `content` using the given `encoding`. + If no `encoding` is provided, the best effort is to guess it from `content`. + + Unicode errors are replaced. + + """ + if not encoding: + encoding = detect_encoding(content) + return content.decode(encoding, 'replace') + + +def smart_encode(content: str, encoding: str) -> bytes: + """Encode `content` using the given `encoding`. + + Unicode errors are replaced. + + """ + return content.encode(encoding, 'replace') diff --git a/httpie/models.py b/httpie/models.py index 21034a04..c554dca9 100644 --- a/httpie/models.py +++ b/httpie/models.py @@ -1,34 +1,33 @@ -from abc import ABCMeta, abstractmethod -from typing import Iterable, Optional +from typing import Iterable from urllib.parse import urlsplit -from .constants import UTF8 -from .utils import split_cookies +from .utils import split_cookies, parse_content_type_header +from .compat import cached_property -class HTTPMessage(metaclass=ABCMeta): +class HTTPMessage: """Abstract class for HTTP messages.""" def __init__(self, orig): self._orig = orig - @abstractmethod def iter_body(self, chunk_size: int) -> Iterable[bytes]: """Return an iterator over the body.""" + raise NotImplementedError - @abstractmethod def iter_lines(self, chunk_size: int) -> Iterable[bytes]: """Return an iterator over the body yielding (`line`, `line_feed`).""" + raise NotImplementedError @property - @abstractmethod def headers(self) -> str: """Return a `str` with the message's headers.""" + raise NotImplementedError - @property - @abstractmethod - def encoding(self) -> Optional[str]: - """Return a `str` with the message's encoding, if known.""" + @cached_property + def encoding(self) -> str: + ct, params = parse_content_type_header(self.content_type) + return params.get('charset', '') @property def 
content_type(self) -> str: @@ -77,10 +76,6 @@ class HTTPResponse(HTTPMessage): ) return '\r\n'.join(headers) - @property - def encoding(self): - return self._orig.encoding or UTF8 - class HTTPRequest(HTTPMessage): """A :class:`requests.models.Request` wrapper.""" @@ -114,10 +109,6 @@ class HTTPRequest(HTTPMessage): headers = '\r\n'.join(headers).strip() return headers - @property - def encoding(self): - return UTF8 - @property def body(self): body = self._orig.body diff --git a/httpie/output/formatters/xml.py b/httpie/output/formatters/xml.py index 2909f7c0..3d63fbd5 100644 --- a/httpie/output/formatters/xml.py +++ b/httpie/output/formatters/xml.py @@ -1,7 +1,7 @@ import sys from typing import TYPE_CHECKING, Optional -from ...constants import UTF8 +from ...encoding import UTF8 from ...plugins import FormatterPlugin if TYPE_CHECKING: diff --git a/httpie/output/streams.py b/httpie/output/streams.py index e53998c3..72335cab 100644 --- a/httpie/output/streams.py +++ b/httpie/output/streams.py @@ -1,13 +1,13 @@ from abc import ABCMeta, abstractmethod from itertools import chain -from typing import Callable, Dict, Iterable, Tuple, Union +from typing import Callable, Iterable, Union -from .. import codec -from ..context import Environment -from ..constants import UTF8 -from ..models import HTTPMessage, HTTPRequest from .processing import Conversion, Formatting -from .utils import parse_header_content_type +from .. 
import encoding +from ..context import Environment +from ..encoding import smart_decode, smart_encode, UTF8 +from ..models import HTTPMessage + BINARY_SUPPRESSED_NOTICE = ( b'\n' @@ -99,11 +99,16 @@ class EncodedStream(BaseStream): """ CHUNK_SIZE = 1 - def __init__(self, env=Environment(), response_as: str = None, **kwargs): + def __init__( + self, + env=Environment(), + mime_overwrite: str = None, + encoding_overwrite: str = None, + **kwargs + ): super().__init__(**kwargs) - self.response_as = response_as - self.mime, self.encoding = self._get_mime_and_encoding() - + self.mime = mime_overwrite or self.msg.content_type + self.encoding = encoding_overwrite or self.msg.encoding if env.stdout_isatty: # Use the encoding supported by the terminal. output_encoding = env.stdout_encoding @@ -113,32 +118,12 @@ class EncodedStream(BaseStream): # Default to UTF-8 when unsure. self.output_encoding = output_encoding or UTF8 - def _get_mime_and_encoding(self) -> Tuple[str, Dict[str, str]]: - """Parse `Content-Type` header or `--response-as` value to guess - correct mime type and encoding. - - """ - # Defaults from the `Content-Type` header. - mime, options = parse_header_content_type(self.msg.content_type) - - if isinstance(self.msg, HTTPRequest): - encoding = self.msg.encoding - elif self.response_as is None: - encoding = options.get('charset') - else: - # Override from the `--response-as` option. 
- forced_mime, forced_options = parse_header_content_type(self.response_as) - mime = forced_mime or mime - encoding = forced_options.get('charset') or options.get('charset') - - return mime, encoding or '' - def iter_body(self) -> Iterable[bytes]: for line, lf in self.msg.iter_lines(self.CHUNK_SIZE): if b'\0' in line: raise BinarySuppressedError() - line = codec.decode(line, self.encoding) - yield codec.encode(line, self.output_encoding) + lf + line = smart_decode(line, self.encoding) + yield smart_encode(line, self.output_encoding) + lf class PrettyStream(EncodedStream): @@ -190,9 +175,9 @@ class PrettyStream(EncodedStream): if not isinstance(chunk, str): # Text when a converter has been used, # otherwise it will always be bytes. - chunk = codec.decode(chunk, self.encoding) + chunk = encoding.smart_decode(chunk, self.encoding) chunk = self.formatting.format_body(content=chunk, mime=self.mime) - return codec.encode(chunk, self.output_encoding) + return encoding.smart_encode(chunk, self.output_encoding) class BufferedPrettyStream(PrettyStream): diff --git a/httpie/output/utils.py b/httpie/output/utils.py index f53aab21..875e8855 100644 --- a/httpie/output/utils.py +++ b/httpie/output/utils.py @@ -35,57 +35,3 @@ def parse_prefixed_json(data: str) -> Tuple[str, str]: data_prefix = matches[0] if matches else '' body = data[len(data_prefix):] return data_prefix, body - - -def parse_header_content_type(line): - """Parse a Content-Type like header. - Return the main Content-Type and a dictionary of options. 
- >>> parse_header_content_type('application/xml; charset=utf-8') - ('application/xml', {'charset': 'utf-8'}) - >>> parse_header_content_type('application/xml; charset = utf-8') - ('application/xml', {'charset': 'utf-8'}) - >>> parse_header_content_type('application/html+xml;ChArSeT="UTF-8"') - ('application/html+xml', {'charset': 'UTF-8'}) - >>> parse_header_content_type('application/xml') - ('application/xml', {}) - >>> parse_header_content_type(';charset=utf-8') - ('', {'charset': 'utf-8'}) - >>> parse_header_content_type('charset=utf-8') - ('', {'charset': 'utf-8'}) - >>> parse_header_content_type('multipart/mixed; boundary="gc0pJq0M:08jU534c0p"') - ('multipart/mixed', {'boundary': 'gc0pJq0M:08jU534c0p'}) - >>> parse_header_content_type('Message/Partial; number=3; total=3; id="oc=jpbe0M2Yt4s@foo.com"') - ('Message/Partial', {'number': '3', 'total': '3', 'id': 'oc=jpbe0M2Yt4s@foo.com'}) - """ - # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L230 - - def _parseparam(s: str): - # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L218 - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - - # Special case: 'key=value' only (without starting with ';'). 
- if ';' not in line and '=' in line: - line = ';' + line - - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i + 1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict diff --git a/httpie/output/writer.py b/httpie/output/writer.py index 05b389eb..6f251f7c 100644 --- a/httpie/output/writer.py +++ b/httpie/output/writer.py @@ -5,7 +5,7 @@ from typing import IO, TextIO, Tuple, Type, Union import requests from ..context import Environment -from ..models import HTTPRequest, HTTPResponse +from ..models import HTTPRequest, HTTPResponse, HTTPMessage from .processing import Conversion, Formatting from .streams import ( BaseStream, BufferedPrettyStream, EncodedStream, PrettyStream, RawStream, @@ -97,16 +97,17 @@ def build_output_stream_for_message( with_headers: bool, with_body: bool, ): - stream_class, stream_kwargs = get_stream_type_and_kwargs( - env=env, - args=args, - ) - message_class = { + message_type = { requests.PreparedRequest: HTTPRequest, requests.Response: HTTPResponse, }[type(requests_message)] + stream_class, stream_kwargs = get_stream_type_and_kwargs( + env=env, + args=args, + message_type=message_type, + ) yield from stream_class( - msg=message_class(requests_message), + msg=message_type(requests_message), with_headers=with_headers, with_body=with_body, **stream_kwargs, @@ -120,7 +121,8 @@ def build_output_stream_for_message( def get_stream_type_and_kwargs( env: Environment, - args: argparse.Namespace + args: argparse.Namespace, + message_type: Type[HTTPMessage], ) -> Tuple[Type['BaseStream'], dict]: """Pick the right stream type and kwargs for it based on `env` and `args`. 
@@ -138,8 +140,12 @@ def get_stream_type_and_kwargs( stream_class = EncodedStream stream_kwargs = { 'env': env, - 'response_as': args.response_as, } + if message_type is HTTPResponse: + stream_kwargs.update({ + 'mime_overwrite': args.response_mime, + 'encoding_overwrite': args.response_charset, + }) if args.prettify: stream_class = PrettyStream if args.stream else BufferedPrettyStream stream_kwargs.update({ diff --git a/httpie/utils.py b/httpie/utils.py index c155aac5..4112cfbd 100644 --- a/httpie/utils.py +++ b/httpie/utils.py @@ -191,3 +191,21 @@ def _max_age_to_expires(cookies, now): max_age = cookie.get('max-age') if max_age and max_age.isdigit(): cookie['expires'] = now + float(max_age) + + +def parse_content_type_header(header): + """Borrowed from requests.""" + tokens = header.split(';') + content_type, params = tokens[0].strip(), tokens[1:] + params_dict = {} + items_to_strip = "\"' " + for param in params: + param = param.strip() + if param: + key, value = param, True + index_of_equals = param.find("=") + if index_of_equals != -1: + key = param[:index_of_equals].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) + params_dict[key.lower()] = value + return content_type, params_dict diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index cf979e5f..bc795195 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,7 +1,7 @@ """Test data""" from pathlib import Path -from httpie.constants import UTF8 +from httpie.encoding import UTF8 def patharg(path): diff --git a/tests/test_config.py b/tests/test_config.py index 680b16b9..be19d572 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,7 +4,7 @@ import pytest from _pytest.monkeypatch import MonkeyPatch from httpie.compat import is_windows -from httpie.constants import UTF8 +from httpie.encoding import UTF8 from httpie.config import ( Config, DEFAULT_CONFIG_DIRNAME, DEFAULT_RELATIVE_LEGACY_CONFIG_DIR, 
DEFAULT_RELATIVE_XDG_CONFIG_HOME, DEFAULT_WINDOWS_CONFIG_DIR, diff --git a/tests/test_errors.py b/tests/test_errors.py index abbf7235..f636db82 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -41,8 +41,19 @@ def test_max_headers_no_limit(httpbin_both): assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get') -def test_charset_argument_unknown_encoding(httpbin_both): - with raises(LookupError) as e: - http('--response-as', 'charset=foobar', - 'GET', httpbin_both + '/get') - assert 'unknown encoding: foobar' in str(e.value) +def test_response_charset_option_unknown_encoding(httpbin_both): + r = http( + '--response-charset=foobar', + httpbin_both + '/get', + tolerate_error_exit_status=True + ) + assert "'foobar' is not a supported encoding" in r.stderr + + +def test_response_mime_option_unknown_encoding(httpbin_both): + r = http( + '--response-mime=foobar', + httpbin_both + '/get', + tolerate_error_exit_status=True + ) + assert "'foobar' doesn’t look like a mime type" in r.stderr diff --git a/tests/test_httpie.py b/tests/test_httpie.py index 3ed1bb2c..a6cda1c3 100644 --- a/tests/test_httpie.py +++ b/tests/test_httpie.py @@ -9,7 +9,7 @@ import httpie.__main__ from .fixtures import FILE_CONTENT, FILE_PATH from httpie.cli.exceptions import ParseError from httpie.context import Environment -from httpie.constants import UTF8 +from httpie.encoding import UTF8 from httpie.status import ExitStatus from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http diff --git a/tests/test_json.py b/tests/test_json.py index 8d73c779..26365e28 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -11,8 +11,25 @@ from httpie.utils import JsonDictPreservingDuplicateKeys from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH from .utils import MockEnvironment, http, URL_EXAMPLE -TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}') -TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: 
missing int & float +TEST_JSON_XXSI_PREFIXES = [ + r")]}',\n", ")]}',", + 'while(1);', + 'for(;;)', + ')', + ']', + '}' +] +TEST_JSON_VALUES = [ + # FIXME: missing int & float + {}, + {'a': 0, 'b': 0}, + [], + ['a', 'b'], + 'foo', + True, + False, + None +] TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m' JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}' @@ -37,15 +54,19 @@ JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty): """Test JSON bodies preceded by non-JSON data.""" body = data_prefix + json.dumps(json_data) - content_type = 'application/json' - responses.add(responses.GET, URL_EXAMPLE, body=body, - content_type=content_type) + content_type = 'application/json;charset=utf8' + responses.add( + responses.GET, + URL_EXAMPLE, + body=body, + content_type=content_type + ) - colored_output = pretty in ('all', 'colors') + colored_output = pretty in {'all', 'colors'} env = MockEnvironment(colors=256) if colored_output else None - r = http('--pretty=' + pretty, URL_EXAMPLE, env=env) + r = http('--pretty', pretty, URL_EXAMPLE, env=env) - indent = None if pretty in ('none', 'colors') else 4 + indent = None if pretty in {'none', 'colors'} else 4 expected_body = data_prefix + json.dumps(json_data, indent=indent) if colored_output: fmt = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}}) diff --git a/tests/test_output.py b/tests/test_output.py index 0d0ee2f0..5a0db654 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -15,7 +15,7 @@ from httpie.cli.argtypes import ( parse_format_options, ) from httpie.cli.definition import parser -from httpie.constants import UTF8 +from httpie.encoding import UTF8 from httpie.output.formatters.colors import get_lexer from httpie.status import ExitStatus from .utils import COLOR, CRLF, HTTP_OK, MockEnvironment, http diff --git a/tests/test_sessions.py 
b/tests/test_sessions.py index 3568382d..b5b5a67a 100644 --- a/tests/test_sessions.py +++ b/tests/test_sessions.py @@ -7,7 +7,7 @@ from unittest import mock import pytest from .fixtures import FILE_PATH_ARG, UNICODE -from httpie.constants import UTF8 +from httpie.encoding import UTF8 from httpie.plugins import AuthPlugin from httpie.plugins.builtin import HTTPBasicAuth from httpie.plugins.registry import plugin_manager diff --git a/tests/test_unicode.py b/tests/test_unicode.py index b1267592..90cbe24b 100644 --- a/tests/test_unicode.py +++ b/tests/test_unicode.py @@ -4,14 +4,19 @@ Various unicode handling related tests. """ import pytest import responses +from charset_normalizer.constant import TOO_SMALL_SEQUENCE from httpie.cli.constants import PRETTY_MAP -from httpie.constants import UTF8 +from httpie.encoding import UTF8 -from .utils import http, HTTP_OK, URL_EXAMPLE +from .utils import http, HTTP_OK, URL_EXAMPLE, MockEnvironment, StdinBytesIO from .fixtures import UNICODE -ENCODINGS = [UTF8, 'windows-1250'] + +CZECH_TEXT = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.' 
+assert len(CZECH_TEXT) > TOO_SMALL_SEQUENCE +CZECH_TEXT_SPECIFIC_CHARSET = 'windows-1250' +ENCODINGS = [UTF8, CZECH_TEXT_SPECIFIC_CHARSET] def test_unicode_headers(httpbin): @@ -122,24 +127,28 @@ def test_unicode_digest_auth(httpbin): @pytest.mark.parametrize('encoding', ENCODINGS) @responses.activate def test_GET_encoding_detection_from_content_type_header(encoding): - responses.add(responses.GET, - URL_EXAMPLE, - body='\nFinanciën'.encode(encoding), - content_type=f'text/xml; charset={encoding.upper()}') + responses.add( + responses.GET, + URL_EXAMPLE, + body=f'\n{CZECH_TEXT}'.encode(encoding), + content_type=f'text/xml; charset={encoding.upper()}' + ) r = http('GET', URL_EXAMPLE) - assert 'Financiën' in r + assert CZECH_TEXT in r @pytest.mark.parametrize('encoding', ENCODINGS) @responses.activate -def test_GET_encoding_detection_from_content(encoding): - body = f'\nFinanciën' - responses.add(responses.GET, - URL_EXAMPLE, - body=body.encode(encoding), - content_type='text/xml') - r = http('GET', URL_EXAMPLE) - assert 'Financiën' in r +def test_encoding_detection_from_content(encoding): + body = f'\n{CZECH_TEXT}' + responses.add( + responses.GET, + URL_EXAMPLE, + body=body.encode(encoding), + content_type='text/xml' + ) + r = http(URL_EXAMPLE) + assert CZECH_TEXT in r @pytest.mark.parametrize('pretty', PRETTY_MAP.keys()) @@ -149,40 +158,45 @@ def test_GET_encoding_provided_by_option(pretty): URL_EXAMPLE, body='卷首'.encode('big5'), content_type='text/plain; charset=utf-8') - args = ('--pretty=' + pretty, 'GET', URL_EXAMPLE) + args = ('--pretty', pretty, URL_EXAMPLE) # Encoding provided by Content-Type is incorrect, thus it should print something unreadable. r = http(*args) assert '卷首' not in r - - # Specifying the correct encoding, both in short & long versions, should fix it. 
- r = http('--response-as', 'charset=big5', *args) - assert '卷首' in r - r = http('--response-as', 'text/plain; charset=big5', *args) + r = http('--response-charset=big5', *args) assert '卷首' in r @pytest.mark.parametrize('encoding', ENCODINGS) @responses.activate -def test_GET_encoding_provided_by_empty_option_should_use_content_detection(encoding): - body = f'\nFinanciën' - responses.add(responses.GET, - URL_EXAMPLE, - body=body.encode(encoding), - content_type='text/xml') - r = http('--response-as', '', 'GET', URL_EXAMPLE) - assert 'Financiën' in r - - -@pytest.mark.parametrize('encoding', ENCODINGS) -@responses.activate -def test_POST_encoding_detection_from_content_type_header(encoding): - responses.add(responses.POST, - URL_EXAMPLE, - body='Všichni lidé jsou si rovni.'.encode(encoding), - content_type=f'text/plain; charset={encoding.upper()}') +def test_encoding_detection_from_content_type_header(encoding): + responses.add( + responses.POST, + URL_EXAMPLE, + body=CZECH_TEXT.encode(encoding), + content_type=f'text/plain; charset={encoding.upper()}' + ) r = http('--form', 'POST', URL_EXAMPLE) - assert 'Všichni lidé jsou si rovni.' 
in r + assert CZECH_TEXT in r + + +@pytest.mark.parametrize('encoding', ENCODINGS) +def test_request_body_content_type_charset_used(encoding): + body_str = CZECH_TEXT + body_bytes = body_str.encode(encoding) + if encoding != UTF8: + with pytest.raises(UnicodeDecodeError): + assert body_str != body_bytes.decode() + r = http( + '--offline', + URL_EXAMPLE, + f'Content-Type: text/plain; charset={encoding.upper()}', + env=MockEnvironment( + stdin=StdinBytesIO(body_bytes), + stdin_isatty=False, + ) + ) + assert body_str in r @pytest.mark.parametrize('encoding', ENCODINGS) @@ -190,10 +204,10 @@ def test_POST_encoding_detection_from_content_type_header(encoding): def test_POST_encoding_detection_from_content(encoding): responses.add(responses.POST, URL_EXAMPLE, - body='Všichni lidé jsou si rovni.'.encode(encoding), + body=CZECH_TEXT.encode(encoding), content_type='text/plain') r = http('--form', 'POST', URL_EXAMPLE) - assert 'Všichni lidé jsou si rovni.' in r + assert CZECH_TEXT in r @pytest.mark.parametrize('encoding', ENCODINGS) @@ -202,8 +216,8 @@ def test_POST_encoding_detection_from_content(encoding): def test_stream_encoding_detection_from_content_type_header(encoding, pretty): responses.add(responses.GET, URL_EXAMPLE, - body='\nFinanciën'.encode(encoding), + body=f'\n{CZECH_TEXT}'.encode(encoding), stream=True, content_type=f'text/xml; charset={encoding.upper()}') r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE) - assert 'Financiën' in r + assert CZECH_TEXT in r diff --git a/tests/test_xml.py b/tests/test_xml.py index 5727c94a..c0d50f54 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -3,7 +3,7 @@ import sys import pytest import responses -from httpie.constants import UTF8 +from httpie.encoding import UTF8 from httpie.output.formatters.xml import parse_xml, pretty_xml from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID @@ -93,7 +93,7 @@ def test_content_type_from_option(): """ responses.add(responses.GET, URL_EXAMPLE, 
body=XML_DATA_RAW, content_type='text/plain') - args = ('--response-as', 'application/xml', URL_EXAMPLE) + args = ('--response-mime', 'application/xml', URL_EXAMPLE) # Ensure the option is taken into account only for responses. # Request @@ -114,5 +114,5 @@ def test_content_type_from_option_incomplete(): content_type='text/plain') # The provided Content-Type is simply ignored, and so no formatting is done. - r = http('--response-as', 'charset=utf-8', URL_EXAMPLE) + r = http('--response-charset', 'utf-8', URL_EXAMPLE) assert XML_DATA_RAW in r