diff --git a/docs/README.md b/docs/README.md
index 0d9ff39f..ecfaffdd 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy
### Colors and formatting
+TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type)
+
Syntax highlighting is applied to HTTP headers and bodies (where it makes sense).
You can choose your preferred color scheme via the `--style` option if you don’t like the default one.
There are dozens of styles available, here are just a few notable ones:
@@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get
Given the encoding is not sent by the server, HTTPie will auto-detect it.
-### Binary data
-
-Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
-Binary data is also suppressed in redirected but prettified output.
-The connection is closed as soon as we know that the response body is binary,
-
-```bash
-$ http pie.dev/bytes/2000
-```
-
-You will nearly instantly see something like this:
-
-```http
-HTTP/1.1 200 OK
-Content-Type: application/octet-stream
-
-+-----------------------------------------+
-| NOTE: binary data not shown in terminal |
-+-----------------------------------------+
-```
### Redirected output
@@ -1320,6 +1302,36 @@ function httpless {
http --pretty=all --print=hb "$@" | less -R;
}
```
+### Binary data
+
+Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
+Binary data is also suppressed in redirected but prettified output.
+The connection is closed as soon as we know that the response body is binary. For example:
+
+```bash
+$ http pie.dev/bytes/2000
+```
+
+You will nearly instantly see something like this:
+
+```http
+HTTP/1.1 200 OK
+Content-Type: application/octet-stream
+
++-----------------------------------------+
+| NOTE: binary data not shown in terminal |
++-----------------------------------------+
+```
+
+### Display encoding
+
+TODO:
+(both request/response)
+* we look at content-type
+* else we detect
+* short texts default to utf8
+(only response)
+* --response-charset allows overwriting
## Download mode
diff --git a/httpie/cli/argtypes.py b/httpie/cli/argtypes.py
index f77d5e0a..b5069b03 100644
--- a/httpie/cli/argtypes.py
+++ b/httpie/cli/argtypes.py
@@ -242,3 +242,19 @@ PARSED_DEFAULT_FORMAT_OPTIONS = parse_format_options(
s=','.join(DEFAULT_FORMAT_OPTIONS),
defaults=None,
)
+
+
+def response_charset_type(encoding: str) -> str:
+ try:
+ ''.encode(encoding)
+ except LookupError:
+ raise argparse.ArgumentTypeError(
+ f'{encoding!r} is not a supported encoding')
+ return encoding
+
+
+def response_mime_type(mime_type: str) -> str:
+ if mime_type.count('/') != 1:
+ raise argparse.ArgumentTypeError(
+ f'{mime_type!r} doesn’t look like a mime type; use type/subtype')
+ return mime_type
diff --git a/httpie/cli/definition.py b/httpie/cli/definition.py
index eccfc44b..3df4c127 100644
--- a/httpie/cli/definition.py
+++ b/httpie/cli/definition.py
@@ -9,7 +9,7 @@ from .. import __doc__, __version__
from .argparser import HTTPieArgumentParser
from .argtypes import (
KeyValueArgType, SessionNameValidator,
- readable_file_arg,
+ readable_file_arg, response_charset_type, response_mime_type,
)
from .constants import (
DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS,
@@ -310,18 +310,28 @@ output_processing.add_argument(
)
output_processing.add_argument(
- '--response-as',
- metavar='CONTENT_TYPE',
+ '--response-charset',
+ metavar='ENCODING',
+ type=response_charset_type,
help='''
- Override the response Content-Type for display purposes, e.g.:
-
- --response-as=application/xml
- --response-as=charset=utf-8
- --response-as='application/xml; charset=utf-8'
-
+ Override the response encoding for terminal display purposes, e.g.:
+ --response-charset=utf8
+ --response-charset=big5
'''
)
+output_processing.add_argument(
+ '--response-mime',
+ metavar='MIME_TYPE',
+ type=response_mime_type,
+ help='''
+ Override the response mime type for coloring and formatting for the terminal, e.g.:
+
+ --response-mime=application/json
+ --response-mime=text/xml
+
+ '''
+)
output_processing.add_argument(
'--format-options',
diff --git a/httpie/client.py b/httpie/client.py
index 788a56c2..5feaf483 100644
--- a/httpie/client.py
+++ b/httpie/client.py
@@ -12,7 +12,7 @@ import requests
import urllib3
from . import __version__
from .cli.dicts import RequestHeadersDict
-from .constants import UTF8
+from .encoding import UTF8
from .plugins.registry import plugin_manager
from .sessions import get_httpie_session
from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter
diff --git a/httpie/codec.py b/httpie/codec.py
deleted file mode 100644
index 61057166..00000000
--- a/httpie/codec.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from typing import Union
-
-from charset_normalizer import from_bytes
-
-from .constants import UTF8
-
-Bytes = Union[bytearray, bytes]
-
-
-def detect_encoding(content: Bytes) -> str:
- """Detect the `content` encoding.
- Fallback to UTF-8 when no suitable encoding found.
-
- """
- match = from_bytes(bytes(content)).best()
- return match.encoding if match else UTF8
-
-
-def decode(content: Bytes, encoding: str) -> str:
- """Decode `content` using the given `encoding`.
- If no `encoding` is provided, the best effort is to guess it from `content`.
-
- Unicode errors are replaced.
-
- """
- if not encoding:
- encoding = detect_encoding(content)
- return content.decode(encoding, 'replace')
-
-
-def encode(content: str, encoding: str) -> bytes:
- """Encode `content` using the given `encoding`.
-
- Unicode errors are replaced.
-
- """
- return content.encode(encoding, 'replace')
diff --git a/httpie/compat.py b/httpie/compat.py
index f508bbb3..be84b3cf 100644
--- a/httpie/compat.py
+++ b/httpie/compat.py
@@ -2,3 +2,53 @@ import sys
is_windows = 'win32' in str(sys.platform).lower()
+
+
+try:
+ from functools import cached_property
+except ImportError:
+ # Can be removed once we drop Python <3.8 support
+ # Taken from: `django.utils.functional.cached_property`
+ class cached_property:
+ """
+ Decorator that converts a method with a single self argument into a
+ property cached on the instance.
+
+ A cached property can be made out of an existing method:
+ (e.g. ``url = cached_property(get_absolute_url)``).
+ The optional ``name`` argument is obsolete as of Python 3.6 and will be
+ deprecated in Django 4.0 (#30127).
+ """
+ name = None
+
+ @staticmethod
+ def func(instance):
+ raise TypeError(
+ 'Cannot use cached_property instance without calling '
+ '__set_name__() on it.'
+ )
+
+ def __init__(self, func, name=None):
+ self.real_func = func
+ self.__doc__ = getattr(func, '__doc__')
+
+ def __set_name__(self, owner, name):
+ if self.name is None:
+ self.name = name
+ self.func = self.real_func
+ elif name != self.name:
+ raise TypeError(
+ "Cannot assign the same cached_property to two different names "
+ "(%r and %r)." % (self.name, name)
+ )
+
+ def __get__(self, instance, cls=None):
+ """
+ Call the function and put the return value in instance.__dict__ so that
+ subsequent attribute access on the instance returns the cached value
+ instead of calling cached_property.__get__().
+ """
+ if instance is None:
+ return self
+ res = instance.__dict__[self.name] = self.func(instance)
+ return res
diff --git a/httpie/config.py b/httpie/config.py
index 61f1accc..e2cc5e0e 100644
--- a/httpie/config.py
+++ b/httpie/config.py
@@ -5,7 +5,7 @@ from typing import Union
from . import __version__
from .compat import is_windows
-from .constants import UTF8
+from .encoding import UTF8
ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME'
diff --git a/httpie/constants.py b/httpie/constants.py
deleted file mode 100644
index 8b13f5dc..00000000
--- a/httpie/constants.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# UTF-8 encoding name
-UTF8 = 'utf-8'
diff --git a/httpie/context.py b/httpie/context.py
index a0b87b8e..be2e0565 100644
--- a/httpie/context.py
+++ b/httpie/context.py
@@ -11,7 +11,7 @@ except ImportError:
from .compat import is_windows
from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError
-from .constants import UTF8
+from .encoding import UTF8
from .utils import repr_dict
diff --git a/httpie/encoding.py b/httpie/encoding.py
new file mode 100644
index 00000000..67bd9472
--- /dev/null
+++ b/httpie/encoding.py
@@ -0,0 +1,50 @@
+from typing import Union
+
+from charset_normalizer import from_bytes
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE
+
+UTF8 = 'utf-8'
+
+ContentBytes = Union[bytearray, bytes]
+
+
+def detect_encoding(content: ContentBytes) -> str:
+ """
+ We default to UTF-8 if the text is too short, because the detection
+ can return a random encoding, leading to confusing results:
+
+ >>> too_short = ']"foo"'
+ >>> detected = from_bytes(too_short.encode()).best().encoding
+ >>> detected
+ 'utf_16_be'
+ >>> too_short.encode().decode(detected)
+ '崢景漢'
+
+ """
+ encoding = UTF8
+ if len(content) > TOO_SMALL_SEQUENCE:
+ match = from_bytes(bytes(content)).best()
+ if match:
+ encoding = match.encoding
+ return encoding
+
+
+def smart_decode(content: ContentBytes, encoding: str) -> str:
+ """Decode `content` using the given `encoding`.
+ If no `encoding` is provided, the best effort is to guess it from `content`.
+
+ Unicode errors are replaced.
+
+ """
+ if not encoding:
+ encoding = detect_encoding(content)
+ return content.decode(encoding, 'replace')
+
+
+def smart_encode(content: str, encoding: str) -> bytes:
+ """Encode `content` using the given `encoding`.
+
+ Unicode errors are replaced.
+
+ """
+ return content.encode(encoding, 'replace')
diff --git a/httpie/models.py b/httpie/models.py
index 21034a04..c554dca9 100644
--- a/httpie/models.py
+++ b/httpie/models.py
@@ -1,34 +1,33 @@
-from abc import ABCMeta, abstractmethod
-from typing import Iterable, Optional
+from typing import Iterable
from urllib.parse import urlsplit
-from .constants import UTF8
-from .utils import split_cookies
+from .utils import split_cookies, parse_content_type_header
+from .compat import cached_property
-class HTTPMessage(metaclass=ABCMeta):
+class HTTPMessage:
"""Abstract class for HTTP messages."""
def __init__(self, orig):
self._orig = orig
- @abstractmethod
def iter_body(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body."""
+ raise NotImplementedError
- @abstractmethod
def iter_lines(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body yielding (`line`, `line_feed`)."""
+ raise NotImplementedError
@property
- @abstractmethod
def headers(self) -> str:
"""Return a `str` with the message's headers."""
+ raise NotImplementedError
- @property
- @abstractmethod
- def encoding(self) -> Optional[str]:
- """Return a `str` with the message's encoding, if known."""
+ @cached_property
+ def encoding(self) -> str:
+ ct, params = parse_content_type_header(self.content_type)
+ return params.get('charset', '')
@property
def content_type(self) -> str:
@@ -77,10 +76,6 @@ class HTTPResponse(HTTPMessage):
)
return '\r\n'.join(headers)
- @property
- def encoding(self):
- return self._orig.encoding or UTF8
-
class HTTPRequest(HTTPMessage):
"""A :class:`requests.models.Request` wrapper."""
@@ -114,10 +109,6 @@ class HTTPRequest(HTTPMessage):
headers = '\r\n'.join(headers).strip()
return headers
- @property
- def encoding(self):
- return UTF8
-
@property
def body(self):
body = self._orig.body
diff --git a/httpie/output/formatters/xml.py b/httpie/output/formatters/xml.py
index 2909f7c0..3d63fbd5 100644
--- a/httpie/output/formatters/xml.py
+++ b/httpie/output/formatters/xml.py
@@ -1,7 +1,7 @@
import sys
from typing import TYPE_CHECKING, Optional
-from ...constants import UTF8
+from ...encoding import UTF8
from ...plugins import FormatterPlugin
if TYPE_CHECKING:
diff --git a/httpie/output/streams.py b/httpie/output/streams.py
index e53998c3..72335cab 100644
--- a/httpie/output/streams.py
+++ b/httpie/output/streams.py
@@ -1,13 +1,13 @@
from abc import ABCMeta, abstractmethod
from itertools import chain
-from typing import Callable, Dict, Iterable, Tuple, Union
+from typing import Callable, Iterable, Union
-from .. import codec
-from ..context import Environment
-from ..constants import UTF8
-from ..models import HTTPMessage, HTTPRequest
from .processing import Conversion, Formatting
-from .utils import parse_header_content_type
+from .. import encoding
+from ..context import Environment
+from ..encoding import smart_decode, smart_encode, UTF8
+from ..models import HTTPMessage
+
BINARY_SUPPRESSED_NOTICE = (
b'\n'
@@ -99,11 +99,16 @@ class EncodedStream(BaseStream):
"""
CHUNK_SIZE = 1
- def __init__(self, env=Environment(), response_as: str = None, **kwargs):
+ def __init__(
+ self,
+ env=Environment(),
+ mime_overwrite: str = None,
+ encoding_overwrite: str = None,
+ **kwargs
+ ):
super().__init__(**kwargs)
- self.response_as = response_as
- self.mime, self.encoding = self._get_mime_and_encoding()
-
+ self.mime = mime_overwrite or self.msg.content_type
+ self.encoding = encoding_overwrite or self.msg.encoding
if env.stdout_isatty:
# Use the encoding supported by the terminal.
output_encoding = env.stdout_encoding
@@ -113,32 +118,12 @@ class EncodedStream(BaseStream):
# Default to UTF-8 when unsure.
self.output_encoding = output_encoding or UTF8
- def _get_mime_and_encoding(self) -> Tuple[str, Dict[str, str]]:
- """Parse `Content-Type` header or `--response-as` value to guess
- correct mime type and encoding.
-
- """
- # Defaults from the `Content-Type` header.
- mime, options = parse_header_content_type(self.msg.content_type)
-
- if isinstance(self.msg, HTTPRequest):
- encoding = self.msg.encoding
- elif self.response_as is None:
- encoding = options.get('charset')
- else:
- # Override from the `--response-as` option.
- forced_mime, forced_options = parse_header_content_type(self.response_as)
- mime = forced_mime or mime
- encoding = forced_options.get('charset') or options.get('charset')
-
- return mime, encoding or ''
-
def iter_body(self) -> Iterable[bytes]:
for line, lf in self.msg.iter_lines(self.CHUNK_SIZE):
if b'\0' in line:
raise BinarySuppressedError()
- line = codec.decode(line, self.encoding)
- yield codec.encode(line, self.output_encoding) + lf
+ line = smart_decode(line, self.encoding)
+ yield smart_encode(line, self.output_encoding) + lf
class PrettyStream(EncodedStream):
@@ -190,9 +175,9 @@ class PrettyStream(EncodedStream):
if not isinstance(chunk, str):
# Text when a converter has been used,
# otherwise it will always be bytes.
- chunk = codec.decode(chunk, self.encoding)
+ chunk = encoding.smart_decode(chunk, self.encoding)
chunk = self.formatting.format_body(content=chunk, mime=self.mime)
- return codec.encode(chunk, self.output_encoding)
+ return encoding.smart_encode(chunk, self.output_encoding)
class BufferedPrettyStream(PrettyStream):
diff --git a/httpie/output/utils.py b/httpie/output/utils.py
index f53aab21..875e8855 100644
--- a/httpie/output/utils.py
+++ b/httpie/output/utils.py
@@ -35,57 +35,3 @@ def parse_prefixed_json(data: str) -> Tuple[str, str]:
data_prefix = matches[0] if matches else ''
body = data[len(data_prefix):]
return data_prefix, body
-
-
-def parse_header_content_type(line):
- """Parse a Content-Type like header.
- Return the main Content-Type and a dictionary of options.
- >>> parse_header_content_type('application/xml; charset=utf-8')
- ('application/xml', {'charset': 'utf-8'})
- >>> parse_header_content_type('application/xml; charset = utf-8')
- ('application/xml', {'charset': 'utf-8'})
- >>> parse_header_content_type('application/html+xml;ChArSeT="UTF-8"')
- ('application/html+xml', {'charset': 'UTF-8'})
- >>> parse_header_content_type('application/xml')
- ('application/xml', {})
- >>> parse_header_content_type(';charset=utf-8')
- ('', {'charset': 'utf-8'})
- >>> parse_header_content_type('charset=utf-8')
- ('', {'charset': 'utf-8'})
- >>> parse_header_content_type('multipart/mixed; boundary="gc0pJq0M:08jU534c0p"')
- ('multipart/mixed', {'boundary': 'gc0pJq0M:08jU534c0p'})
- >>> parse_header_content_type('Message/Partial; number=3; total=3; id="oc=jpbe0M2Yt4s@foo.com"')
- ('Message/Partial', {'number': '3', 'total': '3', 'id': 'oc=jpbe0M2Yt4s@foo.com'})
- """
- # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L230
-
- def _parseparam(s: str):
- # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L218
- while s[:1] == ';':
- s = s[1:]
- end = s.find(';')
- while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
- end = s.find(';', end + 1)
- if end < 0:
- end = len(s)
- f = s[:end]
- yield f.strip()
- s = s[end:]
-
- # Special case: 'key=value' only (without starting with ';').
- if ';' not in line and '=' in line:
- line = ';' + line
-
- parts = _parseparam(';' + line)
- key = parts.__next__()
- pdict = {}
- for p in parts:
- i = p.find('=')
- if i >= 0:
- name = p[:i].strip().lower()
- value = p[i + 1:].strip()
- if len(value) >= 2 and value[0] == value[-1] == '"':
- value = value[1:-1]
- value = value.replace('\\\\', '\\').replace('\\"', '"')
- pdict[name] = value
- return key, pdict
diff --git a/httpie/output/writer.py b/httpie/output/writer.py
index 05b389eb..6f251f7c 100644
--- a/httpie/output/writer.py
+++ b/httpie/output/writer.py
@@ -5,7 +5,7 @@ from typing import IO, TextIO, Tuple, Type, Union
import requests
from ..context import Environment
-from ..models import HTTPRequest, HTTPResponse
+from ..models import HTTPRequest, HTTPResponse, HTTPMessage
from .processing import Conversion, Formatting
from .streams import (
BaseStream, BufferedPrettyStream, EncodedStream, PrettyStream, RawStream,
@@ -97,16 +97,17 @@ def build_output_stream_for_message(
with_headers: bool,
with_body: bool,
):
- stream_class, stream_kwargs = get_stream_type_and_kwargs(
- env=env,
- args=args,
- )
- message_class = {
+ message_type = {
requests.PreparedRequest: HTTPRequest,
requests.Response: HTTPResponse,
}[type(requests_message)]
+ stream_class, stream_kwargs = get_stream_type_and_kwargs(
+ env=env,
+ args=args,
+ message_type=message_type,
+ )
yield from stream_class(
- msg=message_class(requests_message),
+ msg=message_type(requests_message),
with_headers=with_headers,
with_body=with_body,
**stream_kwargs,
@@ -120,7 +121,8 @@ def build_output_stream_for_message(
def get_stream_type_and_kwargs(
env: Environment,
- args: argparse.Namespace
+ args: argparse.Namespace,
+ message_type: Type[HTTPMessage],
) -> Tuple[Type['BaseStream'], dict]:
"""Pick the right stream type and kwargs for it based on `env` and `args`.
@@ -138,8 +140,12 @@ def get_stream_type_and_kwargs(
stream_class = EncodedStream
stream_kwargs = {
'env': env,
- 'response_as': args.response_as,
}
+ if message_type is HTTPResponse:
+ stream_kwargs.update({
+ 'mime_overwrite': args.response_mime,
+ 'encoding_overwrite': args.response_charset,
+ })
if args.prettify:
stream_class = PrettyStream if args.stream else BufferedPrettyStream
stream_kwargs.update({
diff --git a/httpie/utils.py b/httpie/utils.py
index c155aac5..4112cfbd 100644
--- a/httpie/utils.py
+++ b/httpie/utils.py
@@ -191,3 +191,21 @@ def _max_age_to_expires(cookies, now):
max_age = cookie.get('max-age')
if max_age and max_age.isdigit():
cookie['expires'] = now + float(max_age)
+
+
+def parse_content_type_header(header):
+ """Borrowed from requests."""
+ tokens = header.split(';')
+ content_type, params = tokens[0].strip(), tokens[1:]
+ params_dict = {}
+ items_to_strip = "\"' "
+ for param in params:
+ param = param.strip()
+ if param:
+ key, value = param, True
+ index_of_equals = param.find("=")
+ if index_of_equals != -1:
+ key = param[:index_of_equals].strip(items_to_strip)
+ value = param[index_of_equals + 1:].strip(items_to_strip)
+ params_dict[key.lower()] = value
+ return content_type, params_dict
diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py
index cf979e5f..bc795195 100644
--- a/tests/fixtures/__init__.py
+++ b/tests/fixtures/__init__.py
@@ -1,7 +1,7 @@
"""Test data"""
from pathlib import Path
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
def patharg(path):
diff --git a/tests/test_config.py b/tests/test_config.py
index 680b16b9..be19d572 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -4,7 +4,7 @@ import pytest
from _pytest.monkeypatch import MonkeyPatch
from httpie.compat import is_windows
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
from httpie.config import (
Config, DEFAULT_CONFIG_DIRNAME, DEFAULT_RELATIVE_LEGACY_CONFIG_DIR,
DEFAULT_RELATIVE_XDG_CONFIG_HOME, DEFAULT_WINDOWS_CONFIG_DIR,
diff --git a/tests/test_errors.py b/tests/test_errors.py
index abbf7235..f636db82 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -41,8 +41,19 @@ def test_max_headers_no_limit(httpbin_both):
assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get')
-def test_charset_argument_unknown_encoding(httpbin_both):
- with raises(LookupError) as e:
- http('--response-as', 'charset=foobar',
- 'GET', httpbin_both + '/get')
- assert 'unknown encoding: foobar' in str(e.value)
+def test_response_charset_option_unknown_encoding(httpbin_both):
+ r = http(
+ '--response-charset=foobar',
+ httpbin_both + '/get',
+ tolerate_error_exit_status=True
+ )
+ assert "'foobar' is not a supported encoding" in r.stderr
+
+
+def test_response_mime_option_unknown_encoding(httpbin_both):
+ r = http(
+ '--response-mime=foobar',
+ httpbin_both + '/get',
+ tolerate_error_exit_status=True
+ )
+ assert "'foobar' doesn’t look like a mime type" in r.stderr
diff --git a/tests/test_httpie.py b/tests/test_httpie.py
index 3ed1bb2c..a6cda1c3 100644
--- a/tests/test_httpie.py
+++ b/tests/test_httpie.py
@@ -9,7 +9,7 @@ import httpie.__main__
from .fixtures import FILE_CONTENT, FILE_PATH
from httpie.cli.exceptions import ParseError
from httpie.context import Environment
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
from httpie.status import ExitStatus
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http
diff --git a/tests/test_json.py b/tests/test_json.py
index 8d73c779..26365e28 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -11,8 +11,25 @@ from httpie.utils import JsonDictPreservingDuplicateKeys
from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE
-TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
-TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
+TEST_JSON_XXSI_PREFIXES = [
+ r")]}',\n", ")]}',",
+ 'while(1);',
+ 'for(;;)',
+ ')',
+ ']',
+ '}'
+]
+TEST_JSON_VALUES = [
+ # FIXME: missing int & float
+ {},
+ {'a': 0, 'b': 0},
+ [],
+ ['a', 'b'],
+ 'foo',
+ True,
+ False,
+ None
+]
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
@@ -37,15 +54,19 @@ JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
"""Test JSON bodies preceded by non-JSON data."""
body = data_prefix + json.dumps(json_data)
- content_type = 'application/json'
- responses.add(responses.GET, URL_EXAMPLE, body=body,
- content_type=content_type)
+ content_type = 'application/json;charset=utf8'
+ responses.add(
+ responses.GET,
+ URL_EXAMPLE,
+ body=body,
+ content_type=content_type
+ )
- colored_output = pretty in ('all', 'colors')
+ colored_output = pretty in {'all', 'colors'}
env = MockEnvironment(colors=256) if colored_output else None
- r = http('--pretty=' + pretty, URL_EXAMPLE, env=env)
+ r = http('--pretty', pretty, URL_EXAMPLE, env=env)
- indent = None if pretty in ('none', 'colors') else 4
+ indent = None if pretty in {'none', 'colors'} else 4
expected_body = data_prefix + json.dumps(json_data, indent=indent)
if colored_output:
fmt = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})
diff --git a/tests/test_output.py b/tests/test_output.py
index 0d0ee2f0..5a0db654 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -15,7 +15,7 @@ from httpie.cli.argtypes import (
parse_format_options,
)
from httpie.cli.definition import parser
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
from httpie.output.formatters.colors import get_lexer
from httpie.status import ExitStatus
from .utils import COLOR, CRLF, HTTP_OK, MockEnvironment, http
diff --git a/tests/test_sessions.py b/tests/test_sessions.py
index 3568382d..b5b5a67a 100644
--- a/tests/test_sessions.py
+++ b/tests/test_sessions.py
@@ -7,7 +7,7 @@ from unittest import mock
import pytest
from .fixtures import FILE_PATH_ARG, UNICODE
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
from httpie.plugins import AuthPlugin
from httpie.plugins.builtin import HTTPBasicAuth
from httpie.plugins.registry import plugin_manager
diff --git a/tests/test_unicode.py b/tests/test_unicode.py
index b1267592..90cbe24b 100644
--- a/tests/test_unicode.py
+++ b/tests/test_unicode.py
@@ -4,14 +4,19 @@ Various unicode handling related tests.
"""
import pytest
import responses
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE
from httpie.cli.constants import PRETTY_MAP
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
-from .utils import http, HTTP_OK, URL_EXAMPLE
+from .utils import http, HTTP_OK, URL_EXAMPLE, MockEnvironment, StdinBytesIO
from .fixtures import UNICODE
-ENCODINGS = [UTF8, 'windows-1250']
+
+CZECH_TEXT = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'
+assert len(CZECH_TEXT) > TOO_SMALL_SEQUENCE
+CZECH_TEXT_SPECIFIC_CHARSET = 'windows-1250'
+ENCODINGS = [UTF8, CZECH_TEXT_SPECIFIC_CHARSET]
def test_unicode_headers(httpbin):
@@ -122,24 +127,28 @@ def test_unicode_digest_auth(httpbin):
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content_type_header(encoding):
- responses.add(responses.GET,
- URL_EXAMPLE,
- body='\nFinanciën'.encode(encoding),
- content_type=f'text/xml; charset={encoding.upper()}')
+ responses.add(
+ responses.GET,
+ URL_EXAMPLE,
+ body=f'\n{CZECH_TEXT}'.encode(encoding),
+ content_type=f'text/xml; charset={encoding.upper()}'
+ )
r = http('GET', URL_EXAMPLE)
- assert 'Financiën' in r
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
-def test_GET_encoding_detection_from_content(encoding):
- body = f'\nFinanciën'
- responses.add(responses.GET,
- URL_EXAMPLE,
- body=body.encode(encoding),
- content_type='text/xml')
- r = http('GET', URL_EXAMPLE)
- assert 'Financiën' in r
+def test_encoding_detection_from_content(encoding):
+ body = f'\n{CZECH_TEXT}'
+ responses.add(
+ responses.GET,
+ URL_EXAMPLE,
+ body=body.encode(encoding),
+ content_type='text/xml'
+ )
+ r = http(URL_EXAMPLE)
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@@ -149,40 +158,45 @@ def test_GET_encoding_provided_by_option(pretty):
URL_EXAMPLE,
body='卷首'.encode('big5'),
content_type='text/plain; charset=utf-8')
- args = ('--pretty=' + pretty, 'GET', URL_EXAMPLE)
+ args = ('--pretty', pretty, URL_EXAMPLE)
# Encoding provided by Content-Type is incorrect, thus it should print something unreadable.
r = http(*args)
assert '卷首' not in r
-
- # Specifying the correct encoding, both in short & long versions, should fix it.
- r = http('--response-as', 'charset=big5', *args)
- assert '卷首' in r
- r = http('--response-as', 'text/plain; charset=big5', *args)
+ r = http('--response-charset=big5', *args)
assert '卷首' in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
-def test_GET_encoding_provided_by_empty_option_should_use_content_detection(encoding):
- body = f'\nFinanciën'
- responses.add(responses.GET,
- URL_EXAMPLE,
- body=body.encode(encoding),
- content_type='text/xml')
- r = http('--response-as', '', 'GET', URL_EXAMPLE)
- assert 'Financiën' in r
-
-
-@pytest.mark.parametrize('encoding', ENCODINGS)
-@responses.activate
-def test_POST_encoding_detection_from_content_type_header(encoding):
- responses.add(responses.POST,
- URL_EXAMPLE,
- body='Všichni lidé jsou si rovni.'.encode(encoding),
- content_type=f'text/plain; charset={encoding.upper()}')
+def test_encoding_detection_from_content_type_header(encoding):
+ responses.add(
+ responses.POST,
+ URL_EXAMPLE,
+ body=CZECH_TEXT.encode(encoding),
+ content_type=f'text/plain; charset={encoding.upper()}'
+ )
r = http('--form', 'POST', URL_EXAMPLE)
- assert 'Všichni lidé jsou si rovni.' in r
+ assert CZECH_TEXT in r
+
+
+@pytest.mark.parametrize('encoding', ENCODINGS)
+def test_request_body_content_type_charset_used(encoding):
+ body_str = CZECH_TEXT
+ body_bytes = body_str.encode(encoding)
+ if encoding != UTF8:
+ with pytest.raises(UnicodeDecodeError):
+ assert body_str != body_bytes.decode()
+ r = http(
+ '--offline',
+ URL_EXAMPLE,
+ f'Content-Type: text/plain; charset={encoding.upper()}',
+ env=MockEnvironment(
+ stdin=StdinBytesIO(body_bytes),
+ stdin_isatty=False,
+ )
+ )
+ assert body_str in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@@ -190,10 +204,10 @@ def test_POST_encoding_detection_from_content_type_header(encoding):
def test_POST_encoding_detection_from_content(encoding):
responses.add(responses.POST,
URL_EXAMPLE,
- body='Všichni lidé jsou si rovni.'.encode(encoding),
+ body=CZECH_TEXT.encode(encoding),
content_type='text/plain')
r = http('--form', 'POST', URL_EXAMPLE)
- assert 'Všichni lidé jsou si rovni.' in r
+ assert CZECH_TEXT in r
@pytest.mark.parametrize('encoding', ENCODINGS)
@@ -202,8 +216,8 @@ def test_POST_encoding_detection_from_content(encoding):
def test_stream_encoding_detection_from_content_type_header(encoding, pretty):
responses.add(responses.GET,
URL_EXAMPLE,
- body='\nFinanciën'.encode(encoding),
+ body=f'\n{CZECH_TEXT}'.encode(encoding),
stream=True,
content_type=f'text/xml; charset={encoding.upper()}')
r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE)
- assert 'Financiën' in r
+ assert CZECH_TEXT in r
diff --git a/tests/test_xml.py b/tests/test_xml.py
index 5727c94a..c0d50f54 100644
--- a/tests/test_xml.py
+++ b/tests/test_xml.py
@@ -3,7 +3,7 @@ import sys
import pytest
import responses
-from httpie.constants import UTF8
+from httpie.encoding import UTF8
from httpie.output.formatters.xml import parse_xml, pretty_xml
from .fixtures import XML_FILES_PATH, XML_FILES_VALID, XML_FILES_INVALID
@@ -93,7 +93,7 @@ def test_content_type_from_option():
"""
responses.add(responses.GET, URL_EXAMPLE, body=XML_DATA_RAW,
content_type='text/plain')
- args = ('--response-as', 'application/xml', URL_EXAMPLE)
+ args = ('--response-mime', 'application/xml', URL_EXAMPLE)
# Ensure the option is taken into account only for responses.
# Request
@@ -114,5 +114,5 @@ def test_content_type_from_option_incomplete():
content_type='text/plain')
# The provided Content-Type is simply ignored, and so no formatting is done.
- r = http('--response-as', 'charset=utf-8', URL_EXAMPLE)
+ r = http('--response-charset', 'utf-8', URL_EXAMPLE)
assert XML_DATA_RAW in r