Fix encoding error with non-prettified encoded responses (#1168)

* Fix encoding error with non-prettified encoded responses Removed `--format-option response.as` and promoted `--response-as`: using the format option would be misleading as it is now also used by non-prettified responses. * Encoding refactoring * split --response-as into --response-mime and --response-charset * add support for Content-Type charset for requests printed to terminal * add support for charset detection for requests printed to terminal without a Content-Type charset * etc. * `test_unicode.py` → `test_encoding.py` * Drop sequence length check * Clean-up tests * [skip ci] Tweaks * Use the compatible release clause for `charset_normalizer` requirement Cf. https://www.python.org/dev/peps/pep-0440/#version-specifiers * Clean-up * Partially revert d52a4833e4 * Changelog * Tweak tests * [skip ci] Better test name * Cleanup tests and add request body charset detection * More test suite cleanups * Cleanup * Fix code style in test * Improve detect_encoding() docstring * Uniformize pytest.mark.parametrize() calls * [skip ci] Comment out TODOs (will be tackled in a specific PR) Co-authored-by: Jakub Roztocil <jakub@roztocil.co>
2025-08-16 10:18:26 +02:00 · 2021-10-06 17:27:07 +02:00
parent 7989e438d2
commit 4f1c9441c5
34 changed files with 651 additions and 574 deletions
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@ -0,0 +1,222 @@
+"""
+Various encoding handling related tests.
+
+"""
+import pytest
+import responses
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE
+
+from httpie.cli.constants import PRETTY_MAP
+from httpie.encoding import UTF8
+
+from .utils import http, HTTP_OK, DUMMY_URL, MockEnvironment
+from .fixtures import UNICODE
+
+
+# Sample ``(charset, text)`` pairs used to parametrize the charset tests
+# in this module. Each text is expected to be longer than
+# ``TOO_SMALL_SEQUENCE``, and the non-UTF-8 entries are expected to fail
+# UTF-8 decoding once encoded; `test_charset_text_pairs()` verifies both.
+CHARSET_TEXT_PAIRS = [
+    ('big5', '卷首卷首卷首卷首卷卷首卷首卷首卷首卷首卷首卷首卷首卷首卷首卷首卷首卷首'),
+    ('windows-1250', 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'),
+    (UTF8, 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'),
+]
+
+
+def test_charset_text_pairs():
+    # Verify our test data is legit.
+    for charset, text in CHARSET_TEXT_PAIRS:
+        assert len(text) > TOO_SMALL_SEQUENCE
+        if charset != UTF8:
+            with pytest.raises(UnicodeDecodeError):
+                assert text != text.encode(charset).decode(UTF8)
+
+
+def test_unicode_headers(httpbin):
+    # httpbin doesn't interpret UFT-8 headers
+    r = http(httpbin.url + '/headers', f'Test:{UNICODE}')
+    assert HTTP_OK in r
+
+
+def test_unicode_headers_verbose(httpbin):
+    # httpbin doesn't interpret UTF-8 headers
+    r = http('--verbose', httpbin.url + '/headers', f'Test:{UNICODE}')
+    assert HTTP_OK in r
+    assert UNICODE in r
+
+
+def test_unicode_raw(httpbin):
+    r = http('--raw', f'test {UNICODE}', 'POST', httpbin.url + '/post')
+    assert HTTP_OK in r
+    assert r.json['data'] == f'test {UNICODE}'
+
+
+def test_unicode_raw_verbose(httpbin):
+    r = http('--verbose', '--raw', f'test {UNICODE}',
+             'POST', httpbin.url + '/post')
+    assert HTTP_OK in r
+    assert UNICODE in r
+
+
+def test_unicode_form_item(httpbin):
+    r = http('--form', 'POST', httpbin.url + '/post', f'test={UNICODE}')
+    assert HTTP_OK in r
+    assert r.json['form'] == {'test': UNICODE}
+
+
+def test_unicode_form_item_verbose(httpbin):
+    r = http('--verbose', '--form',
+             'POST', httpbin.url + '/post', f'test={UNICODE}')
+    assert HTTP_OK in r
+    assert UNICODE in r
+
+
+def test_unicode_json_item(httpbin):
+    r = http('--json', 'POST', httpbin.url + '/post', f'test={UNICODE}')
+    assert HTTP_OK in r
+    assert r.json['json'] == {'test': UNICODE}
+
+
+def test_unicode_json_item_verbose(httpbin):
+    r = http('--verbose', '--json',
+             'POST', httpbin.url + '/post', f'test={UNICODE}')
+    assert HTTP_OK in r
+    assert UNICODE in r
+
+
+def test_unicode_raw_json_item(httpbin):
+    r = http('--json', 'POST', httpbin.url + '/post',
+             f'test:={{ "{UNICODE}" : [ "{UNICODE}" ] }}')
+    assert HTTP_OK in r
+    assert r.json['json'] == {'test': {UNICODE: [UNICODE]}}
+
+
+def test_unicode_raw_json_item_verbose(httpbin):
+    r = http('--json', 'POST', httpbin.url + '/post',
+             f'test:={{ "{UNICODE}" : [ "{UNICODE}" ] }}')
+    assert HTTP_OK in r
+    assert r.json['json'] == {'test': {UNICODE: [UNICODE]}}
+
+
+def test_unicode_url_query_arg_item(httpbin):
+    r = http(httpbin.url + '/get', f'test=={UNICODE}')
+    assert HTTP_OK in r
+    assert r.json['args'] == {'test': UNICODE}, r
+
+
+def test_unicode_url_query_arg_item_verbose(httpbin):
+    r = http('--verbose', httpbin.url + '/get', f'test=={UNICODE}')
+    assert HTTP_OK in r
+    assert UNICODE in r
+
+
+def test_unicode_url(httpbin):
+    r = http(f'{httpbin.url}/get?test={UNICODE}')
+    assert HTTP_OK in r
+    assert r.json['args'] == {'test': UNICODE}
+
+
+def test_unicode_url_verbose(httpbin):
+    r = http('--verbose', f'{httpbin.url}/get?test={UNICODE}')
+    assert HTTP_OK in r
+    assert r.json['args'] == {'test': UNICODE}
+
+
+def test_unicode_basic_auth(httpbin):
+    # it doesn't really authenticate us because httpbin
+    # doesn't interpret the UTF-8-encoded auth
+    http('--verbose', '--auth', f'test:{UNICODE}',
+         f'{httpbin.url}/basic-auth/test/{UNICODE}')
+
+
+def test_unicode_digest_auth(httpbin):
+    # it doesn't really authenticate us because httpbin
+    # doesn't interpret the UTF-8-encoded auth
+    http('--auth-type=digest',
+         '--auth', f'test:{UNICODE}',
+         f'{httpbin.url}/digest-auth/auth/test/{UNICODE}')
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+@responses.activate
+def test_terminal_output_response_charset_detection(text, charset):
+    responses.add(
+        method=responses.POST,
+        url=DUMMY_URL,
+        body=text.encode(charset),
+        content_type='text/plain',
+    )
+    r = http('--form', 'POST', DUMMY_URL)
+    assert text in r
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+@responses.activate
+def test_terminal_output_response_content_type_charset(charset, text):
+    responses.add(
+        method=responses.POST,
+        url=DUMMY_URL,
+        body=text.encode(charset),
+        content_type=f'text/plain; charset={charset}',
+    )
+    r = http('--form', 'POST', DUMMY_URL)
+    assert text in r
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
+@responses.activate
+def test_terminal_output_response_content_type_charset_with_stream(charset, text, pretty):
+    responses.add(
+        method=responses.GET,
+        url=DUMMY_URL,
+        body=f'<?xml version="1.0"?>\n<c>{text}</c>'.encode(charset),
+        stream=True,
+        content_type=f'text/xml; charset={charset.upper()}',
+    )
+    r = http('--pretty', pretty, '--stream', DUMMY_URL)
+    assert text in r
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
+@responses.activate
+def test_terminal_output_response_charset_override(charset, text, pretty):
+    responses.add(
+        responses.GET,
+        DUMMY_URL,
+        body=text.encode(charset),
+        content_type='text/plain; charset=utf-8',
+    )
+    args = ['--pretty', pretty, DUMMY_URL]
+    if charset != UTF8:
+        # Content-Type charset wrong -> garbled text expected.
+        r = http(*args)
+        assert text not in r
+    r = http('--response-charset', charset, *args)
+    assert text in r
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+def test_terminal_output_request_content_type_charset(charset, text):
+    r = http(
+        '--offline',
+        DUMMY_URL,
+        f'Content-Type: text/plain; charset={charset.upper()}',
+        env=MockEnvironment(
+            stdin=text.encode(charset),
+            stdin_isatty=False,
+        ),
+    )
+    assert text in r
+
+
+@pytest.mark.parametrize('charset, text', CHARSET_TEXT_PAIRS)
+def test_terminal_output_request_charset_detection(charset, text):
+    r = http(
+        '--offline',
+        DUMMY_URL,
+        'Content-Type: text/plain',
+        env=MockEnvironment(
+            stdin=text.encode(charset),
+            stdin_isatty=False,
+        ),
+    )
+    assert text in r