httpie-cli/httpie/codec.py

from typing import Union

from charset_normalizer import from_bytes

from .constants import UTF8

Bytes = Union[bytearray, bytes]


def detect_encoding(content: Bytes) -> str:
    """Guess which encoding `content` is written in.
    The guess is delegated to `charset_normalizer`; when it yields
    no usable match, UTF-8 is returned instead.

    """
    # `content` may be a bytearray, so hand over a plain `bytes` object.
    best_guess = from_bytes(bytes(content)).best()
    if not best_guess:
        return UTF8
    return best_guess.encoding


def decode(content: Bytes, encoding: str) -> str:
    """Turn `content` into text using `encoding`.
    When `encoding` is empty, it is guessed from `content` on
    a best-effort basis.

    Bytes that cannot be decoded are replaced rather than raising.

    """
    # A falsy `encoding` (e.g. an empty string) triggers detection.
    effective_encoding = encoding or detect_encoding(content)
    return content.decode(effective_encoding, errors='replace')


def encode(content: str, encoding: str) -> bytes:
    """Turn `content` into bytes using `encoding`.

    Characters the encoding cannot represent are replaced
    rather than raising.

    """
    encoded = content.encode(encoding=encoding, errors='replace')
    return encoded
Improve handling of prettified responses without correct content-type encoding (#1110) * Improve handling of responses without correct content-type charset * [skip ci] Minor tweaks in tests * [skip ci] Add documentation Co-authored-by: claudiatd <claudiatd@gmail.com> * Improve unknown encoding test [skip ci] * Review mime and options retrieval * Add full content-type example in help output * Simplify decoder * [skip ci] s/charset/encoding/ * Tweaks * [skip ci] Fix type annotation * [skip ci] s/charset/encoding/ * Tweaks * Fix type annoation * Improvement * Introduce `codec.encode()` * [skip ci] Tweak changelog Co-authored-by: claudiatd <claudiatd@gmail.com> 2021-09-29 20:22:19 +02:00			`from typing import Union`

			`from charset_normalizer import from_bytes`

			`from .constants import UTF8`

			`Bytes = Union[bytearray, bytes]`


			`def detect_encoding(content: Bytes) -> str:`
			"""Detect the `content` encoding.
			`Fallback to UTF-8 when no suitable encoding found.`

			`"""`
			`match = from_bytes(bytes(content)).best()`
			`return match.encoding if match else UTF8`


			`def decode(content: Bytes, encoding: str) -> str:`
			"""Decode `content` using the given `encoding`.
			If no `encoding` is provided, the best effort is to guess it from `content`.

			`Unicode errors are replaced.`

			`"""`
			`if not encoding:`
			`encoding = detect_encoding(content)`
			`return content.decode(encoding, 'replace')`


			`def encode(content: str, encoding: str) -> bytes:`
			"""Encode `content` using the given `encoding`.

			`Unicode errors are replaced.`

			`"""`
			`return content.encode(encoding, 'replace')`