httpie-cli/httpie/output/utils.py

import json
import re
from typing import Tuple

from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX


def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
    """Load JSON from `data`, tolerating a leading non-JSON prefix.

    The complete input is tried first. If that fails, `data` is split
    with `parse_prefixed_json()` and only the body part is loaded.

    Return a tuple (data prefix, loaded JSON); the prefix is `''` when
    the complete input loaded successfully.
    Raise `ValueError('Invalid JSON')` when the body cannot be loaded either.

    NOTE(review): the return annotation names `json.JSONDecoder`, but the
    second element is whatever `load_json_preserve_order_and_dupe_keys()`
    returns -- confirm and adjust the annotation if needed.

    """
    # Attempt 1: the input as a whole.
    try:
        whole = load_json_preserve_order_and_dupe_keys(data)
    except ValueError:
        pass
    else:
        return '', whole

    # Attempt 2: split off a possible prefix and load what follows it.
    prefix, remainder = parse_prefixed_json(data)
    try:
        loaded = load_json_preserve_order_and_dupe_keys(remainder)
    except ValueError:
        raise ValueError('Invalid JSON')
    return prefix, loaded


def parse_prefixed_json(data: str) -> Tuple[str, str]:
    """Split `data` into a potential prefix and the JSON body after it.

    Sometimes the JSON body is prefixed with an XSSI magic string, specific
    to the server. The prefix is the first result of searching `data` with
    `PREFIX_REGEX` (or `''` when there is none); the body is `data` with
    that many leading characters removed.

    Return a tuple (data prefix, actual JSON body).

    """
    # NOTE(review): slicing by the prefix length assumes the first result
    # starts at index 0 of `data` -- confirm PREFIX_REGEX guarantees that.
    data_prefix = next(iter(re.findall(PREFIX_REGEX, data)), '')
    return data_prefix, data[len(data_prefix):]


def parse_header_content_type(line: str) -> Tuple[str, dict]:
    """Parse a Content-Type like header.

    Return the main Content-Type and a dictionary of options.

    Option names are stripped and lower-cased; the main value and the option
    values keep their case. An option value wrapped in double quotes has the
    quotes removed and its `\\\\` and `\\"` escapes undone. Segments without
    an `=` (other than the first one) are ignored.

        >>> parse_header_content_type('application/xml; charset=utf-8')
        ('application/xml', {'charset': 'utf-8'})
        >>> parse_header_content_type('application/xml; charset = utf-8')
        ('application/xml', {'charset': 'utf-8'})
        >>> parse_header_content_type('application/html+xml;ChArSeT="UTF-8"')
        ('application/html+xml', {'charset': 'UTF-8'})
        >>> parse_header_content_type('application/xml')
        ('application/xml', {})
        >>> parse_header_content_type(';charset=utf-8')
        ('', {'charset': 'utf-8'})
        >>> parse_header_content_type('charset=utf-8')
        ('', {'charset': 'utf-8'})
        >>> parse_header_content_type('multipart/mixed; boundary="gc0pJq0M:08jU534c0p"')
        ('multipart/mixed', {'boundary': 'gc0pJq0M:08jU534c0p'})
        >>> parse_header_content_type('Message/Partial; number=3; total=3; id="oc=jpbe0M2Yt4s@foo.com"')
        ('Message/Partial', {'number': '3', 'total': '3', 'id': 'oc=jpbe0M2Yt4s@foo.com'})
    """
    # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L230

    def _parseparam(s: str):
        # Source: https://github.com/python/cpython/blob/bb3e0c2/Lib/cgi.py#L218
        # Yield each ';'-introduced segment of `s`, stripped of whitespace.
        while s[:1] == ';':
            s = s[1:]
            end = s.find(';')
            # An odd number of unescaped double quotes before `end` means
            # that ';' sits inside a quoted string: look for the next one.
            while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
                end = s.find(';', end + 1)
            if end < 0:
                end = len(s)
            yield s[:end].strip()
            s = s[end:]

    # Special case: 'key=value' only (without starting with ';').
    # The extra ';' makes the main value empty so the pair becomes an option.
    if ';' not in line and '=' in line:
        line = ';' + line

    parts = _parseparam(';' + line)
    # The input always starts with ';', so a first segment is always yielded.
    key = next(parts)
    pdict = {}
    for part in parts:
        name, separator, value = part.partition('=')
        if not separator:
            # No '=' in this segment: not an option, skip it.
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name.strip().lower()] = value
    return key, pdict