Fix duplicate keys preservation of JSON data (#1163)

* Fix duplicate keys preservation of JSON data

* Update issue number

* Fix type annotations

* Changes after review

* Rewording
Mickaël Schoentgen 2021-09-21 19:07:59 +02:00 committed by GitHub
parent e6c5cd3e4b
commit d7ed45bbcd
8 changed files with 124 additions and 23 deletions


@@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)
- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XXSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)


@@ -15,7 +15,7 @@ from .dicts import (
RequestQueryParamsDict,
)
from .exceptions import ParseError
from ..utils import get_content_type, load_json_preserve_order
from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys
class RequestItems:
@@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str:
def load_json(arg: KeyValueArg, contents: str) -> JSONType:
try:
return load_json_preserve_order(contents)
return load_json_preserve_order_and_dupe_keys(contents)
except ValueError as e:
raise ParseError(f'{arg.orig!r}: {e}')


@@ -2,6 +2,7 @@ import json
import re
from typing import Tuple
from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX
@@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
"""
# First, the full data.
try:
return '', json.loads(data)
return '', load_json_preserve_order_and_dupe_keys(data)
except ValueError:
pass
# Then, try to find the start of the actual body.
data_prefix, body = parse_prefixed_json(data)
try:
return data_prefix, json.loads(body)
return data_prefix, load_json_preserve_order_and_dupe_keys(body)
except ValueError:
raise ValueError('Invalid JSON')
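
A standalone sketch of the two-step strategy above (try the whole payload first, then strip the non-JSON prefix and retry), routed through the duplicate-preserving loader. It assumes an httpie checkout with this change applied for the import; `PREFIX_RE` and `load_prefixed_json_sketch` are simplified stand-ins written for illustration, not httpie's actual `PREFIX_REGEX` or `load_prefixed_json`:

```python
import re

# Assumes an httpie checkout with this change applied.
from httpie.utils import load_json_preserve_order_and_dupe_keys

# Simplified stand-in for httpie's PREFIX_REGEX (the real pattern lives in
# httpie.output.lexers.json): treat everything before the first '{' or '['
# as a potential non-JSON prefix.
PREFIX_RE = re.compile(r'^[^{\[]*')


def load_prefixed_json_sketch(data: str):
    # First, try the full data.
    try:
        return '', load_json_preserve_order_and_dupe_keys(data)
    except ValueError:
        pass
    # Then, strip the non-JSON prefix and try the remaining body.
    prefix = PREFIX_RE.match(data).group(0)
    try:
        return prefix, load_json_preserve_order_and_dupe_keys(data[len(prefix):])
    except ValueError:
        raise ValueError('Invalid JSON')


prefix, body = load_prefixed_json_sketch(")]}',\n" + '{"key": 15, "key": 3}')
print(repr(prefix))  # ")]}',\n"
print(body.items())  # [('key', 15), ('key', 3)]
```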


@@ -1,19 +1,69 @@
import json
import mimetypes
import re
import sys
import time
from collections import OrderedDict
from http.cookiejar import parse_ns_headers
from pprint import pformat
from typing import List, Optional, Tuple
import re
from typing import Any, List, Optional, Tuple
import requests.auth
RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
Item = Tuple[str, Any]
Items = List[Item]
def load_json_preserve_order(s):
return json.loads(s, object_pairs_hook=OrderedDict)
class JsonDictPreservingDuplicateKeys(OrderedDict):
    """A specialized JSON dict preserving duplicate keys."""

    # Python versions prior to 3.8 do not sort duplicate keys:
    # `json.dumps(obj, indent=N, sort_keys=True)` outputs unique keys sorted, while keys that
    # appear several times are output in the order they were defined in the original data.
    # See <https://bugs.python.org/issue23493#msg400929> for the behavior change between Python versions.
    SUPPORTS_SORTING = sys.version_info >= (3, 8)

    def __init__(self, items: Items):
        self._items = items
        self._ensure_items_used()

    def _ensure_items_used(self) -> None:
        """HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict.

        Two JSON encoders are available on CPython: the pure-Python (1) and C (2) implementations.

        (1) The pure-Python implementation does a simple `if not dict: return '{}'`,
            and we could fake that check by implementing the `__bool__()` method.
            Source:
            - <https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336>

        (2) The C implementation, on the other hand, checks the number of items in the
            dict via `dict->ma_used`, which is updated only when an item is added to or
            removed from the dict. In that case, there is no workaround but to add an
            item to the dict.
            Sources:
            - <https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18>

        To please both implementations, we simply add one item to the dict.
        """
        if self._items:
            self['__hack__'] = '__hack__'

    def items(self) -> Items:
        """Return all items, duplicate ones included."""
        return self._items


def load_json_preserve_order_and_dupe_keys(s):
    return json.loads(s, object_pairs_hook=JsonDictPreservingDuplicateKeys)
def repr_dict(d: dict) -> str:
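
A minimal sketch of what the `_ensure_items_used()` hack buys us, assuming an httpie checkout with this change applied (so that `httpie.utils` exposes the new helper); the output shown in the comment follows from the default `json.dumps()` separators:

```python
import json

# Assumes an httpie checkout with this change applied.
from httpie.utils import load_json_preserve_order_and_dupe_keys

raw = '{"key": 15, "key": 15, "key": 3, "key": 7}'
obj = load_json_preserve_order_and_dupe_keys(raw)

# The dict itself only stores the '__hack__' marker, but because it is a
# non-empty dict subclass, json.dumps() skips the empty-dict fast path and
# falls back to obj.items(), which returns the original (key, value) pairs,
# so the duplicates survive re-serialization.
print(json.dumps(obj))  # {"key": 15, "key": 15, "key": 3, "key": 7}
```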


@@ -16,6 +16,7 @@ def patharg(path):
FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))


@@ -0,0 +1 @@
{"key":15,"key":15,"key":3,"key":7}


@@ -1,20 +1,21 @@
"""CLI argument parsing related tests."""
import argparse
import json
import pytest
from requests.exceptions import InvalidSchema
import httpie.cli.argparser
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from httpie.status import ExitStatus
from httpie.cli import constants
from httpie.cli.definition import parser
from httpie.cli.argtypes import KeyValueArg, KeyValueArgType
from httpie.cli.requestitems import RequestItems
from httpie.status import ExitStatus
from httpie.utils import load_json_preserve_order_and_dupe_keys
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http
@@ -97,17 +98,15 @@ class TestItemParsing:
# Parsed data
raw_json_embed = items.data.pop('raw-json-embed')
assert raw_json_embed == json.loads(JSON_FILE_CONTENT)
assert raw_json_embed == load_json_preserve_order_and_dupe_keys(JSON_FILE_CONTENT)
items.data['string-embed'] = items.data['string-embed'].strip()
assert dict(items.data) == {
"ed": "",
"string": "value",
"bool": True,
"list": ["a", 1, {}, False],
"obj": {
"a": "b"
},
"string-embed": FILE_CONTENT,
'ed': '',
'string': 'value',
'bool': True,
'list': ['a', 1, {}, False],
'obj': load_json_preserve_order_and_dupe_keys('{"a": "b"}'),
'string-embed': FILE_CONTENT,
}
# Parsed query string parameters


@@ -6,13 +6,29 @@ import responses
from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter
from httpie.utils import JsonDictPreservingDuplicateKeys
from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
JSON_WITH_DUPES_FORMATTED_SORTED = '''{
"key": 3,
"key": 7,
"key": 15,
"key": 15
}'''
JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
"key": 15,
"key": 15,
"key": 3,
"key": 7
}'''
@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@@ -38,3 +54,35 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da
# meaning it was correctly handled as a whole.
assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
assert expected_body in r
@responses.activate
def test_duplicate_keys_support_from_response():
"""JSON with duplicate keys should be handled correctly."""
responses.add(responses.GET, URL_EXAMPLE, body=JSON_WITH_DUPES_RAW,
content_type='application/json')
args = ('--pretty', 'format', URL_EXAMPLE)
# Check implicit --sorted
if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
r = http(*args)
assert JSON_WITH_DUPES_FORMATTED_SORTED in r
# Check --unsorted
r = http(*args, '--unsorted')
assert JSON_WITH_DUPES_FORMATTED_UNSORTED in r
def test_duplicate_keys_support_from_input_file():
"""JSON file with duplicate keys should be handled correctly."""
args = ('--verbose', '--offline', URL_EXAMPLE,
f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}')
# Check implicit --sorted
if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
r = http(*args)
assert JSON_WITH_DUPES_FORMATTED_SORTED in r
# Check --unsorted
r = http(*args, '--unsorted')
assert JSON_WITH_DUPES_FORMATTED_UNSORTED in r
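
For reference, the sorted/unsorted expectations in the two tests above boil down to how `json.dumps()` treats the duplicate-preserving dict. A rough sketch, assuming an httpie checkout with this change applied and that `--sorted`/`--unsorted` translate to `sort_keys` in the JSON formatter:

```python
import json

# Assumes an httpie checkout with this change applied.
from httpie.utils import (
    JsonDictPreservingDuplicateKeys,
    load_json_preserve_order_and_dupe_keys,
)

obj = load_json_preserve_order_and_dupe_keys('{"key": 15, "key": 15, "key": 3, "key": 7}')

# --unsorted: duplicates keep their original order
# (compare JSON_WITH_DUPES_FORMATTED_UNSORTED above).
print(json.dumps(obj, indent=4))

# --sorted relies on sort_keys=True also ordering duplicate keys, which the
# class only guarantees on Python >= 3.8, hence the SUPPORTS_SORTING guards
# in the tests above (compare JSON_WITH_DUPES_FORMATTED_SORTED).
if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
    print(json.dumps(obj, indent=4, sort_keys=True))
```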