Fix duplicate keys preservation of JSON data (#1163)

* Fix duplicate keys preservation of JSON data

* Update issue number

* Fix type annotations

* Changes after review

* Rewording
This commit is contained in:
Mickaël Schoentgen 2021-09-21 19:07:59 +02:00 committed by GitHub
parent e6c5cd3e4b
commit d7ed45bbcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 23 deletions

View File

@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased) ## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)
- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130)) - Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06) ## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)

View File

@ -15,7 +15,7 @@ from .dicts import (
RequestQueryParamsDict, RequestQueryParamsDict,
) )
from .exceptions import ParseError from .exceptions import ParseError
from ..utils import get_content_type, load_json_preserve_order from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys
class RequestItems: class RequestItems:
@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str:
def load_json(arg: KeyValueArg, contents: str) -> JSONType:
    """Parse `contents` as JSON on behalf of the request item `arg`.

    Decoding is delegated to `load_json_preserve_order_and_dupe_keys()`.

    Raises:
        ParseError: if `contents` is not valid JSON. The message is the
            repr of `arg.orig` followed by the decoder's error message.
    """
    try:
        return load_json_preserve_order_and_dupe_keys(contents)
    except ValueError as e:
        # Chain explicitly so the decoder error is reported as the direct
        # cause of the `ParseError`.
        raise ParseError(f'{arg.orig!r}: {e}') from e

View File

@ -2,6 +2,7 @@ import json
import re import re
from typing import Tuple from typing import Tuple
from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX from .lexers.json import PREFIX_REGEX
@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
""" """
# First, the full data. # First, the full data.
try: try:
return '', json.loads(data) return '', load_json_preserve_order_and_dupe_keys(data)
except ValueError: except ValueError:
pass pass
# Then, try to find the start of the actual body. # Then, try to find the start of the actual body.
data_prefix, body = parse_prefixed_json(data) data_prefix, body = parse_prefixed_json(data)
try: try:
return data_prefix, json.loads(body) return data_prefix, load_json_preserve_order_and_dupe_keys(body)
except ValueError: except ValueError:
raise ValueError('Invalid JSON') raise ValueError('Invalid JSON')

View File

@ -1,19 +1,69 @@
import json import json
import mimetypes import mimetypes
import re
import sys
import time import time
from collections import OrderedDict from collections import OrderedDict
from http.cookiejar import parse_ns_headers from http.cookiejar import parse_ns_headers
from pprint import pformat from pprint import pformat
from typing import List, Optional, Tuple from typing import Any, List, Optional, Tuple
import re
import requests.auth import requests.auth
RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)') RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
Item = Tuple[str, Any]
Items = List[Item]


class JsonDictPreservingDuplicateKeys(OrderedDict):
    """A specialized JSON dict preserving duplicate keys.

    It is meant to be used as the `object_pairs_hook` of `json.loads()`:
    the decoded `(key, value)` pairs are kept as-is in `self._items` and
    handed back, duplicates included, by `items()`.

    The underlying dict storage does NOT hold the decoded pairs (see
    `_ensure_items_used()`), which is why equality is defined on the
    stored pairs rather than inherited from `OrderedDict`.

    """

    # Python versions prior to 3.8 suffer from an issue with multiple keys with the same name.
    # `json.dumps(obj, indent=N, sort_keys=True)` will output sorted keys when they are unique, and
    # duplicate keys will be output as they were defined in the original data.
    # See <https://bugs.python.org/issue23493#msg400929> for the behavior change between Python versions.
    SUPPORTS_SORTING = sys.version_info >= (3, 8)

    def __init__(self, items: Items):
        # Initialize the `OrderedDict` base explicitly (with no content)
        # before `_ensure_items_used()` inserts a key into it.
        super().__init__()
        self._items = items
        self._ensure_items_used()

    def _ensure_items_used(self) -> None:
        """HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict.

        Two JSON encoders are available on CPython: pure-Python (1) and C (2) implementations.

        (1) The pure-python implementation will do a simple `if not dict: return '{}'`,
        and we could fake that check by implementing the `__bool__()` method.
        Source:
            - <https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336>

        (2) On the other hand, the C implementation will do a check on the number of
        items contained inside the dict, using a verification on `dict->ma_used`, which
        is updated only when an item is added/removed from the dict. For that case,
        there is no workaround but to add an item into the dict.
        Sources:
            - <https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18>

        To please both implementations, we simply add one item to the dict.

        """
        if self._items:
            self['__hack__'] = '__hack__'

    def items(self) -> Items:
        """Return all items, duplicate ones included.

        """
        return self._items

    def __eq__(self, other: Any) -> bool:
        """Compare the preserved pairs when `other` is of the same class.

        The inherited comparison only looks at the dict storage, which holds
        nothing but the `'__hack__'` placeholder, so any two non-empty
        instances would compare equal whatever they were decoded from.
        Other types keep the inherited `OrderedDict` comparison.

        """
        if isinstance(other, JsonDictPreservingDuplicateKeys):
            return self._items == other._items
        return super().__eq__(other)

    def __ne__(self, other: Any) -> bool:
        """Inverse of `__eq__()`.

        Defined explicitly because the base class provides its own `!=`,
        which would otherwise keep comparing the placeholder storage.

        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
def load_json_preserve_order_and_dupe_keys(s):
    """Decode the JSON document `s`, keeping duplicate keys.

    Every decoded JSON object is built by passing its key/value pairs to
    `JsonDictPreservingDuplicateKeys`, used as `object_pairs_hook`.
    """
    decoded = json.loads(
        s,
        object_pairs_hook=JsonDictPreservingDuplicateKeys,
    )
    return decoded
def repr_dict(d: dict) -> str: def repr_dict(d: dict) -> str:

View File

@ -16,6 +16,7 @@ def patharg(path):
FIXTURES_ROOT = Path(__file__).parent FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt' FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json' JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin' BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata' XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml')) XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))

View File

@ -0,0 +1 @@
{"key":15,"key":15,"key":3,"key":7}

View File

@ -1,20 +1,21 @@
"""CLI argument parsing related tests.""" """CLI argument parsing related tests."""
import argparse import argparse
import json
import pytest import pytest
from requests.exceptions import InvalidSchema from requests.exceptions import InvalidSchema
import httpie.cli.argparser import httpie.cli.argparser
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from httpie.status import ExitStatus
from httpie.cli import constants from httpie.cli import constants
from httpie.cli.definition import parser from httpie.cli.definition import parser
from httpie.cli.argtypes import KeyValueArg, KeyValueArgType from httpie.cli.argtypes import KeyValueArg, KeyValueArgType
from httpie.cli.requestitems import RequestItems from httpie.cli.requestitems import RequestItems
from httpie.status import ExitStatus
from httpie.utils import load_json_preserve_order_and_dupe_keys
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http
@ -97,17 +98,15 @@ class TestItemParsing:
# Parsed data # Parsed data
raw_json_embed = items.data.pop('raw-json-embed') raw_json_embed = items.data.pop('raw-json-embed')
assert raw_json_embed == json.loads(JSON_FILE_CONTENT) assert raw_json_embed == load_json_preserve_order_and_dupe_keys(JSON_FILE_CONTENT)
items.data['string-embed'] = items.data['string-embed'].strip() items.data['string-embed'] = items.data['string-embed'].strip()
assert dict(items.data) == { assert dict(items.data) == {
"ed": "", 'ed': '',
"string": "value", 'string': 'value',
"bool": True, 'bool': True,
"list": ["a", 1, {}, False], 'list': ['a', 1, {}, False],
"obj": { 'obj': load_json_preserve_order_and_dupe_keys('{"a": "b"}'),
"a": "b" 'string-embed': FILE_CONTENT,
},
"string-embed": FILE_CONTENT,
} }
# Parsed query string parameters # Parsed query string parameters

View File

@ -6,13 +6,29 @@ import responses
from httpie.cli.constants import PRETTY_MAP from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter from httpie.output.formatters.colors import ColorFormatter
from httpie.utils import JsonDictPreservingDuplicateKeys
from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE from .utils import MockEnvironment, http, URL_EXAMPLE
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}') TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m' TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
JSON_WITH_DUPES_FORMATTED_SORTED = '''{
"key": 3,
"key": 7,
"key": 15,
"key": 15
}'''
JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
"key": 15,
"key": 15,
"key": 3,
"key": 7
}'''
@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES) @pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES) @pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@ -38,3 +54,35 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da
# meaning it was correctly handled as a whole. # meaning it was correctly handled as a whole.
assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
assert expected_body in r assert expected_body in r
@responses.activate
def test_duplicate_keys_support_from_response():
    """Duplicate keys of a JSON response body must survive formatting."""
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=JSON_WITH_DUPES_RAW,
        content_type='application/json',
    )
    cli_args = ['--pretty', 'format', URL_EXAMPLE]

    # Sorting is the default; only checked where the interpreter supports it.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*cli_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With sorting disabled, pairs must come out in their original order.
    unsorted_output = http(*cli_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output
def test_duplicate_keys_support_from_input_file():
    """Duplicate keys of a JSON request file must survive formatting."""
    file_arg = f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}'
    cli_args = ['--verbose', '--offline', URL_EXAMPLE, file_arg]

    # Sorting is the default; only checked where the interpreter supports it.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*cli_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With sorting disabled, pairs must come out in their original order.
    unsorted_output = http(*cli_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output