From d7ed45bbcda825af558c44b6b41eff4f99a092e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Schoentgen?= Date: Tue, 21 Sep 2021 19:07:59 +0200 Subject: [PATCH] Fix duplicate keys preservation of JSON data (#1163) * Fix duplicate keys preservation of JSON data * Update issue number * Fix type annotations * Changes after review * Rewording --- CHANGELOG.md | 1 + httpie/cli/requestitems.py | 4 +- httpie/output/utils.py | 5 ++- httpie/utils.py | 58 +++++++++++++++++++++++-- tests/fixtures/__init__.py | 1 + tests/fixtures/test_with_dupe_keys.json | 1 + tests/test_cli.py | 29 ++++++------- tests/test_json.py | 48 ++++++++++++++++++++ 8 files changed, 124 insertions(+), 23 deletions(-) create mode 100644 tests/fixtures/test_with_dupe_keys.json diff --git a/CHANGELOG.md b/CHANGELOG.md index ec56074a..ee0a89f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). ## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased) +- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163)) - Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XXSI prefix). 
([#1130](https://github.com/httpie/httpie/issues/1130)) ## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06) diff --git a/httpie/cli/requestitems.py b/httpie/cli/requestitems.py index 30b49562..1dd6594c 100644 --- a/httpie/cli/requestitems.py +++ b/httpie/cli/requestitems.py @@ -15,7 +15,7 @@ from .dicts import ( RequestQueryParamsDict, ) from .exceptions import ParseError -from ..utils import get_content_type, load_json_preserve_order +from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys class RequestItems: @@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str: def load_json(arg: KeyValueArg, contents: str) -> JSONType: try: - return load_json_preserve_order(contents) + return load_json_preserve_order_and_dupe_keys(contents) except ValueError as e: raise ParseError(f'{arg.orig!r}: {e}') diff --git a/httpie/output/utils.py b/httpie/output/utils.py index 5ae7f603..875e8855 100644 --- a/httpie/output/utils.py +++ b/httpie/output/utils.py @@ -2,6 +2,7 @@ import json import re from typing import Tuple +from ..utils import load_json_preserve_order_and_dupe_keys from .lexers.json import PREFIX_REGEX @@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]: """ # First, the full data. try: - return '', json.loads(data) + return '', load_json_preserve_order_and_dupe_keys(data) except ValueError: pass # Then, try to find the start of the actual body. 
data_prefix, body = parse_prefixed_json(data) try: - return data_prefix, json.loads(body) + return data_prefix, load_json_preserve_order_and_dupe_keys(body) except ValueError: raise ValueError('Invalid JSON') diff --git a/httpie/utils.py b/httpie/utils.py index 7c8a598c..c155aac5 100644 --- a/httpie/utils.py +++ b/httpie/utils.py @@ -1,19 +1,69 @@ import json import mimetypes +import re +import sys import time from collections import OrderedDict from http.cookiejar import parse_ns_headers from pprint import pformat -from typing import List, Optional, Tuple -import re +from typing import Any, List, Optional, Tuple import requests.auth RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)') +Item = Tuple[str, Any] +Items = List[Item] -def load_json_preserve_order(s): - return json.loads(s, object_pairs_hook=OrderedDict) +class JsonDictPreservingDuplicateKeys(OrderedDict): + """A specialized JSON dict preserving duplicate keys. + + """ + + # Python versions prior to 3.8 suffer from an issue with multiple keys with the same name. + # `json.dumps(obj, indent=N, sort_keys=True)` will output sorted keys when they are unique, and + # duplicate keys will be outputted as they were defined in the original data. + # See https://bugs.python.org/issue23493#msg400929 for the behavior change between Python versions. + SUPPORTS_SORTING = sys.version_info >= (3, 8) + + def __init__(self, items: Items): + self._items = items + self._ensure_items_used() + + def _ensure_items_used(self) -> None: + """HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict. + + Two JSON encoders are available on CPython: pure-Python (1) and C (2) implementations. + + (1) The pure-python implementation will do a simple `if not dict: return '{}'`, + and we could fake that check by implementing the `__bool__()` method. 
+ Source: + - CPython `Lib/json/encoder.py` (`_iterencode_dict`) + + (2) On the other hand, the C implementation will do a check on the number of + items contained inside the dict, using a verification on `dict->ma_used`, which + is updated only when an item is added/removed from the dict. For that case, + there is no workaround but to add an item into the dict. + Sources: + - CPython `Modules/_json.c` (`encoder_listencode_dict`) + - CPython `Include/cpython/dictobject.h` (`PyDict_GET_SIZE`) + - CPython `Include/cpython/dictobject.h` (`ma_used` field of `PyDictObject`) + + To please both implementations, we simply add one item to the dict. + + """ + if self._items: + self['__hack__'] = '__hack__' + + def items(self) -> Items: + """Return all items, duplicate ones included. + + """ + return self._items + + +def load_json_preserve_order_and_dupe_keys(s): + return json.loads(s, object_pairs_hook=JsonDictPreservingDuplicateKeys) def repr_dict(d: dict) -> str: diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index ca1f0337..cf979e5f 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -16,6 +16,7 @@ def patharg(path): FIXTURES_ROOT = Path(__file__).parent FILE_PATH = FIXTURES_ROOT / 'test.txt' JSON_FILE_PATH = FIXTURES_ROOT / 'test.json' +JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json' BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin' XML_FILES_PATH = FIXTURES_ROOT / 'xmldata' XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml')) diff --git a/tests/fixtures/test_with_dupe_keys.json b/tests/fixtures/test_with_dupe_keys.json new file mode 100644 index 00000000..480d7890 --- /dev/null +++ b/tests/fixtures/test_with_dupe_keys.json @@ -0,0 +1 @@ +{"key":15,"key":15,"key":3,"key":7} diff --git a/tests/test_cli.py b/tests/test_cli.py index 6d4998f9..f2a7260f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,20 +1,21 @@ """CLI argument parsing related tests.""" import argparse -import json import pytest from requests.exceptions import InvalidSchema import httpie.cli.argparser -from .fixtures import ( - FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT, - JSON_FILE_PATH_ARG, -) -from httpie.status import ExitStatus 
from httpie.cli import constants from httpie.cli.definition import parser from httpie.cli.argtypes import KeyValueArg, KeyValueArgType from httpie.cli.requestitems import RequestItems +from httpie.status import ExitStatus +from httpie.utils import load_json_preserve_order_and_dupe_keys + +from .fixtures import ( + FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT, + JSON_FILE_PATH_ARG, +) from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http @@ -97,17 +98,15 @@ class TestItemParsing: # Parsed data raw_json_embed = items.data.pop('raw-json-embed') - assert raw_json_embed == json.loads(JSON_FILE_CONTENT) + assert raw_json_embed == load_json_preserve_order_and_dupe_keys(JSON_FILE_CONTENT) items.data['string-embed'] = items.data['string-embed'].strip() assert dict(items.data) == { - "ed": "", - "string": "value", - "bool": True, - "list": ["a", 1, {}, False], - "obj": { - "a": "b" - }, - "string-embed": FILE_CONTENT, + 'ed': '', + 'string': 'value', + 'bool': True, + 'list': ['a', 1, {}, False], + 'obj': load_json_preserve_order_and_dupe_keys('{"a": "b"}'), + 'string-embed': FILE_CONTENT, } # Parsed query string parameters diff --git a/tests/test_json.py b/tests/test_json.py index 4d210f23..8d73c779 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -6,13 +6,29 @@ import responses from httpie.cli.constants import PRETTY_MAP from httpie.compat import is_windows from httpie.output.formatters.colors import ColorFormatter +from httpie.utils import JsonDictPreservingDuplicateKeys +from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH from .utils import MockEnvironment, http, URL_EXAMPLE TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}') TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m' +JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}' 
+JSON_WITH_DUPES_FORMATTED_SORTED = '''{ + "key": 3, + "key": 7, + "key": 15, + "key": 15 +}''' +JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{ + "key": 15, + "key": 15, + "key": 3, + "key": 7 +}''' + @pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES) @pytest.mark.parametrize('json_data', TEST_JSON_VALUES) @@ -38,3 +54,35 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da # meaning it was correctly handled as a whole. assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body assert expected_body in r + + +@responses.activate +def test_duplicate_keys_support_from_response(): + """JSON with duplicate keys should be handled correctly.""" + responses.add(responses.GET, URL_EXAMPLE, body=JSON_WITH_DUPES_RAW, + content_type='application/json') + args = ('--pretty', 'format', URL_EXAMPLE) + + # Check implicit --sorted + if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING: + r = http(*args) + assert JSON_WITH_DUPES_FORMATTED_SORTED in r + + # Check --unsorted + r = http(*args, '--unsorted') + assert JSON_WITH_DUPES_FORMATTED_UNSORTED in r + + +def test_duplicate_keys_support_from_input_file(): + """JSON file with duplicate keys should be handled correctly.""" + args = ('--verbose', '--offline', URL_EXAMPLE, + f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}') + + # Check implicit --sorted + if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING: + r = http(*args) + assert JSON_WITH_DUPES_FORMATTED_SORTED in r + + # Check --unsorted + r = http(*args, '--unsorted') + assert JSON_WITH_DUPES_FORMATTED_UNSORTED in r