diff --git a/CHANGELOG.md b/CHANGELOG.md index 847e70b5..ec56074a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ This project adheres to [Semantic Versioning](https://semver.org/). ## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased) +- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130)) + ## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06) Blog post: [What’s new in HTTPie 2.5.0](https://httpie.io/blog/httpie-2.5.0) diff --git a/httpie/output/formatters/colors.py b/httpie/output/formatters/colors.py index ff182d23..d0187337 100644 --- a/httpie/output/formatters/colors.py +++ b/httpie/output/formatters/colors.py @@ -9,10 +9,12 @@ import pygments.token from pygments.formatters.terminal import TerminalFormatter from pygments.formatters.terminal256 import Terminal256Formatter from pygments.lexer import Lexer +from pygments.lexers.data import JsonLexer from pygments.lexers.special import TextLexer from pygments.lexers.text import HttpLexer as PygmentsHttpLexer from pygments.util import ClassNotFound +from ..lexers.json import EnhancedJsonLexer from ...compat import is_windows from ...context import Environment from ...plugins import FormatterPlugin @@ -60,6 +62,7 @@ class ColorFormatter(FormatterPlugin): http_lexer = PygmentsHttpLexer() formatter = TerminalFormatter() else: + from ..lexers.http import SimplifiedHTTPLexer http_lexer = SimplifiedHTTPLexer() formatter = Terminal256Formatter( style=self.get_style_class(color_scheme) @@ -151,57 +154,14 @@ def get_lexer( else: lexer = pygments.lexers.get_lexer_by_name('json') + # Use our own JSON lexer: it supports JSON bodies preceded by non-JSON data + # as well as legit JSON bodies. 
+ if isinstance(lexer, JsonLexer): + lexer = EnhancedJsonLexer() + return lexer -class SimplifiedHTTPLexer(pygments.lexer.RegexLexer): - """Simplified HTTP lexer for Pygments. - - It only operates on headers and provides a stronger contrast between - their names and values than the original one bundled with Pygments - (:class:`pygments.lexers.text import HttpLexer`), especially when - Solarized color scheme is used. - - """ - name = 'HTTP' - aliases = ['http'] - filenames = ['*.http'] - tokens = { - 'root': [ - # Request-Line - (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)', - pygments.lexer.bygroups( - pygments.token.Name.Function, - pygments.token.Text, - pygments.token.Name.Namespace, - pygments.token.Text, - pygments.token.Keyword.Reserved, - pygments.token.Operator, - pygments.token.Number - )), - # Response Status-Line - (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)', - pygments.lexer.bygroups( - pygments.token.Keyword.Reserved, # 'HTTP' - pygments.token.Operator, # '/' - pygments.token.Number, # Version - pygments.token.Text, - pygments.token.Number, # Status code - pygments.token.Text, - pygments.token.Name.Exception, # Reason - )), - # Header - (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups( - pygments.token.Name.Attribute, # Name - pygments.token.Text, - pygments.token.Operator, # Colon - pygments.token.Text, - pygments.token.String # Value - )) - ] - } - - class Solarized256Style(pygments.style.Style): """ solarized256 diff --git a/httpie/output/formatters/json.py b/httpie/output/formatters/json.py index 65cbcd19..bc6151e4 100644 --- a/httpie/output/formatters/json.py +++ b/httpie/output/formatters/json.py @@ -17,15 +17,16 @@ class JSONFormatter(FormatterPlugin): ] if (self.kwargs['explicit_json'] or any(token in mime for token in maybe_json)): + from ..utils import load_prefixed_json try: - obj = json.loads(body) + data_prefix, json_obj = load_prefixed_json(body) except ValueError: pass # Invalid JSON, ignore. 
# Import the submodules explicitly: a bare ``import pygments`` does not by
# itself guarantee that ``pygments.lexer`` and ``pygments.token`` are loaded,
# and both are dereferenced while the class body below is being executed.
import pygments.lexer
import pygments.token


class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
    """Simplified HTTP lexer for Pygments.

    It only operates on headers and provides a stronger contrast between
    their names and values than the original one bundled with Pygments
    (:class:`pygments.lexers.text import HttpLexer`), especially when
    Solarized color scheme is used.

    The single ``root`` state holds three rules, tried in order: the
    Request-Line, the response Status-Line, and a generic ``name: value``
    header. Each regex group is mapped to one token type by ``bygroups``.

    """
    name = 'HTTP'
    aliases = ['http']
    filenames = ['*.http']
    tokens = {
        'root': [
            # Request-Line
            (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
             pygments.lexer.bygroups(
                 pygments.token.Name.Function,  # Method
                 pygments.token.Text,
                 pygments.token.Name.Namespace,  # Request target
                 pygments.token.Text,
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number  # Version
             )),
            # Response Status-Line
            (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
             pygments.lexer.bygroups(
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number,  # Version
                 pygments.token.Text,
                 pygments.token.Number,  # Status code
                 pygments.token.Text,
                 pygments.token.Name.Exception,  # Reason
             )),
            # Header
            (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
                pygments.token.Name.Attribute,  # Name
                pygments.token.Text,
                pygments.token.Operator,  # Colon
                pygments.token.Text,
                pygments.token.String  # Value
            ))
        ]
    }
def load_prefixed_json(
    data: str,
    prefix_regex: str = '',
) -> Tuple[str, object]:
    """Load JSON from `data`, tolerating a leading non-JSON prefix.

    The whole of `data` is tried first, so a plain JSON document is returned
    with an empty prefix. Only when that fails is a leading run of prefix
    characters split off (see `parse_prefixed_json()`) and the rest parsed.

    `prefix_regex` is the pattern describing the prefix; when empty (the
    default) the module-level `PREFIX_REGEX` is used.

    Return a tuple (data prefix, decoded JSON value).
    Raise `ValueError` if no JSON value can be decoded either way.

    """
    # First, the full data.
    try:
        return '', json.loads(data)
    except ValueError:
        pass

    # Then, try to find the start of the actual body.
    data_prefix, body = parse_prefixed_json(data, prefix_regex)
    if not data_prefix:
        # Nothing was split off: `body` is the very text that just failed
        # to parse, so there is no point in decoding it a second time.
        raise ValueError('Invalid JSON')
    try:
        return data_prefix, json.loads(body)
    except ValueError as exc:
        raise ValueError('Invalid JSON') from exc


def parse_prefixed_json(data: str, prefix_regex: str = '') -> Tuple[str, str]:
    """Find the potential JSON body from `data`.

    Sometimes the JSON body is prefixed with a XSSI magic string, specific to the server.
    Only a prefix located at the very beginning of `data` is recognized; when
    `data` does not start with one, the prefix is empty and the body is `data`.

    `prefix_regex` is the pattern describing the prefix; when empty (the
    default) the module-level `PREFIX_REGEX` is used.

    Return a tuple (data prefix, actual JSON body).

    """
    # `re.match()` anchors at position 0. An unanchored search would pick up
    # a run of prefix characters from *inside* the body and use its length to
    # cut the start of `data` (e.g. '[[1]' would become '1' + '[1]').
    match = re.match(prefix_regex or PREFIX_REGEX, data)
    data_prefix = match.group(0) if match else ''
    body = data[len(data_prefix):]
    return data_prefix, body
+ assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body + assert expected_body in r