Improve JSON output when there is leading data before the actual JSON body (#1130)
In some special cases, to protect against Cross-Site Script Inclusion (XSSI) attacks, the JSON response body starts with a magic prefix line that must be stripped before the rest of the body is fed to the JSON parser. Such a prefix is now ignored by the parser but still printed in the terminal.

* Fix Windows tests
This commit is contained in: commit e6c5cd3e4b (parent 273134123a)
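For context, a minimal illustration (not part of the diff) of what an anti-XSSI prefix does to a response body and why a plain `json.loads()` call chokes on it; the `)]}',` prefix used here is just one common convention, and servers vary:

```python
import json

# A JSON body protected by an anti-XSSI prefix, as some servers emit it.
body = ")]}',\n" + '{"user": "jkbr", "admin": false}'

try:
    json.loads(body)  # the prefix makes this fail
except ValueError as error:
    print('plain json.loads() fails:', error)

# The approach taken by this commit: keep the prefix for display and feed
# only the remainder to the JSON parser (see httpie/output/utils.py below).
prefix = ")]}',\n"
rest = body[len(prefix):]
print(prefix + json.dumps(json.loads(rest), indent=4))
```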
@@ -5,6 +5,8 @@ This project adheres to [Semantic Versioning](https://semver.org/).
 
 ## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)
 
+- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
+
 ## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)
 
 Blog post: [What’s new in HTTPie 2.5.0](https://httpie.io/blog/httpie-2.5.0)
@@ -9,10 +9,12 @@ import pygments.token
 from pygments.formatters.terminal import TerminalFormatter
 from pygments.formatters.terminal256 import Terminal256Formatter
 from pygments.lexer import Lexer
+from pygments.lexers.data import JsonLexer
 from pygments.lexers.special import TextLexer
 from pygments.lexers.text import HttpLexer as PygmentsHttpLexer
 from pygments.util import ClassNotFound
 
+from ..lexers.json import EnhancedJsonLexer
 from ...compat import is_windows
 from ...context import Environment
 from ...plugins import FormatterPlugin

@@ -60,6 +62,7 @@ class ColorFormatter(FormatterPlugin):
             http_lexer = PygmentsHttpLexer()
             formatter = TerminalFormatter()
         else:
+            from ..lexers.http import SimplifiedHTTPLexer
             http_lexer = SimplifiedHTTPLexer()
             formatter = Terminal256Formatter(
                 style=self.get_style_class(color_scheme)
@@ -151,57 +154,14 @@ def get_lexer(
         else:
             lexer = pygments.lexers.get_lexer_by_name('json')
 
+    # Use our own JSON lexer: it supports JSON bodies preceded by non-JSON data
+    # as well as legit JSON bodies.
+    if isinstance(lexer, JsonLexer):
+        lexer = EnhancedJsonLexer()
+
     return lexer
 
 
-class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
-    """Simplified HTTP lexer for Pygments.
-
-    It only operates on headers and provides a stronger contrast between
-    their names and values than the original one bundled with Pygments
-    (:class:`pygments.lexers.text.HttpLexer`), especially when
-    the Solarized color scheme is used.
-
-    """
-    name = 'HTTP'
-    aliases = ['http']
-    filenames = ['*.http']
-    tokens = {
-        'root': [
-            # Request-Line
-            (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
-             pygments.lexer.bygroups(
-                 pygments.token.Name.Function,
-                 pygments.token.Text,
-                 pygments.token.Name.Namespace,
-                 pygments.token.Text,
-                 pygments.token.Keyword.Reserved,
-                 pygments.token.Operator,
-                 pygments.token.Number
-             )),
-            # Response Status-Line
-            (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
-             pygments.lexer.bygroups(
-                 pygments.token.Keyword.Reserved,  # 'HTTP'
-                 pygments.token.Operator,  # '/'
-                 pygments.token.Number,  # Version
-                 pygments.token.Text,
-                 pygments.token.Number,  # Status code
-                 pygments.token.Text,
-                 pygments.token.Name.Exception,  # Reason
-             )),
-            # Header
-            (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
-                pygments.token.Name.Attribute,  # Name
-                pygments.token.Text,
-                pygments.token.Operator,  # Colon
-                pygments.token.Text,
-                pygments.token.String  # Value
-            ))
-        ]
-    }
-
-
 class Solarized256Style(pygments.style.Style):
     """
     solarized256
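The net effect of the `get_lexer()` change, shown here as an editorial sketch rather than the actual HTTPie call path: whenever a body is about to be colorized as JSON, the stock Pygments `JsonLexer` is swapped for the `EnhancedJsonLexer` added below, so a leading non-JSON prefix no longer degrades the highlighting of the JSON part.

```python
from pygments import highlight
from pygments.formatters.terminal256 import Terminal256Formatter
from pygments.lexers.data import JsonLexer

from httpie.output.lexers.json import EnhancedJsonLexer

body = ")]}',\n" + '{"user": "jkbr", "admin": false}'

# Stock lexer: the prefix characters are tokenized as stray errors.
print(highlight(body, JsonLexer(), Terminal256Formatter()))

# Enhanced lexer: the prefix is emitted as one token, the rest as regular JSON.
print(highlight(body, EnhancedJsonLexer(), Terminal256Formatter()))
```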
@@ -17,15 +17,16 @@ class JSONFormatter(FormatterPlugin):
         ]
         if (self.kwargs['explicit_json']
                 or any(token in mime for token in maybe_json)):
+            from ..utils import load_prefixed_json
             try:
-                obj = json.loads(body)
+                data_prefix, json_obj = load_prefixed_json(body)
             except ValueError:
                 pass  # Invalid JSON, ignore.
             else:
                 # Indent, sort keys by name, and avoid
                 # unicode escapes to improve readability.
-                body = json.dumps(
-                    obj=obj,
+                body = data_prefix + json.dumps(
+                    obj=json_obj,
                     sort_keys=self.format_options['json']['sort_keys'],
                     ensure_ascii=False,
                     indent=self.format_options['json']['indent']
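A short sketch of what the new branch produces, assuming the default format options (key sorting on, 4-space indent); the prefix is passed through untouched and only the JSON part is re-dumped:

```python
import json

data_prefix, payload = ")]}',\n", '{"b": 1, "a": 2}'

# Mirrors the new code path: pretty-print only the JSON, keep the prefix as-is.
body = data_prefix + json.dumps(
    obj=json.loads(payload),
    sort_keys=True,       # stand-in for format_options['json']['sort_keys']
    ensure_ascii=False,
    indent=4,             # stand-in for format_options['json']['indent']
)
print(body)
# )]}',
# {
#     "a": 2,
#     "b": 1
# }
```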
httpie/output/lexers/__init__.py (new file, empty)
httpie/output/lexers/http.py (new file, 49 lines)
import pygments


class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
    """Simplified HTTP lexer for Pygments.

    It only operates on headers and provides a stronger contrast between
    their names and values than the original one bundled with Pygments
    (:class:`pygments.lexers.text.HttpLexer`), especially when
    the Solarized color scheme is used.

    """
    name = 'HTTP'
    aliases = ['http']
    filenames = ['*.http']
    tokens = {
        'root': [
            # Request-Line
            (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
             pygments.lexer.bygroups(
                 pygments.token.Name.Function,
                 pygments.token.Text,
                 pygments.token.Name.Namespace,
                 pygments.token.Text,
                 pygments.token.Keyword.Reserved,
                 pygments.token.Operator,
                 pygments.token.Number
             )),
            # Response Status-Line
            (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
             pygments.lexer.bygroups(
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number,  # Version
                 pygments.token.Text,
                 pygments.token.Number,  # Status code
                 pygments.token.Text,
                 pygments.token.Name.Exception,  # Reason
             )),
            # Header
            (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
                pygments.token.Name.Attribute,  # Name
                pygments.token.Text,
                pygments.token.Operator,  # Colon
                pygments.token.Text,
                pygments.token.String  # Value
            ))
        ]
    }
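A quick usage sketch (not part of the commit) of the relocated lexer, highlighting a small response head in the terminal:

```python
from pygments import highlight
from pygments.formatters.terminal import TerminalFormatter

from httpie.output.lexers.http import SimplifiedHTTPLexer

raw = (
    'HTTP/1.1 200 OK\n'
    'Content-Type: application/json\n'
    'Server: example\n'
)
print(highlight(raw, SimplifiedHTTPLexer(), TerminalFormatter()))
```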
httpie/output/lexers/json.py (new file, 31 lines)
import re

from pygments.lexer import bygroups, using, RegexLexer
from pygments.lexers.data import JsonLexer
from pygments.token import Token

PREFIX_TOKEN = Token.Error
PREFIX_REGEX = r'[^{\["]+'


class EnhancedJsonLexer(RegexLexer):
    """
    Enhanced JSON lexer for Pygments.

    It adds support for JSON bodies preceded by non-JSON data.

    """
    name = 'JSON'
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            # Possible non-JSON data prefix followed by the actual JSON body.
            # FIXME: a data prefix followed by a bare number (integer or float) is not handled correctly.
            (
                fr'({PREFIX_REGEX})' + r'((?:[{\["]|true|false|null).+)',
                bygroups(PREFIX_TOKEN, using(JsonLexer))
            ),
            # JSON body.
            (r'.+', using(JsonLexer)),
        ],
    }
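A small sanity-check sketch (not part of the commit) showing how the lexer tokenizes a prefixed body: the whole prefix comes out as a single `PREFIX_TOKEN`, and everything after it is delegated to the regular `JsonLexer`:

```python
from httpie.output.lexers.json import EnhancedJsonLexer, PREFIX_TOKEN

body = ")]}',\n" + '{"ok": true}'
tokens = list(EnhancedJsonLexer().get_tokens(body))

assert tokens[0] == (PREFIX_TOKEN, ")]}',\n")       # the prefix, as one token
assert any(value == 'true' for _, value in tokens)  # the JSON part is lexed normally
```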
httpie/output/utils.py (new file, 36 lines)
import json
import re
from typing import Tuple

from .lexers.json import PREFIX_REGEX


def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
    """Load JSON from `data`, which may start with a non-JSON prefix.

    """
    # First, try the full data.
    try:
        return '', json.loads(data)
    except ValueError:
        pass

    # Then, try to find the start of the actual body.
    data_prefix, body = parse_prefixed_json(data)
    try:
        return data_prefix, json.loads(body)
    except ValueError:
        raise ValueError('Invalid JSON')


def parse_prefixed_json(data: str) -> Tuple[str, str]:
    """Find the potential JSON body in `data`.

    Sometimes the JSON body is prefixed with an XSSI magic string, specific to the server.
    Return a tuple (data prefix, actual JSON body).

    """
    matches = re.findall(PREFIX_REGEX, data)
    data_prefix = matches[0] if matches else ''
    body = data[len(data_prefix):]
    return data_prefix, body
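Usage sketch for the two helpers above (not part of the commit):

```python
from httpie.output.utils import load_prefixed_json, parse_prefixed_json

# An XSSI-protected body: the prefix comes back separately from the parsed JSON.
prefix, obj = load_prefixed_json(")]}',\n" + '{"errors": []}')
print(repr(prefix), obj)  # ")]}',\n" {'errors': []}

# A plain JSON body parses on the first attempt, so the prefix is empty.
prefix, obj = load_prefixed_json('{"errors": []}')
print(repr(prefix), obj)  # '' {'errors': []}

# parse_prefixed_json() only splits, it never parses.
print(parse_prefixed_json('while(1);[1, 2]'))  # ('while(1);', '[1, 2]')

# Anything that is not JSON even after stripping a prefix raises ValueError.
try:
    load_prefixed_json('certainly not JSON')
except ValueError as error:
    print(error)  # Invalid JSON
```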
tests/test_json.py (new file, 40 lines)
import json

import pytest
import responses

from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter

from .utils import MockEnvironment, http, URL_EXAMPLE

TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None)  # FIXME: missing int & float
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'


@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@responses.activate
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
    """Test JSON bodies preceded by non-JSON data."""
    body = data_prefix + json.dumps(json_data)
    content_type = 'application/json'
    responses.add(responses.GET, URL_EXAMPLE, body=body,
                  content_type=content_type)

    colored_output = pretty in ('all', 'colors')
    env = MockEnvironment(colors=256) if colored_output else None
    r = http('--pretty=' + pretty, URL_EXAMPLE, env=env)

    indent = None if pretty in ('none', 'colors') else 4
    expected_body = data_prefix + json.dumps(json_data, indent=indent)
    if colored_output:
        fmt = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})
        expected_body = fmt.format_body(expected_body, content_type)
        # Ensure the non-JSON data prefix is colored exactly once,
        # i.e. it was handled as a single token.
        assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
    assert expected_body in r
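The parametrization above expands to 7 prefixes × 8 JSON values × every `--pretty` mode in `PRETTY_MAP`; assuming a development install with `pytest` and `responses` available, the new cases can be run in isolation with `python -m pytest tests/test_json.py`.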