From 52e46beddaa9dccadb85378c76eaaece1e399a7e Mon Sep 17 00:00:00 2001 From: Jakub Roztocil Date: Wed, 1 Aug 2012 23:21:52 +0200 Subject: [PATCH] Take advantage of streaming. It's now possible to download huge files with HTTPie, and it's often faster than curl and wget! --- README.rst | 3 +- httpie/core.py | 144 ++++++++++++++++++++++------------------- httpie/input.py | 6 +- httpie/models.py | 7 ++ httpie/output.py | 42 ++++++------ tests/tests.py | 165 ++++++++++++++++++++++------------------------- 6 files changed, 184 insertions(+), 183 deletions(-) diff --git a/README.rst b/README.rst index 87f8a72e..475f1fe5 100644 --- a/README.rst +++ b/README.rst @@ -183,7 +183,7 @@ Note that when the **output is redirected** (like the examples above), HTTPie applies a different set of defaults than for a console output. Namely, colors aren't used (unless ``--pretty`` is set) and only the response body is printed (unless ``--print`` options specified). It is a convenience -that allows for things like the one above or downloading (smallish) binary +that allows for things like the one above or downloading binary files without having to set any flags: .. code-block:: shell @@ -373,6 +373,7 @@ Changelog ========= * `0.2.7dev`_ + * Support for efficient large file downloads. * Response body is fetched only when needed (e.g., not with ``--headers``). * Updated Solarized color scheme. * Windows: Added ``--output FILE`` to store output into a file diff --git a/httpie/core.py b/httpie/core.py index 73d51789..43382f4e 100644 --- a/httpie/core.py +++ b/httpie/core.py @@ -16,7 +16,7 @@ import requests.auth from requests.compat import str from .models import HTTPRequest, HTTPResponse, Environment -from .output import OutputProcessor, format +from .output import OutputProcessor, formatted_stream from .input import (OUT_REQ_BODY, OUT_REQ_HEAD, OUT_RESP_HEAD, OUT_RESP_BODY) from .cli import parser @@ -51,45 +51,35 @@ def get_response(args, env): # the `Content-Type` for us. args.headers['Content-Type'] = FORM - try: - credentials = None - if args.auth: - credentials = { - 'basic': requests.auth.HTTPBasicAuth, - 'digest': requests.auth.HTTPDigestAuth, - }[args.auth_type](args.auth.key, args.auth.value) + credentials = None + if args.auth: + credentials = { + 'basic': requests.auth.HTTPBasicAuth, + 'digest': requests.auth.HTTPDigestAuth, + }[args.auth_type](args.auth.key, args.auth.value) - if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)): - scheme = HTTPS if env.progname == 'https' else HTTP - url = scheme + args.url - else: - url = args.url + if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)): + scheme = HTTPS if env.progname == 'https' else HTTP + url = scheme + args.url + else: + url = args.url - return requests.request( - method=args.method.lower(), - url=url, - headers=args.headers, - data=args.data, - verify={'yes': True, 'no': False}.get(args.verify, args.verify), - timeout=args.timeout, - auth=credentials, - proxies=dict((p.key, p.value) for p in args.proxy), - files=args.files, - allow_redirects=args.allow_redirects, - params=args.params, - ) - - except (KeyboardInterrupt, SystemExit): - env.stderr.write('\n') - sys.exit(1) - except Exception as e: - if args.debug: - raise - env.stderr.write(str(repr(e) + '\n')) - sys.exit(1) + return requests.request( + method=args.method.lower(), + url=url, + headers=args.headers, + data=args.data, + verify={'yes': True, 'no': False}.get(args.verify, args.verify), + timeout=args.timeout, + auth=credentials, + proxies=dict((p.key, p.value) for p in args.proxy), + files=args.files, + allow_redirects=args.allow_redirects, + params=args.params, + ) -def get_output(args, env, request, response): +def output_stream(args, env, request, response): """Format parts of the `request`-`response` exchange according to `args` and `env` and return `bytes`. @@ -99,32 +89,38 @@ def get_output(args, env, request, response): prettifier = (OutputProcessor(env, pygments_style=args.style) if args.prettify else None) - if (OUT_REQ_HEAD in args.output_options - or OUT_REQ_BODY in args.output_options): - exchange.append(format( + with_request = (OUT_REQ_HEAD in args.output_options + or OUT_REQ_BODY in args.output_options) + with_response = (OUT_RESP_HEAD in args.output_options + or OUT_RESP_BODY in args.output_options) + + if with_request: + request_iter = formatted_stream( msg=HTTPRequest(request), env=env, prettifier=prettifier, with_headers=OUT_REQ_HEAD in args.output_options, - with_body=OUT_REQ_BODY in args.output_options - )) + with_body=OUT_REQ_BODY in args.output_options) - if (OUT_RESP_HEAD in args.output_options - or OUT_RESP_BODY in args.output_options): - exchange.append(format( + for chunk in request_iter: + yield chunk + + if with_response: + yield b'\n\n\n' + + if with_response: + response_iter = formatted_stream( msg=HTTPResponse(response), env=env, prettifier=prettifier, with_headers=OUT_RESP_HEAD in args.output_options, with_body=OUT_RESP_BODY in args.output_options) - ) - output = b'\n\n\n'.join(exchange) + for chunk in response_iter: + yield chunk if env.stdout_isatty: - output += b'\n\n' - - return output + yield b'\n\n' def get_exist_status(code, allow_redirects=False): @@ -155,25 +151,37 @@ def main(args=sys.argv[1:], env=Environment()): ' Please use `--output FILE\' instead.\n') return 1 - args = parser.parse_args(args=args, env=env) - - response = get_response(args, env) - - status = 0 - - if args.check_status: - status = get_exist_status(response.status_code, - args.allow_redirects) - if status and not env.stdout_isatty: - err = 'http error: %s %s\n' % ( - response.raw.status, response.raw.reason) - env.stderr.write(err) - - output = get_output(args, env, response.request, response) - try: - env.stdout.buffer.write(output) - except AttributeError: - env.stdout.write(output) + args = parser.parse_args(args=args, env=env) + response = get_response(args, env) + status = 0 + + if args.check_status: + status = get_exist_status(response.status_code, + args.allow_redirects) + if status and not env.stdout_isatty: + err = 'http error: %s %s\n' % ( + response.raw.status, response.raw.reason) + env.stderr.write(err) + + try: + # We are writing bytes so we use buffer on Python 3 + buffer = env.stdout.buffer + except AttributeError: + buffer = env.stdout + + for chunk in output_stream(args, env, response.request, response): + buffer.write(chunk) + if env.stdout_isatty: + env.stdout.flush() + + except (KeyboardInterrupt, SystemExit): + env.stderr.write('\n') + return 1 + except Exception as e: + if '--debug' in args: + raise + env.stderr.write(str(repr(e) + '\n')) + return 1 return status diff --git a/httpie/input.py b/httpie/input.py index b73d70db..9c524427 100644 --- a/httpie/input.py +++ b/httpie/input.py @@ -8,6 +8,7 @@ import json import argparse import mimetypes import getpass +from io import BytesIO try: from collections import OrderedDict @@ -424,8 +425,9 @@ def parse_items(items, data=None, headers=None, files=None, params=None): target = params elif item.sep == SEP_FILES: try: - value = (os.path.basename(value), - open(os.path.expanduser(value), 'rb')) + with open(os.path.expanduser(value), 'rb') as f: + value = (os.path.basename(value), + BytesIO(f.read())) except IOError as e: raise ParseError( 'Invalid argument "%s": %s' % (item.orig, e)) diff --git a/httpie/models.py b/httpie/models.py index ebb5a546..316a46fc 100644 --- a/httpie/models.py +++ b/httpie/models.py @@ -57,6 +57,10 @@ class HTTPMessage(object): class HTTPResponse(HTTPMessage): """A `requests.models.Response` wrapper.""" + def __iter__(self): + mb = 1024 * 1000 + return self._orig.iter_content(chunk_size=2 * mb) + @property def line(self): """Return Status-Line""" @@ -85,6 +89,9 @@ class HTTPResponse(HTTPMessage): class HTTPRequest(HTTPMessage): """A `requests.models.Request` wrapper.""" + def __iter__(self): + yield self.body + @property def line(self): """Return Request-Line""" diff --git a/httpie/output.py b/httpie/output.py index ce594b2d..fda8549a 100644 --- a/httpie/output.py +++ b/httpie/output.py @@ -26,11 +26,12 @@ BINARY_SUPPRESSED_NOTICE = ( ) -def format(msg, prettifier=None, with_headers=True, with_body=True, - env=Environment()): - """Return `bytes` representation of a `models.HTTPMessage`. +def formatted_stream(msg, prettifier=None, with_headers=True, with_body=True, + env=Environment()): + """Return an iterator yielding `bytes` representing `msg` + (a `models.HTTPMessage` subclass). - Sometimes the body contains binary data so we always return `bytes`. + The body can be binary so we always yield `bytes`. If `prettifier` is set or the output is a terminal then a binary body is not included in the output and is replaced with notice. @@ -41,7 +42,6 @@ def format(msg, prettifier=None, with_headers=True, with_body=True, then we prefer readability over precision. """ - # Output encoding. if env.stdout_isatty: # Use encoding suitable for the terminal. Unsupported characters @@ -59,46 +59,42 @@ def format(msg, prettifier=None, with_headers=True, with_body=True, if prettifier: env.init_colors() - #noinspection PyArgumentList - output = bytearray() - if with_headers: headers = '\n'.join([msg.line, msg.headers]) if prettifier: headers = prettifier.process_headers(headers) - output.extend( - headers.encode(output_encoding, errors).strip()) + yield headers.encode(output_encoding, errors).strip() - if with_body and msg.body: - output.extend(b'\n\n') + if with_body: - if with_body and msg.body: - - body = msg.body + prefix = b'\n\n' if with_headers else None if not (env.stdout_isatty or prettifier): # Verbatim body even if it's binary. - pass - else: + for body_chunk in msg: + if prefix: + yield prefix + prefix = None + yield body_chunk + elif msg.body: try: - body = body.decode(msg.encoding) + body = msg.body.decode(msg.encoding) except UnicodeDecodeError: # Suppress binary data. body = BINARY_SUPPRESSED_NOTICE.encode(output_encoding) if not with_headers: - output.extend(b'\n') + yield b'\n' else: if prettifier and msg.content_type: body = prettifier.process_body( body, msg.content_type).strip() body = body.encode(output_encoding, errors) - - output.extend(body) - - return bytes(output) + if prefix: + yield prefix + yield body class HTTPLexer(lexer.RegexLexer): diff --git a/tests/tests.py b/tests/tests.py index b0d5d79c..45b9eed1 100755 --- a/tests/tests.py +++ b/tests/tests.py @@ -22,9 +22,9 @@ To make it run faster and offline you can:: import os import sys import json -import tempfile import unittest import argparse +import tempfile try: from urllib.request import urlopen except ImportError: @@ -43,7 +43,7 @@ sys.path.insert(0, os.path.realpath(os.path.join(TESTS_ROOT, '..'))) from httpie import input from httpie.models import Environment -from httpie.core import main, get_output +from httpie.core import main, output_stream from httpie.output import BINARY_SUPPRESSED_NOTICE from httpie.input import ParseError @@ -104,42 +104,43 @@ def http(*args, **kwargs): except (Exception, SystemExit) as e: sys.stderr.write(stderr.read()) raise - - stdout.seek(0) - stderr.seek(0) - - output = stdout.read() - - try: - #noinspection PyArgumentList - r = StrResponse(output.decode('utf8')) - except UnicodeDecodeError: - #noinspection PyArgumentList - r = BytesResponse(output) else: - if TERMINAL_COLOR_PRESENCE_CHECK not in r: - # De-serialize JSON body if possible. - if r.strip().startswith('{'): - #noinspection PyTypeChecker - r.json = json.loads(r) - elif r.count('Content-Type:') == 1 and 'application/json' in r: - try: - j = r.strip()[r.strip().rindex('\n\n'):] - except ValueError: - pass - else: + stdout.seek(0) + stderr.seek(0) + + output = stdout.read() + + try: + #noinspection PyArgumentList + r = StrResponse(output.decode('utf8')) + except UnicodeDecodeError: + #noinspection PyArgumentList + r = BytesResponse(output) + else: + if TERMINAL_COLOR_PRESENCE_CHECK not in r: + # De-serialize JSON body if possible. + if r.strip().startswith('{'): + #noinspection PyTypeChecker + r.json = json.loads(r) + elif r.count('Content-Type:') == 1 and 'application/json' in r: try: - r.json = json.loads(j) + j = r.strip()[r.strip().rindex('\n\n'):] except ValueError: pass + else: + try: + r.json = json.loads(j) + except ValueError: + pass - r.stderr = stderr.read() - r.exit_status = exit_status + r.stderr = stderr.read() + r.exit_status = exit_status + return r + finally: + stdout.close() + stderr.close() - stdout.close() - stderr.close() - return r class BaseTestCase(unittest.TestCase): @@ -591,18 +592,19 @@ class MultipartFormDataFileUploadTest(BaseTestCase): class BinaryRequestDataTest(BaseTestCase): def test_binary_stdin(self): - env = Environment( - stdin=open(TEST_BIN_FILE_PATH, 'rb'), - stdin_isatty=False, - stdout_isatty=False - ) - r = http( - '--print=B', - 'POST', - httpbin('/post'), - env=env, - ) - self.assertEqual(r, TEST_BIN_FILE_CONTENT) + with open(TEST_BIN_FILE_PATH, 'rb') as stdin: + env = Environment( + stdin=stdin, + stdin_isatty=False, + stdout_isatty=False + ) + r = http( + '--print=B', + 'POST', + httpbin('/post'), + env=env, + ) + self.assertEqual(r, TEST_BIN_FILE_CONTENT) def test_binary_file_path(self): env = Environment( @@ -700,37 +702,24 @@ class RequestBodyFromFilePathTest(BaseTestCase): def test_request_body_from_file_by_path_no_field_name_allowed(self): env = Environment(stdin_isatty=True) - try: - http( - 'POST', - httpbin('/post'), - 'field-name@' + TEST_FILE_PATH, - env=env - ) - except SystemExit: - env.stderr.seek(0) - stderr = env.stderr.read() - self.assertIn('perhaps you meant --form?', stderr) - else: - self.fail('validation did not work') + r = http( + 'POST', + httpbin('/post'), + 'field-name@' + TEST_FILE_PATH, + env=env + ) + self.assertIn('perhaps you meant --form?', r.stderr) def test_request_body_from_file_by_path_no_data_items_allowed(self): env = Environment(stdin_isatty=True) - try: - http( - 'POST', - httpbin('/post'), - '@' + TEST_FILE_PATH, - 'foo=bar', - env=env - ) - except SystemExit: - env.stderr.seek(0) - self.assertIn( - 'cannot be mixed', - env.stderr.read()) - else: - self.fail('validation did not work') + r = http( + 'POST', + httpbin('/post'), + '@' + TEST_FILE_PATH, + 'foo=bar', + env=env + ) + self.assertIn('cannot be mixed', r.stderr) class AuthTest(BaseTestCase): @@ -852,24 +841,21 @@ class FakeWindowsTest(BaseTestCase): def test_output_file_pretty_not_allowed_on_windows(self): env = Environment( - is_windows=True, stdout_isatty=True, stdin_isatty=True) + is_windows=True, + stdout_isatty=True, + stdin_isatty=True + ) - try: - http( - '--output', - os.path.join(tempfile.gettempdir(), '__httpie_test_output__'), - '--pretty', - 'GET', - httpbin('/get'), - env=env - ) - except SystemExit: - env.stderr.seek(0) - err = env.stderr.read() - self.assertIn( - 'Only terminal output can be prettified on Windows', err) - else: - self.fail('validation did not work') + r = http( + '--output', + os.path.join(tempfile.gettempdir(), '__httpie_test_output__'), + '--pretty', + 'GET', + httpbin('/get'), + env=env + ) + self.assertIn( + 'Only terminal output can be prettified on Windows', r.stderr) ################################################################# @@ -1077,7 +1063,8 @@ class UnicodeOutputTestCase(BaseTestCase): args.style = 'default' # colorized output contains escape sequences - output = get_output(args, Environment(), response.request, response).decode('utf8') + output = output_stream(args, Environment(), response.request, response) + output = b''.join(output).decode('utf8') for key, value in response_dict.items(): self.assertIn(key, output) self.assertIn(value, output)