Take advantage of streaming.

It's now possible to download huge files with HTTPie, and it's often faster than curl and wget!
This commit is contained in:
Jakub Roztocil 2012-08-01 23:21:52 +02:00
parent 67ad5980b2
commit 52e46bedda
6 changed files with 184 additions and 183 deletions

View File

@ -183,7 +183,7 @@ Note that when the **output is redirected** (like the examples above), HTTPie
applies a different set of defaults than for a console output. Namely, colors applies a different set of defaults than for a console output. Namely, colors
aren't used (unless ``--pretty`` is set) and only the response body aren't used (unless ``--pretty`` is set) and only the response body
is printed (unless ``--print`` options are specified). It is a convenience is printed (unless ``--print`` options are specified). It is a convenience
that allows for things like the one above or downloading (smallish) binary that allows for things like the one above or downloading binary
files without having to set any flags: files without having to set any flags:
.. code-block:: shell .. code-block:: shell
@ -373,6 +373,7 @@ Changelog
========= =========
* `0.2.7dev`_ * `0.2.7dev`_
* Support for efficient large file downloads.
* Response body is fetched only when needed (e.g., not with ``--headers``). * Response body is fetched only when needed (e.g., not with ``--headers``).
* Updated Solarized color scheme. * Updated Solarized color scheme.
* Windows: Added ``--output FILE`` to store output into a file * Windows: Added ``--output FILE`` to store output into a file

View File

@ -16,7 +16,7 @@ import requests.auth
from requests.compat import str from requests.compat import str
from .models import HTTPRequest, HTTPResponse, Environment from .models import HTTPRequest, HTTPResponse, Environment
from .output import OutputProcessor, format from .output import OutputProcessor, formatted_stream
from .input import (OUT_REQ_BODY, OUT_REQ_HEAD, from .input import (OUT_REQ_BODY, OUT_REQ_HEAD,
OUT_RESP_HEAD, OUT_RESP_BODY) OUT_RESP_HEAD, OUT_RESP_BODY)
from .cli import parser from .cli import parser
@ -51,45 +51,35 @@ def get_response(args, env):
# the `Content-Type` for us. # the `Content-Type` for us.
args.headers['Content-Type'] = FORM args.headers['Content-Type'] = FORM
try: credentials = None
credentials = None if args.auth:
if args.auth: credentials = {
credentials = { 'basic': requests.auth.HTTPBasicAuth,
'basic': requests.auth.HTTPBasicAuth, 'digest': requests.auth.HTTPDigestAuth,
'digest': requests.auth.HTTPDigestAuth, }[args.auth_type](args.auth.key, args.auth.value)
}[args.auth_type](args.auth.key, args.auth.value)
if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)): if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)):
scheme = HTTPS if env.progname == 'https' else HTTP scheme = HTTPS if env.progname == 'https' else HTTP
url = scheme + args.url url = scheme + args.url
else: else:
url = args.url url = args.url
return requests.request( return requests.request(
method=args.method.lower(), method=args.method.lower(),
url=url, url=url,
headers=args.headers, headers=args.headers,
data=args.data, data=args.data,
verify={'yes': True, 'no': False}.get(args.verify, args.verify), verify={'yes': True, 'no': False}.get(args.verify, args.verify),
timeout=args.timeout, timeout=args.timeout,
auth=credentials, auth=credentials,
proxies=dict((p.key, p.value) for p in args.proxy), proxies=dict((p.key, p.value) for p in args.proxy),
files=args.files, files=args.files,
allow_redirects=args.allow_redirects, allow_redirects=args.allow_redirects,
params=args.params, params=args.params,
) )
except (KeyboardInterrupt, SystemExit):
env.stderr.write('\n')
sys.exit(1)
except Exception as e:
if args.debug:
raise
env.stderr.write(str(repr(e) + '\n'))
sys.exit(1)
def get_output(args, env, request, response): def output_stream(args, env, request, response):
"""Format parts of the `request`-`response` exchange """Format parts of the `request`-`response` exchange
according to `args` and `env` and return `bytes`. according to `args` and `env` and return `bytes`.
@ -99,32 +89,38 @@ def get_output(args, env, request, response):
prettifier = (OutputProcessor(env, pygments_style=args.style) prettifier = (OutputProcessor(env, pygments_style=args.style)
if args.prettify else None) if args.prettify else None)
if (OUT_REQ_HEAD in args.output_options with_request = (OUT_REQ_HEAD in args.output_options
or OUT_REQ_BODY in args.output_options): or OUT_REQ_BODY in args.output_options)
exchange.append(format( with_response = (OUT_RESP_HEAD in args.output_options
or OUT_RESP_BODY in args.output_options)
if with_request:
request_iter = formatted_stream(
msg=HTTPRequest(request), msg=HTTPRequest(request),
env=env, env=env,
prettifier=prettifier, prettifier=prettifier,
with_headers=OUT_REQ_HEAD in args.output_options, with_headers=OUT_REQ_HEAD in args.output_options,
with_body=OUT_REQ_BODY in args.output_options with_body=OUT_REQ_BODY in args.output_options)
))
if (OUT_RESP_HEAD in args.output_options for chunk in request_iter:
or OUT_RESP_BODY in args.output_options): yield chunk
exchange.append(format(
if with_response:
yield b'\n\n\n'
if with_response:
response_iter = formatted_stream(
msg=HTTPResponse(response), msg=HTTPResponse(response),
env=env, env=env,
prettifier=prettifier, prettifier=prettifier,
with_headers=OUT_RESP_HEAD in args.output_options, with_headers=OUT_RESP_HEAD in args.output_options,
with_body=OUT_RESP_BODY in args.output_options) with_body=OUT_RESP_BODY in args.output_options)
)
output = b'\n\n\n'.join(exchange) for chunk in response_iter:
yield chunk
if env.stdout_isatty: if env.stdout_isatty:
output += b'\n\n' yield b'\n\n'
return output
def get_exist_status(code, allow_redirects=False): def get_exist_status(code, allow_redirects=False):
@ -155,25 +151,37 @@ def main(args=sys.argv[1:], env=Environment()):
' Please use `--output FILE\' instead.\n') ' Please use `--output FILE\' instead.\n')
return 1 return 1
args = parser.parse_args(args=args, env=env)
response = get_response(args, env)
status = 0
if args.check_status:
status = get_exist_status(response.status_code,
args.allow_redirects)
if status and not env.stdout_isatty:
err = 'http error: %s %s\n' % (
response.raw.status, response.raw.reason)
env.stderr.write(err)
output = get_output(args, env, response.request, response)
try: try:
env.stdout.buffer.write(output) args = parser.parse_args(args=args, env=env)
except AttributeError: response = get_response(args, env)
env.stdout.write(output) status = 0
if args.check_status:
status = get_exist_status(response.status_code,
args.allow_redirects)
if status and not env.stdout_isatty:
err = 'http error: %s %s\n' % (
response.raw.status, response.raw.reason)
env.stderr.write(err)
try:
# We are writing bytes so we use buffer on Python 3
buffer = env.stdout.buffer
except AttributeError:
buffer = env.stdout
for chunk in output_stream(args, env, response.request, response):
buffer.write(chunk)
if env.stdout_isatty:
env.stdout.flush()
except (KeyboardInterrupt, SystemExit):
env.stderr.write('\n')
return 1
except Exception as e:
if '--debug' in args:
raise
env.stderr.write(str(repr(e) + '\n'))
return 1
return status return status

View File

@ -8,6 +8,7 @@ import json
import argparse import argparse
import mimetypes import mimetypes
import getpass import getpass
from io import BytesIO
try: try:
from collections import OrderedDict from collections import OrderedDict
@ -424,8 +425,9 @@ def parse_items(items, data=None, headers=None, files=None, params=None):
target = params target = params
elif item.sep == SEP_FILES: elif item.sep == SEP_FILES:
try: try:
value = (os.path.basename(value), with open(os.path.expanduser(value), 'rb') as f:
open(os.path.expanduser(value), 'rb')) value = (os.path.basename(value),
BytesIO(f.read()))
except IOError as e: except IOError as e:
raise ParseError( raise ParseError(
'Invalid argument "%s": %s' % (item.orig, e)) 'Invalid argument "%s": %s' % (item.orig, e))

View File

@ -57,6 +57,10 @@ class HTTPMessage(object):
class HTTPResponse(HTTPMessage): class HTTPResponse(HTTPMessage):
"""A `requests.models.Response` wrapper.""" """A `requests.models.Response` wrapper."""
def __iter__(self):
mb = 1024 * 1000
return self._orig.iter_content(chunk_size=2 * mb)
@property @property
def line(self): def line(self):
"""Return Status-Line""" """Return Status-Line"""
@ -85,6 +89,9 @@ class HTTPResponse(HTTPMessage):
class HTTPRequest(HTTPMessage): class HTTPRequest(HTTPMessage):
"""A `requests.models.Request` wrapper.""" """A `requests.models.Request` wrapper."""
def __iter__(self):
yield self.body
@property @property
def line(self): def line(self):
"""Return Request-Line""" """Return Request-Line"""

View File

@ -26,11 +26,12 @@ BINARY_SUPPRESSED_NOTICE = (
) )
def format(msg, prettifier=None, with_headers=True, with_body=True, def formatted_stream(msg, prettifier=None, with_headers=True, with_body=True,
env=Environment()): env=Environment()):
"""Return `bytes` representation of a `models.HTTPMessage`. """Return an iterator yielding `bytes` representing `msg`
(a `models.HTTPMessage` subclass).
Sometimes the body contains binary data so we always return `bytes`. The body can be binary so we always yield `bytes`.
If `prettifier` is set or the output is a terminal then a binary If `prettifier` is set or the output is a terminal then a binary
body is not included in the output and is replaced with notice. body is not included in the output and is replaced with notice.
@ -41,7 +42,6 @@ def format(msg, prettifier=None, with_headers=True, with_body=True,
then we prefer readability over precision. then we prefer readability over precision.
""" """
# Output encoding. # Output encoding.
if env.stdout_isatty: if env.stdout_isatty:
# Use encoding suitable for the terminal. Unsupported characters # Use encoding suitable for the terminal. Unsupported characters
@ -59,46 +59,42 @@ def format(msg, prettifier=None, with_headers=True, with_body=True,
if prettifier: if prettifier:
env.init_colors() env.init_colors()
#noinspection PyArgumentList
output = bytearray()
if with_headers: if with_headers:
headers = '\n'.join([msg.line, msg.headers]) headers = '\n'.join([msg.line, msg.headers])
if prettifier: if prettifier:
headers = prettifier.process_headers(headers) headers = prettifier.process_headers(headers)
output.extend( yield headers.encode(output_encoding, errors).strip()
headers.encode(output_encoding, errors).strip())
if with_body and msg.body: if with_body:
output.extend(b'\n\n')
if with_body and msg.body: prefix = b'\n\n' if with_headers else None
body = msg.body
if not (env.stdout_isatty or prettifier): if not (env.stdout_isatty or prettifier):
# Verbatim body even if it's binary. # Verbatim body even if it's binary.
pass for body_chunk in msg:
else: if prefix:
yield prefix
prefix = None
yield body_chunk
elif msg.body:
try: try:
body = body.decode(msg.encoding) body = msg.body.decode(msg.encoding)
except UnicodeDecodeError: except UnicodeDecodeError:
# Suppress binary data. # Suppress binary data.
body = BINARY_SUPPRESSED_NOTICE.encode(output_encoding) body = BINARY_SUPPRESSED_NOTICE.encode(output_encoding)
if not with_headers: if not with_headers:
output.extend(b'\n') yield b'\n'
else: else:
if prettifier and msg.content_type: if prettifier and msg.content_type:
body = prettifier.process_body( body = prettifier.process_body(
body, msg.content_type).strip() body, msg.content_type).strip()
body = body.encode(output_encoding, errors) body = body.encode(output_encoding, errors)
if prefix:
output.extend(body) yield prefix
yield body
return bytes(output)
class HTTPLexer(lexer.RegexLexer): class HTTPLexer(lexer.RegexLexer):

View File

@ -22,9 +22,9 @@ To make it run faster and offline you can::
import os import os
import sys import sys
import json import json
import tempfile
import unittest import unittest
import argparse import argparse
import tempfile
try: try:
from urllib.request import urlopen from urllib.request import urlopen
except ImportError: except ImportError:
@ -43,7 +43,7 @@ sys.path.insert(0, os.path.realpath(os.path.join(TESTS_ROOT, '..')))
from httpie import input from httpie import input
from httpie.models import Environment from httpie.models import Environment
from httpie.core import main, get_output from httpie.core import main, output_stream
from httpie.output import BINARY_SUPPRESSED_NOTICE from httpie.output import BINARY_SUPPRESSED_NOTICE
from httpie.input import ParseError from httpie.input import ParseError
@ -104,42 +104,43 @@ def http(*args, **kwargs):
except (Exception, SystemExit) as e: except (Exception, SystemExit) as e:
sys.stderr.write(stderr.read()) sys.stderr.write(stderr.read())
raise raise
stdout.seek(0)
stderr.seek(0)
output = stdout.read()
try:
#noinspection PyArgumentList
r = StrResponse(output.decode('utf8'))
except UnicodeDecodeError:
#noinspection PyArgumentList
r = BytesResponse(output)
else: else:
if TERMINAL_COLOR_PRESENCE_CHECK not in r: stdout.seek(0)
# De-serialize JSON body if possible. stderr.seek(0)
if r.strip().startswith('{'):
#noinspection PyTypeChecker output = stdout.read()
r.json = json.loads(r)
elif r.count('Content-Type:') == 1 and 'application/json' in r: try:
try: #noinspection PyArgumentList
j = r.strip()[r.strip().rindex('\n\n'):] r = StrResponse(output.decode('utf8'))
except ValueError: except UnicodeDecodeError:
pass #noinspection PyArgumentList
else: r = BytesResponse(output)
else:
if TERMINAL_COLOR_PRESENCE_CHECK not in r:
# De-serialize JSON body if possible.
if r.strip().startswith('{'):
#noinspection PyTypeChecker
r.json = json.loads(r)
elif r.count('Content-Type:') == 1 and 'application/json' in r:
try: try:
r.json = json.loads(j) j = r.strip()[r.strip().rindex('\n\n'):]
except ValueError: except ValueError:
pass pass
else:
try:
r.json = json.loads(j)
except ValueError:
pass
r.stderr = stderr.read() r.stderr = stderr.read()
r.exit_status = exit_status r.exit_status = exit_status
return r
finally:
stdout.close()
stderr.close()
stdout.close()
stderr.close()
return r
class BaseTestCase(unittest.TestCase): class BaseTestCase(unittest.TestCase):
@ -591,18 +592,19 @@ class MultipartFormDataFileUploadTest(BaseTestCase):
class BinaryRequestDataTest(BaseTestCase): class BinaryRequestDataTest(BaseTestCase):
def test_binary_stdin(self): def test_binary_stdin(self):
env = Environment( with open(TEST_BIN_FILE_PATH, 'rb') as stdin:
stdin=open(TEST_BIN_FILE_PATH, 'rb'), env = Environment(
stdin_isatty=False, stdin=stdin,
stdout_isatty=False stdin_isatty=False,
) stdout_isatty=False
r = http( )
'--print=B', r = http(
'POST', '--print=B',
httpbin('/post'), 'POST',
env=env, httpbin('/post'),
) env=env,
self.assertEqual(r, TEST_BIN_FILE_CONTENT) )
self.assertEqual(r, TEST_BIN_FILE_CONTENT)
def test_binary_file_path(self): def test_binary_file_path(self):
env = Environment( env = Environment(
@ -700,37 +702,24 @@ class RequestBodyFromFilePathTest(BaseTestCase):
def test_request_body_from_file_by_path_no_field_name_allowed(self): def test_request_body_from_file_by_path_no_field_name_allowed(self):
env = Environment(stdin_isatty=True) env = Environment(stdin_isatty=True)
try: r = http(
http( 'POST',
'POST', httpbin('/post'),
httpbin('/post'), 'field-name@' + TEST_FILE_PATH,
'field-name@' + TEST_FILE_PATH, env=env
env=env )
) self.assertIn('perhaps you meant --form?', r.stderr)
except SystemExit:
env.stderr.seek(0)
stderr = env.stderr.read()
self.assertIn('perhaps you meant --form?', stderr)
else:
self.fail('validation did not work')
def test_request_body_from_file_by_path_no_data_items_allowed(self): def test_request_body_from_file_by_path_no_data_items_allowed(self):
env = Environment(stdin_isatty=True) env = Environment(stdin_isatty=True)
try: r = http(
http( 'POST',
'POST', httpbin('/post'),
httpbin('/post'), '@' + TEST_FILE_PATH,
'@' + TEST_FILE_PATH, 'foo=bar',
'foo=bar', env=env
env=env )
) self.assertIn('cannot be mixed', r.stderr)
except SystemExit:
env.stderr.seek(0)
self.assertIn(
'cannot be mixed',
env.stderr.read())
else:
self.fail('validation did not work')
class AuthTest(BaseTestCase): class AuthTest(BaseTestCase):
@ -852,24 +841,21 @@ class FakeWindowsTest(BaseTestCase):
def test_output_file_pretty_not_allowed_on_windows(self): def test_output_file_pretty_not_allowed_on_windows(self):
env = Environment( env = Environment(
is_windows=True, stdout_isatty=True, stdin_isatty=True) is_windows=True,
stdout_isatty=True,
stdin_isatty=True
)
try: r = http(
http( '--output',
'--output', os.path.join(tempfile.gettempdir(), '__httpie_test_output__'),
os.path.join(tempfile.gettempdir(), '__httpie_test_output__'), '--pretty',
'--pretty', 'GET',
'GET', httpbin('/get'),
httpbin('/get'), env=env
env=env )
) self.assertIn(
except SystemExit: 'Only terminal output can be prettified on Windows', r.stderr)
env.stderr.seek(0)
err = env.stderr.read()
self.assertIn(
'Only terminal output can be prettified on Windows', err)
else:
self.fail('validation did not work')
################################################################# #################################################################
@ -1077,7 +1063,8 @@ class UnicodeOutputTestCase(BaseTestCase):
args.style = 'default' args.style = 'default'
# colorized output contains escape sequences # colorized output contains escape sequences
output = get_output(args, Environment(), response.request, response).decode('utf8') output = output_stream(args, Environment(), response.request, response)
output = b''.join(output).decode('utf8')
for key, value in response_dict.items(): for key, value in response_dict.items():
self.assertIn(key, output) self.assertIn(key, output)
self.assertIn(value, output) self.assertIn(value, output)