Take advantage of streaming.

It's now possible to download huge files with HTTPie, and it's often faster than curl and wget!
This commit is contained in:
Jakub Roztocil 2012-08-01 23:21:52 +02:00
parent 67ad5980b2
commit 52e46bedda
6 changed files with 184 additions and 183 deletions

View File

@ -183,7 +183,7 @@ Note that when the **output is redirected** (like the examples above), HTTPie
applies a different set of defaults than for a console output. Namely, colors
aren't used (unless ``--pretty`` is set) and only the response body
is printed (unless ``--print`` options are specified). It is a convenience
that allows for things like the one above or downloading (smallish) binary
that allows for things like the one above or downloading binary
files without having to set any flags:
.. code-block:: shell
@ -373,6 +373,7 @@ Changelog
=========
* `0.2.7dev`_
* Support for efficient large file downloads.
* Response body is fetched only when needed (e.g., not with ``--headers``).
* Updated Solarized color scheme.
* Windows: Added ``--output FILE`` to store output into a file

View File

@ -16,7 +16,7 @@ import requests.auth
from requests.compat import str
from .models import HTTPRequest, HTTPResponse, Environment
from .output import OutputProcessor, format
from .output import OutputProcessor, formatted_stream
from .input import (OUT_REQ_BODY, OUT_REQ_HEAD,
OUT_RESP_HEAD, OUT_RESP_BODY)
from .cli import parser
@ -51,45 +51,35 @@ def get_response(args, env):
# the `Content-Type` for us.
args.headers['Content-Type'] = FORM
try:
credentials = None
if args.auth:
credentials = {
'basic': requests.auth.HTTPBasicAuth,
'digest': requests.auth.HTTPDigestAuth,
}[args.auth_type](args.auth.key, args.auth.value)
credentials = None
if args.auth:
credentials = {
'basic': requests.auth.HTTPBasicAuth,
'digest': requests.auth.HTTPDigestAuth,
}[args.auth_type](args.auth.key, args.auth.value)
if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)):
scheme = HTTPS if env.progname == 'https' else HTTP
url = scheme + args.url
else:
url = args.url
if not (args.url.startswith(HTTP) or args.url.startswith(HTTPS)):
scheme = HTTPS if env.progname == 'https' else HTTP
url = scheme + args.url
else:
url = args.url
return requests.request(
method=args.method.lower(),
url=url,
headers=args.headers,
data=args.data,
verify={'yes': True, 'no': False}.get(args.verify, args.verify),
timeout=args.timeout,
auth=credentials,
proxies=dict((p.key, p.value) for p in args.proxy),
files=args.files,
allow_redirects=args.allow_redirects,
params=args.params,
)
except (KeyboardInterrupt, SystemExit):
env.stderr.write('\n')
sys.exit(1)
except Exception as e:
if args.debug:
raise
env.stderr.write(str(repr(e) + '\n'))
sys.exit(1)
return requests.request(
method=args.method.lower(),
url=url,
headers=args.headers,
data=args.data,
verify={'yes': True, 'no': False}.get(args.verify, args.verify),
timeout=args.timeout,
auth=credentials,
proxies=dict((p.key, p.value) for p in args.proxy),
files=args.files,
allow_redirects=args.allow_redirects,
params=args.params,
)
def get_output(args, env, request, response):
def output_stream(args, env, request, response):
"""Format parts of the `request`-`response` exchange
according to `args` and `env` and return `bytes`.
@ -99,32 +89,38 @@ def get_output(args, env, request, response):
prettifier = (OutputProcessor(env, pygments_style=args.style)
if args.prettify else None)
if (OUT_REQ_HEAD in args.output_options
or OUT_REQ_BODY in args.output_options):
exchange.append(format(
with_request = (OUT_REQ_HEAD in args.output_options
or OUT_REQ_BODY in args.output_options)
with_response = (OUT_RESP_HEAD in args.output_options
or OUT_RESP_BODY in args.output_options)
if with_request:
request_iter = formatted_stream(
msg=HTTPRequest(request),
env=env,
prettifier=prettifier,
with_headers=OUT_REQ_HEAD in args.output_options,
with_body=OUT_REQ_BODY in args.output_options
))
with_body=OUT_REQ_BODY in args.output_options)
if (OUT_RESP_HEAD in args.output_options
or OUT_RESP_BODY in args.output_options):
exchange.append(format(
for chunk in request_iter:
yield chunk
if with_response:
yield b'\n\n\n'
if with_response:
response_iter = formatted_stream(
msg=HTTPResponse(response),
env=env,
prettifier=prettifier,
with_headers=OUT_RESP_HEAD in args.output_options,
with_body=OUT_RESP_BODY in args.output_options)
)
output = b'\n\n\n'.join(exchange)
for chunk in response_iter:
yield chunk
if env.stdout_isatty:
output += b'\n\n'
return output
yield b'\n\n'
def get_exist_status(code, allow_redirects=False):
@ -155,25 +151,37 @@ def main(args=sys.argv[1:], env=Environment()):
' Please use `--output FILE\' instead.\n')
return 1
args = parser.parse_args(args=args, env=env)
response = get_response(args, env)
status = 0
if args.check_status:
status = get_exist_status(response.status_code,
args.allow_redirects)
if status and not env.stdout_isatty:
err = 'http error: %s %s\n' % (
response.raw.status, response.raw.reason)
env.stderr.write(err)
output = get_output(args, env, response.request, response)
try:
env.stdout.buffer.write(output)
except AttributeError:
env.stdout.write(output)
args = parser.parse_args(args=args, env=env)
response = get_response(args, env)
status = 0
if args.check_status:
status = get_exist_status(response.status_code,
args.allow_redirects)
if status and not env.stdout_isatty:
err = 'http error: %s %s\n' % (
response.raw.status, response.raw.reason)
env.stderr.write(err)
try:
# We are writing bytes so we use buffer on Python 3
buffer = env.stdout.buffer
except AttributeError:
buffer = env.stdout
for chunk in output_stream(args, env, response.request, response):
buffer.write(chunk)
if env.stdout_isatty:
env.stdout.flush()
except (KeyboardInterrupt, SystemExit):
env.stderr.write('\n')
return 1
except Exception as e:
if '--debug' in args:
raise
env.stderr.write(str(repr(e) + '\n'))
return 1
return status

View File

@ -8,6 +8,7 @@ import json
import argparse
import mimetypes
import getpass
from io import BytesIO
try:
from collections import OrderedDict
@ -424,8 +425,9 @@ def parse_items(items, data=None, headers=None, files=None, params=None):
target = params
elif item.sep == SEP_FILES:
try:
value = (os.path.basename(value),
open(os.path.expanduser(value), 'rb'))
with open(os.path.expanduser(value), 'rb') as f:
value = (os.path.basename(value),
BytesIO(f.read()))
except IOError as e:
raise ParseError(
'Invalid argument "%s": %s' % (item.orig, e))

View File

@ -57,6 +57,10 @@ class HTTPMessage(object):
class HTTPResponse(HTTPMessage):
"""A `requests.models.Response` wrapper."""
# Iterate over the response body in chunks by delegating to
# `iter_content()` on `self._orig` (presumably the wrapped `requests`
# response -- confirm against HTTPMessage.__init__).
def __iter__(self):
# NOTE: despite its name, `mb` is 1024 * 1000 = 1,024,000 bytes, which is
# neither a decimal megabyte (10**6) nor a mebibyte (2**20). The requested
# chunk size is therefore 2,048,000 bytes.
mb = 1024 * 1000
return self._orig.iter_content(chunk_size=2 * mb)
@property
def line(self):
"""Return Status-Line"""
@ -85,6 +89,9 @@ class HTTPResponse(HTTPMessage):
class HTTPRequest(HTTPMessage):
"""A `requests.models.Request` wrapper."""
# Yield the request body as one single chunk (`self.body`), so that a
# request can be consumed with a plain `for chunk in msg` loop.
def __iter__(self):
yield self.body
@property
def line(self):
"""Return Request-Line"""

View File

@ -26,11 +26,12 @@ BINARY_SUPPRESSED_NOTICE = (
)
def format(msg, prettifier=None, with_headers=True, with_body=True,
env=Environment()):
"""Return `bytes` representation of a `models.HTTPMessage`.
def formatted_stream(msg, prettifier=None, with_headers=True, with_body=True,
env=Environment()):
"""Return an iterator yielding `bytes` representing `msg`
(a `models.HTTPMessage` subclass).
Sometimes the body contains binary data so we always return `bytes`.
The body can be binary so we always yield `bytes`.
If `prettifier` is set or the output is a terminal then a binary
body is not included in the output and is replaced with notice.
@ -41,7 +42,6 @@ def format(msg, prettifier=None, with_headers=True, with_body=True,
then we prefer readability over precision.
"""
# Output encoding.
if env.stdout_isatty:
# Use encoding suitable for the terminal. Unsupported characters
@ -59,46 +59,42 @@ def format(msg, prettifier=None, with_headers=True, with_body=True,
if prettifier:
env.init_colors()
#noinspection PyArgumentList
output = bytearray()
if with_headers:
headers = '\n'.join([msg.line, msg.headers])
if prettifier:
headers = prettifier.process_headers(headers)
output.extend(
headers.encode(output_encoding, errors).strip())
yield headers.encode(output_encoding, errors).strip()
if with_body and msg.body:
output.extend(b'\n\n')
if with_body:
if with_body and msg.body:
body = msg.body
prefix = b'\n\n' if with_headers else None
if not (env.stdout_isatty or prettifier):
# Verbatim body even if it's binary.
pass
else:
for body_chunk in msg:
if prefix:
yield prefix
prefix = None
yield body_chunk
elif msg.body:
try:
body = body.decode(msg.encoding)
body = msg.body.decode(msg.encoding)
except UnicodeDecodeError:
# Suppress binary data.
body = BINARY_SUPPRESSED_NOTICE.encode(output_encoding)
if not with_headers:
output.extend(b'\n')
yield b'\n'
else:
if prettifier and msg.content_type:
body = prettifier.process_body(
body, msg.content_type).strip()
body = body.encode(output_encoding, errors)
output.extend(body)
return bytes(output)
if prefix:
yield prefix
yield body
class HTTPLexer(lexer.RegexLexer):

View File

@ -22,9 +22,9 @@ To make it run faster and offline you can::
import os
import sys
import json
import tempfile
import unittest
import argparse
import tempfile
try:
from urllib.request import urlopen
except ImportError:
@ -43,7 +43,7 @@ sys.path.insert(0, os.path.realpath(os.path.join(TESTS_ROOT, '..')))
from httpie import input
from httpie.models import Environment
from httpie.core import main, get_output
from httpie.core import main, output_stream
from httpie.output import BINARY_SUPPRESSED_NOTICE
from httpie.input import ParseError
@ -104,42 +104,43 @@ def http(*args, **kwargs):
except (Exception, SystemExit) as e:
sys.stderr.write(stderr.read())
raise
stdout.seek(0)
stderr.seek(0)
output = stdout.read()
try:
#noinspection PyArgumentList
r = StrResponse(output.decode('utf8'))
except UnicodeDecodeError:
#noinspection PyArgumentList
r = BytesResponse(output)
else:
if TERMINAL_COLOR_PRESENCE_CHECK not in r:
# De-serialize JSON body if possible.
if r.strip().startswith('{'):
#noinspection PyTypeChecker
r.json = json.loads(r)
elif r.count('Content-Type:') == 1 and 'application/json' in r:
try:
j = r.strip()[r.strip().rindex('\n\n'):]
except ValueError:
pass
else:
stdout.seek(0)
stderr.seek(0)
output = stdout.read()
try:
#noinspection PyArgumentList
r = StrResponse(output.decode('utf8'))
except UnicodeDecodeError:
#noinspection PyArgumentList
r = BytesResponse(output)
else:
if TERMINAL_COLOR_PRESENCE_CHECK not in r:
# De-serialize JSON body if possible.
if r.strip().startswith('{'):
#noinspection PyTypeChecker
r.json = json.loads(r)
elif r.count('Content-Type:') == 1 and 'application/json' in r:
try:
r.json = json.loads(j)
j = r.strip()[r.strip().rindex('\n\n'):]
except ValueError:
pass
else:
try:
r.json = json.loads(j)
except ValueError:
pass
r.stderr = stderr.read()
r.exit_status = exit_status
r.stderr = stderr.read()
r.exit_status = exit_status
return r
finally:
stdout.close()
stderr.close()
stdout.close()
stderr.close()
return r
class BaseTestCase(unittest.TestCase):
@ -591,18 +592,19 @@ class MultipartFormDataFileUploadTest(BaseTestCase):
class BinaryRequestDataTest(BaseTestCase):
def test_binary_stdin(self):
env = Environment(
stdin=open(TEST_BIN_FILE_PATH, 'rb'),
stdin_isatty=False,
stdout_isatty=False
)
r = http(
'--print=B',
'POST',
httpbin('/post'),
env=env,
)
self.assertEqual(r, TEST_BIN_FILE_CONTENT)
with open(TEST_BIN_FILE_PATH, 'rb') as stdin:
env = Environment(
stdin=stdin,
stdin_isatty=False,
stdout_isatty=False
)
r = http(
'--print=B',
'POST',
httpbin('/post'),
env=env,
)
self.assertEqual(r, TEST_BIN_FILE_CONTENT)
def test_binary_file_path(self):
env = Environment(
@ -700,37 +702,24 @@ class RequestBodyFromFilePathTest(BaseTestCase):
def test_request_body_from_file_by_path_no_field_name_allowed(self):
env = Environment(stdin_isatty=True)
try:
http(
'POST',
httpbin('/post'),
'field-name@' + TEST_FILE_PATH,
env=env
)
except SystemExit:
env.stderr.seek(0)
stderr = env.stderr.read()
self.assertIn('perhaps you meant --form?', stderr)
else:
self.fail('validation did not work')
r = http(
'POST',
httpbin('/post'),
'field-name@' + TEST_FILE_PATH,
env=env
)
self.assertIn('perhaps you meant --form?', r.stderr)
def test_request_body_from_file_by_path_no_data_items_allowed(self):
env = Environment(stdin_isatty=True)
try:
http(
'POST',
httpbin('/post'),
'@' + TEST_FILE_PATH,
'foo=bar',
env=env
)
except SystemExit:
env.stderr.seek(0)
self.assertIn(
'cannot be mixed',
env.stderr.read())
else:
self.fail('validation did not work')
r = http(
'POST',
httpbin('/post'),
'@' + TEST_FILE_PATH,
'foo=bar',
env=env
)
self.assertIn('cannot be mixed', r.stderr)
class AuthTest(BaseTestCase):
@ -852,24 +841,21 @@ class FakeWindowsTest(BaseTestCase):
def test_output_file_pretty_not_allowed_on_windows(self):
env = Environment(
is_windows=True, stdout_isatty=True, stdin_isatty=True)
is_windows=True,
stdout_isatty=True,
stdin_isatty=True
)
try:
http(
'--output',
os.path.join(tempfile.gettempdir(), '__httpie_test_output__'),
'--pretty',
'GET',
httpbin('/get'),
env=env
)
except SystemExit:
env.stderr.seek(0)
err = env.stderr.read()
self.assertIn(
'Only terminal output can be prettified on Windows', err)
else:
self.fail('validation did not work')
r = http(
'--output',
os.path.join(tempfile.gettempdir(), '__httpie_test_output__'),
'--pretty',
'GET',
httpbin('/get'),
env=env
)
self.assertIn(
'Only terminal output can be prettified on Windows', r.stderr)
#################################################################
@ -1077,7 +1063,8 @@ class UnicodeOutputTestCase(BaseTestCase):
args.style = 'default'
# colorized output contains escape sequences
output = get_output(args, Environment(), response.request, response).decode('utf8')
output = output_stream(args, Environment(), response.request, response)
output = b''.join(output).decode('utf8')
for key, value in response_dict.items():
self.assertIn(key, output)
self.assertIn(value, output)