Download resume improvements.

- Set correct Range
- Validate response status
- Validate Content-Range

 #104
This commit is contained in:
Jakub Roztocil 2013-04-11 02:29:10 -03:00
parent 21613faa5a
commit 599bc0519f

View File

@ -3,10 +3,11 @@ Download mode implementation.
""" """
from __future__ import division from __future__ import division
import mimetypes
import os import os
import re
import sys import sys
import errno import errno
import mimetypes
from time import time from time import time
from .output import RawStream from .output import RawStream
@ -14,6 +15,8 @@ from .models import HTTPResponse
from .utils import humanize_bytes from .utils import humanize_bytes
from .compat import urlsplit from .compat import urlsplit
PARTIAL_CONTENT = 206
class Download(object): class Download(object):
@ -37,16 +40,16 @@ class Download(object):
self._progress = Progress(output=progress_file) self._progress = Progress(output=progress_file)
self._resumed_from = 0 self._resumed_from = 0
def pre_request(self, headers): def pre_request(self, request_headers):
"""Called just before the HTTP request is sent. """Called just before the HTTP request is sent.
Might alter `headers`. Might alter `request_headers`.
:type headers: dict :type request_headers: dict
""" """
# Disable content encoding so that we can resume, etc. # Disable content encoding so that we can resume, etc.
headers['Accept-Encoding'] = '' request_headers['Accept-Encoding'] = ''
if self._resume: if self._resume:
try: try:
bytes_have = os.path.getsize(self._output_file.name) bytes_have = os.path.getsize(self._output_file.name)
@ -56,8 +59,7 @@ class Download(object):
else: else:
self._resumed_from = bytes_have self._resumed_from = bytes_have
# Set ``Range`` header to resume the download # Set ``Range`` header to resume the download
# TODO: What if Range isn't supported by the server? request_headers['Range'] = 'bytes=%d-' % bytes_have
headers['Range'] = '%d-' % bytes_have
def start(self, response): def start(self, response):
""" """
@ -72,12 +74,25 @@ class Download(object):
""" """
assert not self._progress._time_started assert not self._progress._time_started
content_length = response.headers.get('Content-Length') total_size = response.headers.get('Content-Length')
if content_length: if total_size:
content_length = int(content_length) total_size = int(total_size)
if self._output_file: if self._output_file:
if not self._resume: if self._resume and response.status_code == PARTIAL_CONTENT:
# "Content-Range: bytes 21010-47021/47022"
content_range = response.headers.get('Content-Range', '')
pattern = '^bytes {have:d}-\d+/(\*|\d+)$'.format(
have=self._resumed_from)
match = re.match(pattern, content_range)
if not match:
raise ValueError(
'The server returned invalid Content-Range: %s'
% content_range
)
total_size += self._resumed_from
else:
self._resumed_from = 0
self._output_file.seek(0) self._output_file.seek(0)
self._output_file.truncate() self._output_file.truncate()
else: else:
@ -94,7 +109,7 @@ class Download(object):
self._progress.started( self._progress.started(
resumed_from=self._resumed_from, resumed_from=self._resumed_from,
content_length=content_length total_size=total_size
) )
stream = RawStream( stream = RawStream(
@ -121,8 +136,8 @@ class Download(object):
def interrupted(self): def interrupted(self):
return ( return (
self._output_file.closed self._output_file.closed
and self._progress.content_length and self._progress.total_size
and self._progress.content_length != self._progress.downloaded and self._progress.total_size != self._progress.downloaded
) )
def _on_progress(self, chunk): def _on_progress(self, chunk):
@ -175,7 +190,7 @@ class Progress(object):
""" """
self.output = output self.output = output
self.downloaded = 0 self.downloaded = 0
self.content_length = None self.total_size = None
self._resumed_from = 0 self._resumed_from = 0
self._downloaded_prev = 0 self._downloaded_prev = 0
self._content_length_humanized = '?' self._content_length_humanized = '?'
@ -184,11 +199,11 @@ class Progress(object):
self._time_prev = None self._time_prev = None
self._speed = 0 self._speed = 0
def started(self, resumed_from=0, content_length=None): def started(self, resumed_from=0, total_size=None):
assert self._time_started is None assert self._time_started is None
if content_length is not None: if total_size is not None:
self._content_length_humanized = humanize_bytes(content_length) self._content_length_humanized = humanize_bytes(total_size)
self.content_length = content_length self.total_size = total_size
self.downloaded = self._resumed_from = resumed_from self.downloaded = self._resumed_from = resumed_from
self._time_started = time() self._time_started = time()
self._time_prev = self._time_started self._time_prev = self._time_started
@ -210,9 +225,9 @@ class Progress(object):
self._time_prev = now self._time_prev = now
self._downloaded_prev = self.downloaded self._downloaded_prev = self.downloaded
if self.content_length: if self.total_size:
template = self.PROGRESS template = self.PROGRESS
percentage = self.downloaded / self.content_length * 100 percentage = self.downloaded / self.total_size * 100
else: else:
template = self.PROGRESS_NO_CONTENT_LENGTH template = self.PROGRESS_NO_CONTENT_LENGTH
percentage = None percentage = None