Download resume improvements.

- Set correct Range
- Validate respnse status
- Validate Content-Range

 #104
This commit is contained in:
Jakub Roztocil 2013-04-11 02:29:10 -03:00
parent 21613faa5a
commit 599bc0519f

View File

@ -3,10 +3,11 @@ Download mode implementation.
"""
from __future__ import division
import mimetypes
import os
import re
import sys
import errno
import mimetypes
from time import time
from .output import RawStream
@ -14,6 +15,8 @@ from .models import HTTPResponse
from .utils import humanize_bytes
from .compat import urlsplit
PARTIAL_CONTENT = 206
class Download(object):
@ -37,16 +40,16 @@ class Download(object):
self._progress = Progress(output=progress_file)
self._resumed_from = 0
def pre_request(self, headers):
def pre_request(self, request_headers):
"""Called just before the HTTP request is sent.
Might alter `headers`.
Might alter `request_headers`.
:type headers: dict
:type request_headers: dict
"""
# Disable content encoding so that we can resume, etc.
headers['Accept-Encoding'] = ''
request_headers['Accept-Encoding'] = ''
if self._resume:
try:
bytes_have = os.path.getsize(self._output_file.name)
@ -56,8 +59,7 @@ class Download(object):
else:
self._resumed_from = bytes_have
# Set ``Range`` header to resume the download
# TODO: What if Range isn't supported by the server?
headers['Range'] = '%d-' % bytes_have
request_headers['Range'] = 'bytes=%d-' % bytes_have
def start(self, response):
"""
@ -72,12 +74,25 @@ class Download(object):
"""
assert not self._progress._time_started
content_length = response.headers.get('Content-Length')
if content_length:
content_length = int(content_length)
total_size = response.headers.get('Content-Length')
if total_size:
total_size = int(total_size)
if self._output_file:
if not self._resume:
if self._resume and response.status_code == PARTIAL_CONTENT:
# "Content-Range: bytes 21010-47021/47022"
content_range = response.headers.get('Content-Range', '')
pattern = '^bytes {have:d}-\d+/(\*|\d+)$'.format(
have=self._resumed_from)
match = re.match(pattern, content_range)
if not match:
raise ValueError(
'The server returned invalid Content-Range: %s'
% content_range
)
total_size += self._resumed_from
else:
self._resumed_from = 0
self._output_file.seek(0)
self._output_file.truncate()
else:
@ -94,7 +109,7 @@ class Download(object):
self._progress.started(
resumed_from=self._resumed_from,
content_length=content_length
total_size=total_size
)
stream = RawStream(
@ -121,8 +136,8 @@ class Download(object):
def interrupted(self):
return (
self._output_file.closed
and self._progress.content_length
and self._progress.content_length != self._progress.downloaded
and self._progress.total_size
and self._progress.total_size != self._progress.downloaded
)
def _on_progress(self, chunk):
@ -175,7 +190,7 @@ class Progress(object):
"""
self.output = output
self.downloaded = 0
self.content_length = None
self.total_size = None
self._resumed_from = 0
self._downloaded_prev = 0
self._content_length_humanized = '?'
@ -184,11 +199,11 @@ class Progress(object):
self._time_prev = None
self._speed = 0
def started(self, resumed_from=0, content_length=None):
def started(self, resumed_from=0, total_size=None):
assert self._time_started is None
if content_length is not None:
self._content_length_humanized = humanize_bytes(content_length)
self.content_length = content_length
if total_size is not None:
self._content_length_humanized = humanize_bytes(total_size)
self.total_size = total_size
self.downloaded = self._resumed_from = resumed_from
self._time_started = time()
self._time_prev = self._time_started
@ -210,9 +225,9 @@ class Progress(object):
self._time_prev = now
self._downloaded_prev = self.downloaded
if self.content_length:
if self.total_size:
template = self.PROGRESS
percentage = self.downloaded / self.content_length * 100
percentage = self.downloaded / self.total_size * 100
else:
template = self.PROGRESS_NO_CONTENT_LENGTH
percentage = None