Changed the way the output filename is generated

When ``--download`` without ``--output`` results in a redirect,
now only the initial URL is considered, not the final one.
This commit is contained in:
Jakub Roztocil 2019-06-24 12:19:29 +02:00
parent e92b831e6e
commit df36d6255d
4 changed files with 64 additions and 26 deletions

View File

@ -9,7 +9,9 @@ This project adheres to `Semantic Versioning <http://semver.org/>`_.
`1.0.3-dev`_ (unreleased) `1.0.3-dev`_ (unreleased)
------------------------- -------------------------
* No changes yet. * Changed the way the output filename is generated for ``--download`` requests
without ``--output`` and with a redirect — now only the initial URL is
considered, not the final one.
`1.0.2`_ (2018-11-14) `1.0.2`_ (2018-11-14)

View File

@ -1316,10 +1316,22 @@ is being saved to a file.
Downloaded file name Downloaded file name
-------------------- --------------------
If not provided via ``--output, -o``, the output filename will be determined There are three mutually exclusive ways through which HTTPie determines
from ``Content-Disposition`` (if available), or from the URL and the output file name (with decreasing priority):
``Content-Type``. If the guessed filename already exists, HTTPie adds a unique
suffix to it. 1. You can explicitly provide the exact output file name via ``--output, -o``.
The file gets overwritten if it already exists
(or appended to with ``--continue, -c``).
2. The server may specify the file name in the optional ``Content-Disposition``
response header. Any leading dots are stripped from a server-provided filename.
3. The last resort HTTPie uses is to generate the filename from a combination
of the request URL and the response ``Content-Type``.
The initial URL is always used as the basis for
the generated filename — even if there has been one or more redirects.
To prevent data loss, HTTPie adds a unique numerical suffix to the
filename, unless the name has been explicitly provided via ``--output, -o``.
Piping while downloading Piping while downloading

View File

@ -224,13 +224,13 @@ class Downloader(object):
request_headers['Range'] = 'bytes=%d-' % bytes_have request_headers['Range'] = 'bytes=%d-' % bytes_have
self._resumed_from = bytes_have self._resumed_from = bytes_have
def start(self, response): def start(self, final_response):
""" """
Initiate and return a stream for `response` body with progress Initiate and return a stream for `response` body with progress
callback attached. Can be called only once. callback attached. Can be called only once.
:param response: Initiated response object with headers already fetched :param final_response: Initiated response object with headers already fetched
:type response: requests.models.Response :type final_response: requests.models.Response
:return: RawStream, output_file :return: RawStream, output_file
@ -240,14 +240,18 @@ class Downloader(object):
# FIXME: some servers still might sent Content-Encoding: gzip # FIXME: some servers still might sent Content-Encoding: gzip
# <https://github.com/jakubroztocil/httpie/issues/423> # <https://github.com/jakubroztocil/httpie/issues/423>
try: try:
total_size = int(response.headers['Content-Length']) total_size = int(final_response.headers['Content-Length'])
except (KeyError, ValueError, TypeError): except (KeyError, ValueError, TypeError):
total_size = None total_size = None
if self._output_file: if not self._output_file:
if self._resume and response.status_code == PARTIAL_CONTENT: self._output_file = self._get_output_file_from_response(
final_response)
else:
# `--continue, -c` provided
if self._resume and final_response.status_code == PARTIAL_CONTENT:
total_size = parse_content_range( total_size = parse_content_range(
response.headers.get('Content-Range'), final_response.headers.get('Content-Range'),
self._resumed_from self._resumed_from
) )
@ -258,19 +262,6 @@ class Downloader(object):
self._output_file.truncate() self._output_file.truncate()
except IOError: except IOError:
pass # stdout pass # stdout
else:
# TODO: Should the filename be taken from response.history[0].url?
# Output file not specified. Pick a name that doesn't exist yet.
filename = None
if 'Content-Disposition' in response.headers:
filename = filename_from_content_disposition(
response.headers['Content-Disposition'])
if not filename:
filename = filename_from_url(
url=response.url,
content_type=response.headers.get('Content-Type'),
)
self._output_file = open(get_unique_filename(filename), mode='a+b')
self.status.started( self.status.started(
resumed_from=self._resumed_from, resumed_from=self._resumed_from,
@ -278,7 +269,7 @@ class Downloader(object):
) )
stream = RawStream( stream = RawStream(
msg=HTTPResponse(response), msg=HTTPResponse(final_response),
with_headers=False, with_headers=False,
with_body=True, with_body=True,
on_body_chunk_downloaded=self.chunk_downloaded, on_body_chunk_downloaded=self.chunk_downloaded,
@ -324,6 +315,25 @@ class Downloader(object):
""" """
self.status.chunk_downloaded(len(chunk)) self.status.chunk_downloaded(len(chunk))
@staticmethod
def _get_output_file_from_response(final_response):
# Output file not specified. Pick a name that doesn't exist yet.
filename = None
if 'Content-Disposition' in final_response.headers:
filename = filename_from_content_disposition(
final_response.headers['Content-Disposition'])
if not filename:
initial_response = (
final_response.history[0] if final_response.history
else final_response
)
filename = filename_from_url(
url=initial_response.url,
content_type=final_response.headers.get('Content-Type'),
)
unique_filename = get_unique_filename(filename)
return open(unique_filename, mode='a+b')
class Status(object): class Status(object):
"""Holds details about the downland status.""" """Holds details about the downland status."""

View File

@ -1,4 +1,5 @@
import os import os
import tempfile
import time import time
import pytest import pytest
@ -22,6 +23,7 @@ class Response(object):
class TestDownloadUtils: class TestDownloadUtils:
def test_Content_Range_parsing(self): def test_Content_Range_parsing(self):
parse = parse_content_range parse = parse_content_range
@ -166,3 +168,15 @@ class TestDownloads:
downloader.finish() downloader.finish()
assert downloader.interrupted assert downloader.interrupted
downloader._progress_reporter.join() downloader._progress_reporter.join()
def test_download_with_redirect_original_url_used_for_filename(self, httpbin):
# Redirect from `/redirect/1` to `/get`.
expected_filename = '1.json'
orig_cwd = os.getcwd()
os.chdir(tempfile.mkdtemp(prefix='httpie_download_test_'))
try:
assert os.listdir('.') == []
http('--download', httpbin.url + '/redirect/1')
assert os.listdir('.') == [expected_filename]
finally:
os.chdir(orig_cwd)