From df36d6255df5793129b02ac82f1010171bd8a0a8 Mon Sep 17 00:00:00 2001 From: Jakub Roztocil Date: Mon, 24 Jun 2019 12:19:29 +0200 Subject: [PATCH] Changed the way the output filename is generated When ``--download`` without ``--output`` results in a redirect, now only the initial URL is considered, not the final one. --- CHANGELOG.rst | 4 +++- README.rst | 20 ++++++++++++---- httpie/downloads.py | 52 ++++++++++++++++++++++++----------------- tests/test_downloads.py | 14 +++++++++++ 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 39da3253..03cab7be 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,7 +9,9 @@ This project adheres to `Semantic Versioning `_. `1.0.3-dev`_ (unreleased) ------------------------- -* No changes yet. +* Changed the way the output filename is generated for ``--download`` requests + without ``--output`` and with a redirect — now only the initial URL is + considered, not the final one. `1.0.2`_ (2018-11-14) diff --git a/README.rst b/README.rst index a210d1dd..a18a385e 100644 --- a/README.rst +++ b/README.rst @@ -1316,10 +1316,22 @@ is being saved to a file. Downloaded file name -------------------- -If not provided via ``--output, -o``, the output filename will be determined -from ``Content-Disposition`` (if available), or from the URL and -``Content-Type``. If the guessed filename already exists, HTTPie adds a unique -suffix to it. +There are three mutually exclusive ways through which HTTPie determines +the output file name (with decreasing priority): + +1. You can explicitly provide the exact output file name via ``--output, -o``. + The file gets overwritten if it already exists + (or appended to with ``--continue, -c``). +2. The server may specify the file name in the optional ``Content-Disposition`` + response header. Any leading dots are stripped from a server-provided filename. +3. The last resort HTTPie uses is to generate the filename from a combination + of the request URL and the response ``Content-Type``. + The initial URL is always used as the basis for + the generated filename — even if there has been one or more redirects. + + +To prevent data loss, HTTPie adds a unique numerical suffix to the +filename, unless the name has been explicitly provided via ``--output, -o``. Piping while downloading diff --git a/httpie/downloads.py b/httpie/downloads.py index 8c714c54..200dfb84 100644 --- a/httpie/downloads.py +++ b/httpie/downloads.py @@ -224,13 +224,13 @@ class Downloader(object): request_headers['Range'] = 'bytes=%d-' % bytes_have self._resumed_from = bytes_have - def start(self, response): + def start(self, final_response): """ Initiate and return a stream for `response` body with progress callback attached. Can be called only once. - :param response: Initiated response object with headers already fetched - :type response: requests.models.Response + :param final_response: Initiated response object with headers already fetched + :type final_response: requests.models.Response :return: RawStream, output_file @@ -240,14 +240,18 @@ class Downloader(object): # FIXME: some servers still might sent Content-Encoding: gzip # try: - total_size = int(response.headers['Content-Length']) + total_size = int(final_response.headers['Content-Length']) except (KeyError, ValueError, TypeError): total_size = None - if self._output_file: - if self._resume and response.status_code == PARTIAL_CONTENT: + if not self._output_file: + self._output_file = self._get_output_file_from_response( + final_response) + else: + # `--continue, -c` provided + if self._resume and final_response.status_code == PARTIAL_CONTENT: total_size = parse_content_range( - response.headers.get('Content-Range'), + final_response.headers.get('Content-Range'), self._resumed_from ) @@ -258,19 +262,6 @@ class Downloader(object): self._output_file.truncate() except IOError: pass # stdout - else: - # TODO: Should the filename be taken from response.history[0].url? - # Output file not specified. Pick a name that doesn't exist yet. - filename = None - if 'Content-Disposition' in response.headers: - filename = filename_from_content_disposition( - response.headers['Content-Disposition']) - if not filename: - filename = filename_from_url( - url=response.url, - content_type=response.headers.get('Content-Type'), - ) - self._output_file = open(get_unique_filename(filename), mode='a+b') self.status.started( resumed_from=self._resumed_from, @@ -278,7 +269,7 @@ class Downloader(object): ) stream = RawStream( - msg=HTTPResponse(response), + msg=HTTPResponse(final_response), with_headers=False, with_body=True, on_body_chunk_downloaded=self.chunk_downloaded, @@ -324,6 +315,25 @@ class Downloader(object): """ self.status.chunk_downloaded(len(chunk)) + @staticmethod + def _get_output_file_from_response(final_response): + # Output file not specified. Pick a name that doesn't exist yet. + filename = None + if 'Content-Disposition' in final_response.headers: + filename = filename_from_content_disposition( + final_response.headers['Content-Disposition']) + if not filename: + initial_response = ( + final_response.history[0] if final_response.history + else final_response + ) + filename = filename_from_url( + url=initial_response.url, + content_type=final_response.headers.get('Content-Type'), + ) + unique_filename = get_unique_filename(filename) + return open(unique_filename, mode='a+b') + class Status(object): """Holds details about the downland status.""" diff --git a/tests/test_downloads.py b/tests/test_downloads.py index 5356721f..e14fcef3 100644 --- a/tests/test_downloads.py +++ b/tests/test_downloads.py @@ -1,4 +1,5 @@ import os +import tempfile import time import pytest @@ -22,6 +23,7 @@ class Response(object): class TestDownloadUtils: + def test_Content_Range_parsing(self): parse = parse_content_range @@ -166,3 +168,15 @@ class TestDownloads: downloader.finish() assert downloader.interrupted downloader._progress_reporter.join() + + def test_download_with_redirect_original_url_used_for_filename(self, httpbin): + # Redirect from `/redirect/1` to `/get`. + expected_filename = '1.json' + orig_cwd = os.getcwd() + os.chdir(tempfile.mkdtemp(prefix='httpie_download_test_')) + try: + assert os.listdir('.') == [] + http('--download', httpbin.url + '/redirect/1') + assert os.listdir('.') == [expected_filename] + finally: + os.chdir(orig_cwd)