Changed the way the output filename is generated

When ``--download`` without ``--output`` results in a redirect, now only the initial URL is considered, not the final one.
2019-06-24 12:19:29 +02:00 · 2019-06-24 12:19:29 +02:00 · df36d6255d
commit df36d6255d
parent e92b831e6e
4 changed files with 64 additions and 26 deletions
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@ -9,7 +9,9 @@ This project adheres to `Semantic Versioning <http://semver.org/>`_.
 `1.0.3-dev`_ (unreleased)
 -------------------------

-* No changes yet.
+* Changed the way the output filename is generated for ``--download`` requests
+  without ``--output`` and with a redirect — now only the initial URL is
+  considered, not the final one.


 `1.0.2`_ (2018-11-14)
--- a/README.rst
+++ b/README.rst
@ -1316,10 +1316,22 @@ is being saved to a file.
 Downloaded file name
 --------------------

-If not provided via ``--output, -o``, the output filename will be determined
-from ``Content-Disposition`` (if available), or from the URL and
-``Content-Type``. If the guessed filename already exists, HTTPie adds a unique
-suffix to it.
+There are three mutually exclusive ways through which HTTPie determines
+the output file name (with decreasing priority):
+
+1. You can explicitly provide the exact output file name via ``--output, -o``.
+   The file gets overwritten if it already exists
+   (or appended to with ``--continue, -c``).
+2. The server may specify the file name in the optional ``Content-Disposition``
+   response header. Any leading dots are stripped from a server-provided filename.
+3. The last resort HTTPie uses is to generate the filename from a combination
+   of the request URL and the response ``Content-Type``.
+   The initial URL is always used as the basis for
+   the generated filename — even if there has been one or more redirects.
+
+
+To prevent data loss, HTTPie adds a unique numerical suffix to the
+filename, unless the name has been explicitly provided via ``--output, -o``.


 Piping while downloading
--- a/httpie/downloads.py
+++ b/httpie/downloads.py
@ -224,13 +224,13 @@ class Downloader(object):
                request_headers['Range'] = 'bytes=%d-' % bytes_have
                self._resumed_from = bytes_have

-    def start(self, response):
+    def start(self, final_response):
        """
        Initiate and return a stream for `response` body  with progress
        callback attached. Can be called only once.

-        :param response: Initiated response object with headers already fetched
-        :type response: requests.models.Response
+        :param final_response: Initiated response object with headers already fetched
+        :type final_response: requests.models.Response

        :return: RawStream, output_file

@ -240,14 +240,18 @@ class Downloader(object):
        # FIXME: some servers still might sent Content-Encoding: gzip
        # <https://github.com/jakubroztocil/httpie/issues/423>
        try:
-            total_size = int(response.headers['Content-Length'])
+            total_size = int(final_response.headers['Content-Length'])
        except (KeyError, ValueError, TypeError):
            total_size = None

-        if self._output_file:
-            if self._resume and response.status_code == PARTIAL_CONTENT:
+        if not self._output_file:
+            self._output_file = self._get_output_file_from_response(
+                final_response)
+        else:
+            # `--continue, -c` provided
+            if self._resume and final_response.status_code == PARTIAL_CONTENT:
                total_size = parse_content_range(
-                    response.headers.get('Content-Range'),
+                    final_response.headers.get('Content-Range'),
                    self._resumed_from
                )

@ -258,19 +262,6 @@ class Downloader(object):
                    self._output_file.truncate()
                except IOError:
                    pass  # stdout
-        else:
-            # TODO: Should the filename be taken from response.history[0].url?
-            # Output file not specified. Pick a name that doesn't exist yet.
-            filename = None
-            if 'Content-Disposition' in response.headers:
-                filename = filename_from_content_disposition(
-                    response.headers['Content-Disposition'])
-            if not filename:
-                filename = filename_from_url(
-                    url=response.url,
-                    content_type=response.headers.get('Content-Type'),
-                )
-            self._output_file = open(get_unique_filename(filename), mode='a+b')

        self.status.started(
            resumed_from=self._resumed_from,
@ -278,7 +269,7 @@ class Downloader(object):
        )

        stream = RawStream(
-            msg=HTTPResponse(response),
+            msg=HTTPResponse(final_response),
            with_headers=False,
            with_body=True,
            on_body_chunk_downloaded=self.chunk_downloaded,
@ -324,6 +315,25 @@ class Downloader(object):
        """
        self.status.chunk_downloaded(len(chunk))

+    @staticmethod
+    def _get_output_file_from_response(final_response):
+        # Output file not specified. Pick a name that doesn't exist yet.
+        filename = None
+        if 'Content-Disposition' in final_response.headers:
+            filename = filename_from_content_disposition(
+                final_response.headers['Content-Disposition'])
+        if not filename:
+            initial_response = (
+                final_response.history[0] if final_response.history
+                else final_response
+            )
+            filename = filename_from_url(
+                url=initial_response.url,
+                content_type=final_response.headers.get('Content-Type'),
+            )
+        unique_filename = get_unique_filename(filename)
+        return open(unique_filename, mode='a+b')
+

 class Status(object):
    """Holds details about the downland status."""
--- a/tests/test_downloads.py
+++ b/tests/test_downloads.py
@ -1,4 +1,5 @@
 import os
+import tempfile
 import time

 import pytest
@ -22,6 +23,7 @@ class Response(object):


 class TestDownloadUtils:
+
    def test_Content_Range_parsing(self):
        parse = parse_content_range

@ -166,3 +168,15 @@ class TestDownloads:
            downloader.finish()
            assert downloader.interrupted
            downloader._progress_reporter.join()
+
+    def test_download_with_redirect_original_url_used_for_filename(self, httpbin):
+        # Redirect from `/redirect/1` to `/get`.
+        expected_filename = '1.json'
+        orig_cwd = os.getcwd()
+        os.chdir(tempfile.mkdtemp(prefix='httpie_download_test_'))
+        try:
+            assert os.listdir('.') == []
+            http('--download', httpbin.url + '/redirect/1')
+            assert os.listdir('.') == [expected_filename]
+        finally:
+            os.chdir(orig_cwd)