From df36d6255df5793129b02ac82f1010171bd8a0a8 Mon Sep 17 00:00:00 2001
From: Jakub Roztocil <jakub@roztocil.co>
Date: Mon, 24 Jun 2019 12:19:29 +0200
Subject: [PATCH] Changed the way the output filename is generated

When ``--download`` without ``--output`` results in a redirect,
now only the initial URL is considered, not the final one.
---
 CHANGELOG.rst           |  4 +++-
 README.rst              | 20 ++++++++++++----
 httpie/downloads.py     | 52 ++++++++++++++++++++++++-----------------
 tests/test_downloads.py | 14 +++++++++++
 4 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 39da3253..03cab7be 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,7 +9,9 @@ This project adheres to `Semantic Versioning <http://semver.org/>`_.
 `1.0.3-dev`_ (unreleased)
 -------------------------
 
-* No changes yet.
+* Changed the way the output filename is generated for ``--download`` requests
+  without ``--output`` and with a redirect — now only the initial URL is
+  considered, not the final one.
 
 
 `1.0.2`_ (2018-11-14)
diff --git a/README.rst b/README.rst
index a210d1dd..a18a385e 100644
--- a/README.rst
+++ b/README.rst
@@ -1316,10 +1316,22 @@ is being saved to a file.
 Downloaded file name
 --------------------
 
-If not provided via ``--output, -o``, the output filename will be determined
-from ``Content-Disposition`` (if available), or from the URL and
-``Content-Type``. If the guessed filename already exists, HTTPie adds a unique
-suffix to it.
+There are three mutually exclusive ways through which HTTPie determines
+the output file name (with decreasing priority):
+
+1. You can explicitly provide the exact output file name via ``--output, -o``.
+   The file gets overwritten if it already exists
+   (or appended to with ``--continue, -c``).
+2. The server may specify the file name in the optional ``Content-Disposition``
+   response header. Any leading dots are stripped from a server-provided filename.
+3. The last resort HTTPie uses is to generate the filename from a combination
+   of the request URL and the response ``Content-Type``.
+   The initial URL is always used as the basis for
+   the generated filename — even if there has been one or more redirects.
+
+
+To prevent data loss, HTTPie adds a unique numerical suffix to the
+filename, unless the name has been explicitly provided via ``--output, -o``.
 
 
 Piping while downloading
diff --git a/httpie/downloads.py b/httpie/downloads.py
index 8c714c54..200dfb84 100644
--- a/httpie/downloads.py
+++ b/httpie/downloads.py
@@ -224,13 +224,13 @@ class Downloader(object):
                 request_headers['Range'] = 'bytes=%d-' % bytes_have
                 self._resumed_from = bytes_have
 
-    def start(self, response):
+    def start(self, final_response):
         """
         Initiate and return a stream for `response` body  with progress
         callback attached. Can be called only once.
 
-        :param response: Initiated response object with headers already fetched
-        :type response: requests.models.Response
+        :param final_response: Initiated response object with headers already fetched
+        :type final_response: requests.models.Response
 
         :return: RawStream, output_file
 
@@ -240,14 +240,18 @@ class Downloader(object):
         # FIXME: some servers still might sent Content-Encoding: gzip
         # <https://github.com/jakubroztocil/httpie/issues/423>
         try:
-            total_size = int(response.headers['Content-Length'])
+            total_size = int(final_response.headers['Content-Length'])
         except (KeyError, ValueError, TypeError):
             total_size = None
 
-        if self._output_file:
-            if self._resume and response.status_code == PARTIAL_CONTENT:
+        if not self._output_file:
+            self._output_file = self._get_output_file_from_response(
+                final_response)
+        else:
+            # `--continue, -c` provided
+            if self._resume and final_response.status_code == PARTIAL_CONTENT:
                 total_size = parse_content_range(
-                    response.headers.get('Content-Range'),
+                    final_response.headers.get('Content-Range'),
                     self._resumed_from
                 )
 
@@ -258,19 +262,6 @@ class Downloader(object):
                     self._output_file.truncate()
                 except IOError:
                     pass  # stdout
-        else:
-            # TODO: Should the filename be taken from response.history[0].url?
-            # Output file not specified. Pick a name that doesn't exist yet.
-            filename = None
-            if 'Content-Disposition' in response.headers:
-                filename = filename_from_content_disposition(
-                    response.headers['Content-Disposition'])
-            if not filename:
-                filename = filename_from_url(
-                    url=response.url,
-                    content_type=response.headers.get('Content-Type'),
-                )
-            self._output_file = open(get_unique_filename(filename), mode='a+b')
 
         self.status.started(
             resumed_from=self._resumed_from,
@@ -278,7 +269,7 @@ class Downloader(object):
         )
 
         stream = RawStream(
-            msg=HTTPResponse(response),
+            msg=HTTPResponse(final_response),
             with_headers=False,
             with_body=True,
             on_body_chunk_downloaded=self.chunk_downloaded,
@@ -324,6 +315,25 @@ class Downloader(object):
         """
         self.status.chunk_downloaded(len(chunk))
 
+    @staticmethod
+    def _get_output_file_from_response(final_response):
+        # Output file not specified. Pick a name that doesn't exist yet.
+        filename = None
+        if 'Content-Disposition' in final_response.headers:
+            filename = filename_from_content_disposition(
+                final_response.headers['Content-Disposition'])
+        if not filename:
+            initial_response = (
+                final_response.history[0] if final_response.history
+                else final_response
+            )
+            filename = filename_from_url(
+                url=initial_response.url,
+                content_type=final_response.headers.get('Content-Type'),
+            )
+        unique_filename = get_unique_filename(filename)
+        return open(unique_filename, mode='a+b')
+
 
 class Status(object):
     """Holds details about the downland status."""
diff --git a/tests/test_downloads.py b/tests/test_downloads.py
index 5356721f..e14fcef3 100644
--- a/tests/test_downloads.py
+++ b/tests/test_downloads.py
@@ -1,4 +1,5 @@
 import os
+import tempfile
 import time
 
 import pytest
@@ -22,6 +23,7 @@ class Response(object):
 
 
 class TestDownloadUtils:
+
     def test_Content_Range_parsing(self):
         parse = parse_content_range
 
@@ -166,3 +168,15 @@ class TestDownloads:
             downloader.finish()
             assert downloader.interrupted
             downloader._progress_reporter.join()
+
+    def test_download_with_redirect_original_url_used_for_filename(self, httpbin):
+        # Redirect from `/redirect/1` to `/get`.
+        expected_filename = '1.json'
+        orig_cwd = os.getcwd()
+        os.chdir(tempfile.mkdtemp(prefix='httpie_download_test_'))
+        try:
+            assert os.listdir('.') == []
+            http('--download', httpbin.url + '/redirect/1')
+            assert os.listdir('.') == [expected_filename]
+        finally:
+            os.chdir(orig_cwd)