2014-04-24 14:07:31 +02:00
|
|
|
|
import os
|
2019-06-24 12:19:29 +02:00
|
|
|
|
import tempfile
|
2014-04-24 14:07:31 +02:00
|
|
|
|
import time
|
2021-04-30 15:08:27 +02:00
|
|
|
|
from unittest import mock
|
2019-08-29 08:53:56 +02:00
|
|
|
|
from urllib.request import urlopen
|
2014-04-24 14:07:31 +02:00
|
|
|
|
|
2024-06-14 13:30:34 +02:00
|
|
|
|
import niquests
|
2014-04-24 17:08:40 +02:00
|
|
|
|
import pytest
|
2024-06-14 13:30:34 +02:00
|
|
|
|
import responses
|
2014-04-24 14:07:31 +02:00
|
|
|
|
from httpie.downloads import (
|
2024-06-14 13:30:34 +02:00
|
|
|
|
parse_content_range,
|
|
|
|
|
filename_from_content_disposition,
|
|
|
|
|
filename_from_url,
|
|
|
|
|
get_unique_filename,
|
|
|
|
|
ContentRangeError,
|
|
|
|
|
Downloader,
|
|
|
|
|
PARTIAL_CONTENT,
|
|
|
|
|
DECODED_SIZE_NOTE_SUFFIX,
|
|
|
|
|
DECODED_FROM_SUFFIX,
|
2014-04-24 14:07:31 +02:00
|
|
|
|
)
|
2024-06-14 13:30:34 +02:00
|
|
|
|
from niquests.structures import CaseInsensitiveDict
|
|
|
|
|
from .utils import http, MockEnvironment, cd_clean_tmp_dir, DUMMY_URL
|
2014-04-24 14:07:31 +02:00
|
|
|
|
|
|
|
|
|
|
class Response(niquests.Response):
    """Minimal stand-in for ``niquests.Response`` used to drive ``Downloader`` directly.

    Only the attributes the downloader reads (``url``, ``headers``,
    ``status_code``) are populated; ``super().__init__()`` is deliberately
    not called so no real response machinery is set up.
    """

    def __init__(self, url, headers=None, status_code=200):
        self.url = url
        # `headers=None` instead of the mutable-default `headers={}` the
        # original carried (it only suppressed the inspection warning);
        # a fresh dict is created per call before wrapping.
        self.headers = CaseInsensitiveDict(headers or {})
        self.status_code = status_code
|
2014-04-24 14:07:31 +02:00
|
|
|
|
|
|
|
|
|
|
class TestDownloadUtils:
    """Unit tests for the pure helper functions in ``httpie.downloads``:

    ``parse_content_range``, ``filename_from_content_disposition``,
    ``filename_from_url`` and ``get_unique_filename``.
    """

    def test_Content_Range_parsing(self):
        """Valid Content-Range headers yield the total size; invalid ones raise."""
        parse = parse_content_range

        # Valid: explicit instance-length and unknown ('*') instance-length.
        assert parse('bytes 100-199/200', 100) == 200
        assert parse('bytes 100-199/*', 100) == 200

        # single byte
        assert parse('bytes 100-100/*', 100) == 101

        # missing
        pytest.raises(ContentRangeError, parse, None, 100)

        # syntax error
        pytest.raises(ContentRangeError, parse, 'beers 100-199/*', 100)

        # unexpected range
        pytest.raises(ContentRangeError, parse, 'bytes 100-199/*', 99)

        # invalid instance-length
        pytest.raises(ContentRangeError, parse, 'bytes 100-199/199', 100)

        # invalid byte-range-resp-spec
        pytest.raises(ContentRangeError, parse, 'bytes 100-99/199', 100)

    @pytest.mark.parametrize('header, expected_filename', [
        ('attachment; filename=hello-WORLD_123.txt', 'hello-WORLD_123.txt'),
        # Leading dot is stripped (hidden-file protection).
        ('attachment; filename=".hello-WORLD_123.txt"', 'hello-WORLD_123.txt'),
        ('attachment; filename="white space.txt"', 'white space.txt'),
        # Escaped quotes inside a quoted filename are preserved.
        (r'attachment; filename="\"quotes\".txt"', '"quotes".txt'),
        # Path components are discarded; only the basename survives.
        ('attachment; filename=/etc/hosts', 'hosts'),
        # Empty filename parameter -> no filename extracted.
        ('attachment; filename=', None)
    ])
    def test_Content_Disposition_parsing(self, header, expected_filename):
        """Filename extraction from a Content-Disposition header value."""
        assert filename_from_content_disposition(header) == expected_filename

    def test_filename_from_url(self):
        """An extension is derived from Content-Type when the URL path lacks one."""
        assert 'foo.txt' == filename_from_url(
            url='http://example.org/foo',
            content_type='text/plain'
        )
        # Content-Type parameters (charset) do not affect the chosen extension.
        assert 'foo.html' == filename_from_url(
            url='http://example.org/foo',
            content_type='text/html; charset=UTF-8'
        )
        # No Content-Type -> no extension appended.
        assert 'foo' == filename_from_url(
            url='http://example.org/foo',
            content_type=None
        )
        # Unknown media type -> no extension appended.
        assert 'foo' == filename_from_url(
            url='http://example.org/foo',
            content_type='x-foo/bar'
        )

    @pytest.mark.parametrize(
        'orig_name, unique_on_attempt, expected',
        [
            # Simple
            ('foo.bar', 0, 'foo.bar'),
            ('foo.bar', 1, 'foo.bar-1'),
            ('foo.bar', 10, 'foo.bar-10'),
            # Trim
            ('A' * 20, 0, 'A' * 10),
            ('A' * 20, 1, 'A' * 8 + '-1'),
            ('A' * 20, 10, 'A' * 7 + '-10'),
            # Trim before ext
            ('A' * 20 + '.txt', 0, 'A' * 6 + '.txt'),
            ('A' * 20 + '.txt', 1, 'A' * 4 + '.txt-1'),
            # Trim at the end
            ('foo.' + 'A' * 20, 0, 'foo.' + 'A' * 6),
            ('foo.' + 'A' * 20, 1, 'foo.' + 'A' * 4 + '-1'),
            ('foo.' + 'A' * 20, 10, 'foo.' + 'A' * 3 + '-10'),
        ]
    )
    @mock.patch('httpie.downloads.get_filename_max_length')
    def test_unique_filename(self, get_filename_max_length,
                             orig_name, unique_on_attempt,
                             expected):
        """Names are suffixed (-1, -2, …) and trimmed to the max length (mocked to 10)."""

        def attempts(unique_on_attempt=0):
            # Build a fake `exists()` predicate that reports the name as taken
            # until the `unique_on_attempt`-th call, counting via a function
            # attribute (`exists.attempt`).
            # noinspection PyUnresolvedReferences,PyUnusedLocal
            def exists(filename):
                if exists.attempt == unique_on_attempt:
                    return False
                exists.attempt += 1
                return True

            exists.attempt = 0
            return exists

        # Force a tiny max filename length so the trimming cases are exercised.
        get_filename_max_length.return_value = 10

        actual = get_unique_filename(orig_name, attempts(unique_on_attempt))
        assert expected == actual
|
2014-04-24 14:07:31 +02:00
|
|
|
|
|
|
|
|
|
|
class TestDownloader:
    """Integration-style tests for ``httpie.downloads.Downloader``:

    full CLI downloads against httpbin, plus direct driving of the
    ``start() / chunk_downloaded() / finish()`` state machine.
    """

    def test_actual_download(self, httpbin_both, httpbin):
        """`--download` writes the response body and reports progress on stderr."""
        robots_txt = '/robots.txt'
        # Fetch the expected body out-of-band via urllib for comparison.
        body = urlopen(httpbin + robots_txt).read().decode()
        env = MockEnvironment(stdin_isatty=True, stdout_isatty=False, show_displays=True)
        r = http('--download', httpbin_both.url + robots_txt, env=env)
        assert 'Downloading' in r.stderr
        # NOTE(review): `r` compares equal to its output string — presumably
        # the CLI result's stdout; confirm against `.utils.http`.
        assert body == r

    def test_download_with_Content_Length(self, mock_env, httpbin_both):
        """A transfer matching Content-Length exactly is not flagged as interrupted."""
        with open(os.devnull, 'w') as devnull:
            downloader = Downloader(mock_env, output_file=devnull)
            downloader.start(
                initial_url='/',
                final_response=Response(
                    url=httpbin_both.url + '/',
                    headers={'Content-Length': 10}
                )
            )
            # NOTE(review): the sleeps presumably give the progress reporter
            # time to observe distinct chunks — confirm before removing.
            time.sleep(1.1)
            downloader.chunk_downloaded(b'12345')
            time.sleep(1.1)
            downloader.chunk_downloaded(b'12345')
            downloader.finish()
            assert not downloader.is_interrupted

    def test_download_no_Content_Length(self, mock_env, httpbin_both):
        """Without Content-Length, any amount of data counts as a complete transfer."""
        with open(os.devnull, 'w') as devnull:
            downloader = Downloader(mock_env, output_file=devnull)
            downloader.start(
                final_response=Response(url=httpbin_both.url + '/'),
                initial_url='/'
            )
            time.sleep(1.1)
            downloader.chunk_downloaded(b'12345')
            downloader.finish()
            assert not downloader.is_interrupted

    def test_download_output_from_content_disposition(self, mock_env, httpbin_both):
        """When no output file is given, the filename comes from Content-Disposition."""
        output_file_name = 'filename.bin'
        with cd_clean_tmp_dir(assert_filenames_after=[output_file_name]):
            # No output_file passed: Downloader must open one itself.
            downloader = Downloader(mock_env)
            downloader.start(
                final_response=Response(
                    url=httpbin_both.url + '/',
                    headers={
                        'Content-Length': 5,
                        'Content-Disposition': f'attachment; filename="{output_file_name}"',
                    }
                ),
                initial_url='/'
            )
            downloader.chunk_downloaded(b'12345')
            downloader.finish()
            downloader.failed()  # Stop the reporter
            assert not downloader.is_interrupted

            # TODO: Auto-close the file in that case?
            downloader._output_file.close()

    def test_downloader_is_interrupted(self, mock_env, httpbin_both):
        """Fewer bytes than Content-Length (4 of 5) marks the download interrupted."""
        with open(os.devnull, 'w') as devnull:
            downloader = Downloader(mock_env, output_file=devnull)
            downloader.start(
                final_response=Response(
                    url=httpbin_both.url + '/',
                    headers={'Content-Length': 5}
                ),
                initial_url='/'
            )
            downloader.chunk_downloaded(b'1234')
            downloader.finish()
            assert downloader.is_interrupted

    def test_download_resumed(self, mock_env, httpbin_both):
        """An interrupted download can be resumed with a Range request (206)."""
        with tempfile.TemporaryDirectory() as tmp_dirname:
            file = os.path.join(tmp_dirname, 'file.bin')
            # Create the (empty) target file up front.
            with open(file, 'a'):
                pass

            with open(file, 'a+b') as output_file:
                # Start and interrupt the transfer after 3 bytes written
                downloader = Downloader(mock_env, output_file=output_file)
                downloader.start(
                    final_response=Response(
                        url=httpbin_both.url + '/',
                        headers={'Content-Length': 5}
                    ),
                    initial_url='/'
                )
                downloader.chunk_downloaded(b'123')
                downloader.finish()
                downloader.failed()
                assert downloader.is_interrupted

            # Write bytes
            # Simulate the partially-downloaded file on disk.
            with open(file, 'wb') as fh:
                fh.write(b'123')

            with open(file, 'a+b') as output_file:
                # Resume the transfer
                downloader = Downloader(mock_env, output_file=output_file, resume=True)

                # Ensure `pre_request()` is working as expected too
                headers = {}
                downloader.pre_request(headers)
                # identity encoding: a resumed byte range must not be re-compressed.
                assert headers['Accept-Encoding'] == 'identity'
                # Range picks up from the 3 bytes already on disk.
                assert headers['Range'] == 'bytes=3-'

                downloader.start(
                    final_response=Response(
                        url=httpbin_both.url + '/',
                        headers={
                            'Content-Length': 5,
                            'Content-Range': 'bytes 3-4/5',
                        },
                        status_code=PARTIAL_CONTENT
                    ),
                    initial_url='/'
                )
                downloader.chunk_downloaded(b'45')
                downloader.finish()

    def test_download_with_redirect_original_url_used_for_filename(self, httpbin):
        """The pre-redirect URL determines the output filename."""
        # Redirect from `/redirect/1` to `/get`.
        expected_filename = '1.json'
        with cd_clean_tmp_dir(assert_filenames_after=[expected_filename]):
            http('--download', httpbin + '/redirect/1')

    def test_download_gzip_content_encoding(self, httpbin):
        """A gzip-encoded response downloads successfully."""
        expected_filename = 'gzip.json'
        with cd_clean_tmp_dir(assert_filenames_after=[expected_filename]):
            r = http('--download', httpbin + '/gzip')
            assert r.exit_status == 0

    @responses.activate
    def test_incomplete_response(self):
        """A body shorter than Content-Length raises an incomplete-read error."""
        # We have incompleteness checks in the downloader, but it might not be needed as it’s built into (ni|req)uests.
        error_msg = 'peer closed connection without sending complete message body (received 2 bytes, expected 1 more)'
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
                'Content-Length': '3',
            },
            body='12',
        )
        with cd_clean_tmp_dir(), pytest.raises(Exception) as exc_info:
            http('--download', DUMMY_URL)
        assert error_msg in str(exc_info.value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDecodedDownloads:
    """Test downloading responses with `Content-Encoding`.

    Encoded responses must show both the decoded-from note and the
    decoded-size note on stderr; unencoded ones must show neither.
    """

    @responses.activate
    def test_decoded_response_no_content_length(self):
        """Encoded response, no Content-Length, with `--headers`: both notes shown."""
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
                'Content-Encoding': 'gzip, br',
            },
            body='123',
        )
        with cd_clean_tmp_dir():
            r = http('--download', '--headers', DUMMY_URL)
            assert DECODED_FROM_SUFFIX.format(encodings='`gzip`, `br`') in r.stderr
            assert DECODED_SIZE_NOTE_SUFFIX in r.stderr

    @responses.activate
    def test_decoded_response_with_content_length(self):
        """Encoded response that also declares Content-Length: both notes shown."""
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
                'Content-Encoding': 'gzip, br',
                'Content-Length': '3',
            },
            body='123',
        )
        with cd_clean_tmp_dir():
            r = http('--download', DUMMY_URL)
            assert DECODED_FROM_SUFFIX.format(encodings='`gzip`, `br`') in r.stderr
            assert DECODED_SIZE_NOTE_SUFFIX in r.stderr

    @responses.activate
    def test_decoded_response_without_content_length(self):
        """Encoded response, no Content-Length, default output: both notes shown.

        NOTE: near-duplicate of ``test_decoded_response_no_content_length``;
        this variant omits ``--headers``.
        """
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
                'Content-Encoding': 'gzip, br',
            },
            body='123',
        )
        with cd_clean_tmp_dir():
            r = http('--download', DUMMY_URL)
            assert DECODED_FROM_SUFFIX.format(encodings='`gzip`, `br`') in r.stderr
            assert DECODED_SIZE_NOTE_SUFFIX in r.stderr

    @responses.activate
    def test_non_decoded_response_with_content_length(self):
        """Unencoded response with Content-Length: no decoded-size note.

        (Renamed: this case *sends* Content-Length; the original name said
        "without" while the sibling test with no Content-Length said "with".)
        """
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
                'Content-Length': '3',
            },
            body='123',
        )
        with cd_clean_tmp_dir():
            r = http('--download', DUMMY_URL)
            assert DECODED_SIZE_NOTE_SUFFIX not in r.stderr

    @responses.activate
    def test_non_decoded_response_without_content_length(self):
        """Unencoded response without Content-Length: no decoded-size note.

        (Renamed: this case sends *no* Content-Length header.)
        """
        responses.add(
            method=responses.GET,
            url=DUMMY_URL,
            headers={
            },
            body='123',
        )
        with cd_clean_tmp_dir():
            r = http('--download', DUMMY_URL)
            assert DECODED_SIZE_NOTE_SUFFIX not in r.stderr
|