httpie-cli/httpie/client.py

368 lines
12 KiB
Python
Raw Normal View History

import argparse
import http.client
import json
import sys
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, Callable, Iterable
2020-04-13 20:18:01 +02:00
from urllib.parse import urlparse, urlunparse
import requests
2020-05-23 20:30:25 +02:00
# noinspection PyPackageRequirements
import urllib3
from . import __version__
from .adapters import HTTPieHTTPAdapter
from .cli.dicts import HTTPHeadersDict
from .encoding import UTF8
from .models import RequestsMessage
from .plugins.registry import plugin_manager
from .sessions import get_httpie_session
from .ssl_ import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter
from .uploads import (
compress_request, prepare_request_body,
2020-09-25 13:44:28 +02:00
get_multipart_data_and_content_type,
)
from .utils import get_expired_cookies, repr_dict
2020-05-23 20:30:25 +02:00
urllib3.disable_warnings()
FORM_CONTENT_TYPE = f'application/x-www-form-urlencoded; charset={UTF8}'
JSON_CONTENT_TYPE = 'application/json'
JSON_ACCEPT = f'{JSON_CONTENT_TYPE}, */*;q=0.5'
DEFAULT_UA = f'HTTPie/{__version__}'
def collect_messages(
args: argparse.Namespace,
config_dir: Path,
request_body_read_callback: Callable[[bytes], None] = None,
) -> Iterable[RequestsMessage]:
httpie_session = None
httpie_session_headers = None
if args.session or args.session_read_only:
httpie_session = get_httpie_session(
config_dir=config_dir,
session_name=args.session or args.session_read_only,
host=args.headers.get('Host'),
url=args.url,
)
httpie_session_headers = httpie_session.headers
request_kwargs = make_request_kwargs(
args=args,
base_headers=httpie_session_headers,
request_body_read_callback=request_body_read_callback
)
send_kwargs = make_send_kwargs(args)
send_kwargs_mergeable_from_env = make_send_kwargs_mergeable_from_env(args)
requests_session = build_requests_session(
ssl_version=args.ssl_version,
2020-05-23 13:26:06 +02:00
ciphers=args.ciphers,
2020-05-23 20:19:16 +02:00
verify=bool(send_kwargs_mergeable_from_env['verify'])
)
if httpie_session:
httpie_session.update_headers(request_kwargs['headers'])
requests_session.cookies = httpie_session.cookies
if args.auth_plugin:
# Save auth from CLI to HTTPie session.
httpie_session.auth = {
'type': args.auth_plugin.auth_type,
'raw_auth': args.auth_plugin.raw_auth,
}
elif httpie_session.auth:
# Apply auth from HTTPie session
request_kwargs['auth'] = httpie_session.auth
if args.debug:
# TODO: reflect the split between request and send kwargs.
dump_request(request_kwargs)
request = requests.Request(**request_kwargs)
prepared_request = requests_session.prepare_request(request)
apply_missing_repeated_headers(prepared_request, request.headers)
2020-04-13 20:18:01 +02:00
if args.path_as_is:
prepared_request.url = ensure_path_as_is(
orig_url=args.url,
prepped_url=prepared_request.url,
)
2019-09-04 00:00:03 +02:00
if args.compress and prepared_request.body:
compress_request(
request=prepared_request,
always=args.compress > 1,
)
response_count = 0
expired_cookies = []
while prepared_request:
yield prepared_request
if not args.offline:
send_kwargs_merged = requests_session.merge_environment_settings(
url=prepared_request.url,
**send_kwargs_mergeable_from_env,
)
with max_headers(args.max_headers):
response = requests_session.send(
request=prepared_request,
**send_kwargs_merged,
**send_kwargs,
)
2020-06-15 23:02:16 +02:00
expired_cookies += get_expired_cookies(
Add internal support for file-like object responses to improve adapter plugin support (#1094) * Support `requests.response.raw` being a file-like object Previously HTTPie relied on `requests.models.Response.raw` being `urllib3.HTTPResponse`. The `requests` documentation specifies that (requests.models.Response.raw)[https://docs.python-requests.org/en/master/api/#requests.Response.raw] is a file-like object but allows for other types for internal use. This change introduces graceful handling for scenarios when `requests.models.Response.raw` is not `urllib3.HTTPResponse`. In such a scenario HTTPie now falls back to extracting metadata from `requests.models.Response` directly instead of direct access from protected protected members such as `response.raw._original_response`. A side effect in this fallback procedure is that we can no longer determine HTTP protocol version and report it as `1.1`. This change is necessary to make it possible to implement `TransportPlugins` without having to also needing to emulate internal behavior of `urlib3` and `http.client`. * Load cookies from `response.headers` instead of `response.raw._original_response.msg._headers` `response.cookies` was not utilized as it not possible to construct original payload from `http.cookiejar.Cookie`. Data is stored in lossy format. For example `Cookie.secure` defaults to `False` so we cannot distinguish if `Cookie.secure` was set to `False` or was not set at all. Same problem applies to other fields also. * Simpler HTTP envelope data extraction * Test cookie extraction and make cookie presentment backwards compatible Co-authored-by: Mickaël Schoentgen <contact@tiger-222.fr> Co-authored-by: Jakub Roztocil <jakub@roztocil.co>
2021-07-06 21:00:06 +02:00
response.headers.get('Set-Cookie', '')
2020-06-15 23:02:16 +02:00
)
response_count += 1
if response.next:
if args.max_redirects and response_count == args.max_redirects:
raise requests.TooManyRedirects
if args.follow:
prepared_request = response.next
if args.all:
yield response
continue
yield response
break
if httpie_session:
if httpie_session.is_new() or not args.session_read_only:
httpie_session.cookies = requests_session.cookies
httpie_session.remove_cookies(
2020-06-15 23:02:16 +02:00
# TODO: take path & domain into account?
cookie['name'] for cookie in expired_cookies
)
httpie_session.save()
# noinspection PyProtectedMember
@contextmanager
def max_headers(limit):
2020-12-23 22:07:27 +01:00
# <https://github.com/httpie/httpie/issues/802>
2020-05-23 13:26:06 +02:00
# noinspection PyUnresolvedReferences
orig = http.client._MAXHEADERS
http.client._MAXHEADERS = limit or float('Inf')
try:
yield
finally:
http.client._MAXHEADERS = orig
2019-09-01 11:38:14 +02:00
def build_requests_session(
2020-05-23 20:19:16 +02:00
verify: bool,
ssl_version: str = None,
2020-05-23 13:26:06 +02:00
ciphers: str = None,
2019-09-01 11:38:14 +02:00
) -> requests.Session:
requests_session = requests.Session()
2019-09-01 11:38:14 +02:00
# Install our adapter.
http_adapter = HTTPieHTTPAdapter()
2020-05-23 20:19:16 +02:00
https_adapter = HTTPieHTTPSAdapter(
2020-05-23 13:26:06 +02:00
ciphers=ciphers,
2020-05-23 20:19:16 +02:00
verify=verify,
2019-09-04 00:00:03 +02:00
ssl_version=(
2020-05-23 13:26:06 +02:00
AVAILABLE_SSL_VERSION_ARG_MAPPING[ssl_version]
2019-09-04 00:00:03 +02:00
if ssl_version else None
2020-05-23 20:19:16 +02:00
),
)
requests_session.mount('http://', http_adapter)
2020-05-23 20:19:16 +02:00
requests_session.mount('https://', https_adapter)
2019-09-01 11:38:14 +02:00
# Install adapters from plugins.
for plugin_cls in plugin_manager.get_transport_plugins():
transport_plugin = plugin_cls()
requests_session.mount(
prefix=transport_plugin.prefix,
adapter=transport_plugin.get_adapter(),
)
return requests_session
def dump_request(kwargs: dict):
2019-08-31 18:00:03 +02:00
sys.stderr.write(
f'\n>>> requests.request(**{repr_dict(kwargs)})\n\n')
def finalize_headers(headers: HTTPHeadersDict) -> HTTPHeadersDict:
final_headers = HTTPHeadersDict()
2016-08-13 22:40:01 +02:00
for name, value in headers.items():
if value is not None:
2019-12-03 19:09:09 +01:00
# “leading or trailing LWS MAY be removed without
# changing the semantics of the field value”
# <https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html>
2016-08-13 22:40:01 +02:00
# Also, requests raises `InvalidHeader` for leading spaces.
value = value.strip()
if isinstance(value, str):
2020-12-23 22:07:27 +01:00
# See <https://github.com/httpie/httpie/issues/212>
value = value.encode()
final_headers.add(name, value)
2016-08-13 22:40:01 +02:00
return final_headers
def apply_missing_repeated_headers(
prepared_request: requests.PreparedRequest,
original_headers: HTTPHeadersDict
) -> None:
"""Update the given `prepared_request`'s headers with the original
ones. This allows the requests to be prepared as usual, and then later
merged with headers that are specified multiple times."""
new_headers = HTTPHeadersDict(prepared_request.headers)
for prepared_name, prepared_value in prepared_request.headers.items():
if prepared_name not in original_headers:
continue
original_keys, original_values = zip(*filter(
lambda item: item[0].casefold() == prepared_name.casefold(),
original_headers.items()
))
if prepared_value not in original_values:
# If the current value is not among the initial values
# set for this field, then it means that this field got
# overridden on the way, and we should preserve it.
continue
new_headers.popone(prepared_name)
new_headers.update(zip(original_keys, original_values))
prepared_request.headers = new_headers
def make_default_headers(args: argparse.Namespace) -> HTTPHeadersDict:
default_headers = HTTPHeadersDict({
'User-Agent': DEFAULT_UA
})
auto_json = args.data and not args.form
if args.json or auto_json:
default_headers['Accept'] = JSON_ACCEPT
if args.json or (auto_json and args.data):
default_headers['Content-Type'] = JSON_CONTENT_TYPE
elif args.form and not args.files:
2012-09-07 12:48:59 +02:00
# If sending files, `requests` will set
# the `Content-Type` for us.
default_headers['Content-Type'] = FORM_CONTENT_TYPE
return default_headers
def make_send_kwargs(args: argparse.Namespace) -> dict:
return {
'timeout': args.timeout or None,
'allow_redirects': False,
}
def make_send_kwargs_mergeable_from_env(args: argparse.Namespace) -> dict:
cert = None
if args.cert:
cert = args.cert
if args.cert_key:
cert = cert, args.cert_key
return {
'proxies': {p.key: p.value for p in args.proxy},
'stream': True,
'verify': {
'yes': True,
'true': True,
'no': False,
'false': False,
}.get(args.verify.lower(), args.verify),
'cert': cert,
}
def json_dict_to_request_body(data: Dict[str, Any]) -> str:
# Propagate the top-level list if there is only one
# item in the object, with an en empty key.
if len(data) == 1:
[(key, value)] = data.items()
if key == '' and isinstance(value, list):
data = value
if data:
data = json.dumps(data)
else:
# We need to set data to an empty string to prevent requests
# from assigning an empty list to `response.request.data`.
data = ''
return data
def make_request_kwargs(
2019-09-01 21:15:39 +02:00
args: argparse.Namespace,
base_headers: HTTPHeadersDict = None,
request_body_read_callback=lambda chunk: chunk
2019-09-01 21:15:39 +02:00
) -> dict:
"""
Translate our `args` into `requests.Request` keyword arguments.
"""
files = args.files
# Serialize JSON data, if needed.
data = args.data
auto_json = data and not args.form
if (args.json or auto_json) and isinstance(data, dict):
data = json_dict_to_request_body(data)
# Finalize headers.
2019-09-01 11:38:14 +02:00
headers = make_default_headers(args)
if base_headers:
headers.update(base_headers)
headers.update(args.headers)
if args.offline and args.chunked and 'Transfer-Encoding' not in headers:
# When online, we let requests set the header instead to be able more
# easily verify chunking is taking place.
headers['Transfer-Encoding'] = 'chunked'
2016-08-13 22:40:01 +02:00
headers = finalize_headers(headers)
2020-09-25 14:44:22 +02:00
if (args.form and files) or args.multipart:
2020-08-19 10:22:42 +02:00
data, headers['Content-Type'] = get_multipart_data_and_content_type(
data=args.multipart_data,
2020-08-19 10:22:42 +02:00
boundary=args.boundary,
content_type=args.headers.get('Content-Type'),
)
return {
'method': args.method.lower(),
'url': args.url,
'headers': headers,
'data': prepare_request_body(
data,
body_read_callback=request_body_read_callback,
chunked=args.chunked,
offline=args.offline,
content_length_header_value=headers.get('Content-Length'),
),
'auth': args.auth,
'params': args.params.items(),
}
2020-04-13 20:18:01 +02:00
def ensure_path_as_is(orig_url: str, prepped_url: str) -> str:
"""
Handle `--path-as-is` by replacing the path component of the prepared
URL with the path component from the original URL. Other parts stay
untouched because other (welcome) processing on the URL might have
taken place.
2020-12-23 22:07:27 +01:00
<https://github.com/httpie/httpie/issues/895>
2020-04-13 20:18:01 +02:00
<https://ec.haxx.se/http/http-basics#path-as-is>
<https://curl.haxx.se/libcurl/c/CURLOPT_PATH_AS_IS.html>
>>> ensure_path_as_is('http://foo/../', 'http://foo/?foo=bar')
'http://foo/../?foo=bar'
"""
parsed_orig, parsed_prepped = urlparse(orig_url), urlparse(prepped_url)
final_dict = {
# noinspection PyProtectedMember
**parsed_prepped._asdict(),
'path': parsed_orig.path,
}
return urlunparse(tuple(final_dict.values()))