2015-01-23 22:04:42 +01:00
|
|
|
import json
|
2019-08-31 15:17:10 +02:00
|
|
|
import mimetypes
|
2021-09-21 19:07:59 +02:00
|
|
|
import re
|
|
|
|
import sys
|
2020-06-15 22:28:04 +02:00
|
|
|
import time
|
2017-12-28 18:03:13 +01:00
|
|
|
from collections import OrderedDict
|
2020-06-15 22:28:04 +02:00
|
|
|
from http.cookiejar import parse_ns_headers
|
2019-08-31 18:00:03 +02:00
|
|
|
from pprint import pformat
|
2021-09-21 19:07:59 +02:00
|
|
|
from typing import Any, List, Optional, Tuple
|
2015-01-23 22:04:42 +01:00
|
|
|
|
2019-08-31 12:09:17 +02:00
|
|
|
import requests.auth
|
|
|
|
|
2021-07-06 21:00:06 +02:00
|
|
|
# Matches the ``, `` separator between two cookies. The lookahead only accepts
# a comma-space that is followed by a run of characters other than space or
# ``;`` ending in ``=`` (i.e. something shaped like the start of ``name=``),
# so a ``, `` that is not followed by such a run is left alone.
RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
|
2021-09-21 19:07:59 +02:00
|
|
|
# A single ``(key, value)`` pair: a string key with a value of any type.
Item = Tuple[str, Any]
|
|
|
|
# A list of ``(key, value)`` pairs; unlike a dict, a list can hold the same key
# more than once.
Items = List[Item]
|
|
|
|
|
|
|
|
|
|
|
|
class JsonDictPreservingDuplicateKeys(OrderedDict):
    """A JSON object that remembers every key/value pair, repeated keys included.

    The pairs are stored in a plain list and returned untouched by
    `items()`, which is what lets `json.dumps()` emit duplicate keys
    (see `_ensure_items_used()` for the trick that makes it look there).

    """

    # Sorting is only reliable from Python 3.8 onwards. On earlier versions,
    # `json.dumps(obj, indent=N, sort_keys=True)` sorts the keys as long as
    # they are all unique, whereas duplicate keys come out in the order in
    # which they were defined in the original data.
    # See <https://bugs.python.org/issue23493#msg400929> for the behavior change between Python versions.
    SUPPORTS_SORTING = sys.version_info >= (3, 8)

    def __init__(self, items: Items):
        # Keep the raw pairs; the underlying dict is only used as a marker.
        self._items = items
        self._ensure_items_used()

    def _ensure_items_used(self) -> None:
        """HACK: Make `json.dumps()` consult `self.items()` rather than see an empty dict.

        CPython ships two JSON encoders, and each one short-circuits on an
        empty dict in its own way:

        (1) The pure-Python encoder performs a plain `if not dict: return '{}'`
            test, which could be fooled by defining `__bool__()`.
            Source:
                - <https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336>

        (2) The C encoder looks at the number of stored items through
            `dict->ma_used`, a counter that only changes when an item is
            added to or removed from the dict. The only way around that
            check is to really store an item.
            Sources:
                - <https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582>
                - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53>
                - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18>

        Storing a single placeholder item satisfies both encoders.

        """
        if not self._items:
            # Nothing to serialise: an empty dict is the correct appearance.
            return
        self['__hack__'] = '__hack__'

    def items(self) -> Items:
        """Return the stored pairs as given, duplicates and all.

        """
        return self._items
|
2021-07-06 21:00:06 +02:00
|
|
|
|
2015-01-23 22:04:42 +01:00
|
|
|
|
2021-09-21 19:07:59 +02:00
|
|
|
def load_json_preserve_order_and_dupe_keys(s):
    """Parse the JSON text `s`, building each object with `JsonDictPreservingDuplicateKeys`.

    `json.loads()` passes every decoded object's key/value pairs to the
    hook, so the pairs reach the class as a list rather than as a dict.

    """
    pairs_hook = JsonDictPreservingDuplicateKeys
    return json.loads(s, object_pairs_hook=pairs_hook)
|
2013-02-26 15:12:33 +01:00
|
|
|
|
|
|
|
|
2019-08-31 18:00:03 +02:00
|
|
|
def repr_dict(d: dict) -> str:
    """Return the `pprint.pformat()` rendering of `d`."""
    rendered = pformat(d)
    return rendered
|
2016-03-04 18:42:13 +01:00
|
|
|
|
|
|
|
|
2013-03-04 02:35:01 +01:00
|
|
|
def humanize_bytes(n, precision=2):
    # Author: Doug Latornell
    # Licence: MIT
    # URL: https://code.activestate.com/recipes/577081/
    """Format a byte count as a short human-readable string.

    The largest unit whose size does not exceed `n` is used, and the scaled
    value is printed with `precision` decimal places. Exactly one byte is
    special-cased to '1 B'; values smaller than one byte are shown in 'B'.

    >>> humanize_bytes(1)
    '1 B'
    >>> humanize_bytes(1024, precision=1)
    '1.0 kB'
    >>> humanize_bytes(1024 * 123, precision=1)
    '123.0 kB'
    >>> humanize_bytes(1024 * 12342, precision=1)
    '12.1 MB'
    >>> humanize_bytes(1024 * 12342, precision=2)
    '12.05 MB'
    >>> humanize_bytes(1024 * 1234, precision=2)
    '1.21 MB'
    >>> humanize_bytes(1024 * 1234 * 1111, precision=2)
    '1.31 GB'
    >>> humanize_bytes(1024 * 1234 * 1111, precision=1)
    '1.3 GB'

    """
    if n == 1:
        return '1 B'

    # Ordered from the largest unit to the smallest.
    units = (
        (1 << 50, 'PB'),
        (1 << 40, 'TB'),
        (1 << 30, 'GB'),
        (1 << 20, 'MB'),
        (1 << 10, 'kB'),
        (1, 'B'),
    )

    # First unit that fits; when none does, fall back to the smallest one.
    scale, unit = next(
        (entry for entry in units if n >= entry[0]),
        units[-1],
    )
    return f'{n / scale:.{precision}f} {unit}'
|
2019-08-31 12:09:17 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ExplicitNullAuth(requests.auth.AuthBase):
    """A do-nothing auth handler that forces requests to ignore the ``.netrc``.

    <https://github.com/psf/requests/issues/2773#issuecomment-174312831>
    """

    def __call__(self, r):
        # Hand the request back exactly as received.
        return r
|
2019-08-31 15:17:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_content_type(filename):
    """
    Guess the MIME type of ``filename`` with ``mimetypes``.

    The result is suitable for a Content-Type header; ``None`` is returned
    when ``mimetypes`` does not know the file type. Non-standard but
    commonly used types are considered as well (``strict=False``).

    """
    content_type, _encoding = mimetypes.guess_type(filename, strict=False)
    return content_type
|
2020-06-15 22:28:04 +02:00
|
|
|
|
|
|
|
|
2021-07-06 21:00:06 +02:00
|
|
|
def split_cookies(cookies):
    """
    Split a combined ``Set-Cookie`` header value into individual cookies.

    ``requests`` joins all cookies in ``response.headers['Set-Cookie']``
    with ``, ``. This function undoes that, taking care not to split on
    a ``, `` that may be part of a cookie value.

    An empty or missing value yields an empty list.
    """
    return RE_COOKIE_SPLIT.split(cookies) if cookies else []
|
|
|
|
|
|
|
|
|
2020-06-15 23:02:16 +02:00
|
|
|
def get_expired_cookies(
    cookies: str,
    now: Optional[float] = None
) -> List[dict]:
    """Return the name and path of every expired cookie in a ``Set-Cookie`` value.

    :param cookies: the (possibly combined) ``Set-Cookie`` header value; it
        is split with ``split_cookies()`` and parsed with
        ``http.cookiejar.parse_ns_headers()``.
    :param now: the timestamp to compare expiry times against. When omitted
        (``None``), the current ``time.time()`` is used.
    :return: one ``{'name': ..., 'path': ...}`` dict per cookie whose
        ``expires`` value is less than or equal to ``now``. ``path`` defaults
        to ``'/'`` when the cookie does not specify one. Cookies with no
        expiry information are never reported.

    """
    # Compare against `None` explicitly: `now or time.time()` would silently
    # replace a caller-supplied timestamp of `0` with the current time.
    if now is None:
        now = time.time()

    def is_expired(expires: Optional[float]) -> bool:
        return expires is not None and expires <= now

    # Each entry is the sequence of `(attribute, value)` pairs of one cookie.
    attr_sets: List[List[Tuple[str, Any]]] = parse_ns_headers(
        split_cookies(cookies)
    )

    parsed_cookies = [
        # The first attr name is the cookie name.
        dict(attrs[1:], name=attrs[0][0])
        for attrs in attr_sets
    ]

    # Give cookies that only carry `max-age` an `expires` value as well.
    _max_age_to_expires(cookies=parsed_cookies, now=now)

    return [
        {
            'name': cookie['name'],
            'path': cookie.get('path', '/')
        }
        for cookie in parsed_cookies
        if is_expired(expires=cookie.get('expires'))
    ]
|
2021-02-06 10:58:36 +01:00
|
|
|
|
|
|
|
|
|
|
|
def _max_age_to_expires(cookies, now):
|
|
|
|
"""
|
|
|
|
Translate `max-age` into `expires` for Requests to take it into account.
|
|
|
|
|
|
|
|
HACK/FIXME: <https://github.com/psf/requests/issues/5743>
|
|
|
|
|
|
|
|
"""
|
|
|
|
for cookie in cookies:
|
|
|
|
if 'expires' in cookie:
|
|
|
|
continue
|
|
|
|
max_age = cookie.get('max-age')
|
|
|
|
if max_age and max_age.isdigit():
|
|
|
|
cookie['expires'] = now + float(max_age)
|