wttr.in/bin/proxy.py

#vim: fileencoding=utf-8

"""
The proxy server acts as a backend for the wttr.in service.
It caches the answers and handles various data sources, transforming their
answers into a format supported by the wttr.in service.

If WTTRIN_TEST is specified, it works in a special test mode:
it does not fetch and does not store the data in the cache,
but uses the fake data from "test/proxy-data".
"""
from __future__ import print_function

from gevent.pywsgi import WSGIServer
from gevent.monkey import patch_all
patch_all()

# pylint: disable=wrong-import-position,wrong-import-order
import sys
import os
import time
import json
import hashlib

import requests
import cyrtranslit

from flask import Flask, request
APP = Flask(__name__)

MYDIR = os.path.abspath(
    os.path.dirname(os.path.dirname(__file__)))
sys.path.append("%s/lib/" % MYDIR)

from globals import PROXY_CACHEDIR, PROXY_HOST, PROXY_PORT, USE_METNO, USER_AGENT
from metno import create_standard_json_from_metno, metno_request
from translations import PROXY_LANGS
# pylint: enable=wrong-import-position

def is_testmode():
    """Server is running in the wttr.in test mode"""
    return "WTTRIN_TEST" in os.environ

def load_translations():
    """
    load all translations
    """
    translations = {}

    for f_name in PROXY_LANGS:
        f_name = 'share/translations/%s.txt' % f_name
        translation = {}
        lang = f_name.split('/')[-1].split('.', 1)[0]
        with open(f_name, "r") as f_file:
            for line in f_file:
                if ':' not in line:
                    continue
                if line.count(':') == 3:
                    _, trans, orig, _ = line.strip().split(':', 4)
                else:
                    _, trans, orig = line.strip().split(':', 3)
                trans = trans.strip()
                orig = orig.strip()
                translation[orig] = trans
        translations[lang] = translation

    return translations
TRANSLATIONS = load_translations()
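
# NOTE: each share/translations/<lang>.txt file is parsed as colon-separated
# lines; only the second and third fields are used (translation and English
# original).  The exact layout of the remaining fields is assumed here, e.g.
# a hypothetical line
#
#   116:Teilweise bewölkt:Partly cloudy:
#
# would map "Partly cloudy" to "Teilweise bewölkt" for that language.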

def _is_metno():
    return USE_METNO

def _find_srv_for_query(path, query): # pylint: disable=unused-argument
    if _is_metno():
        return 'https://api.met.no'
    return 'http://api.worldweatheronline.com'

def _cache_file(path, query):
    """Return the cache filename for the specified `path` and `query`
    and for the current time.

    To smooth the load on the server, the expiration time is varied
    slightly, based on the sha1 hash digest of path+query.
    """
    digest = hashlib.sha1(("%s %s" % (path, query)).encode("utf-8")).hexdigest()
    digest_number = ord(digest[0].upper())
    expiry_interval = 60*(digest_number+40)

    timestamp = "%010d" % (int(time.time())//expiry_interval*expiry_interval)
    filename = os.path.join(PROXY_CACHEDIR, timestamp, path, query)
    return filename
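
# NOTE: digest[0] is a hex digit, so ord(digest[0].upper()) falls in
# 48..57 ('0'-'9') or 65..70 ('A'-'F'), which puts expiry_interval between
# 60*(48+40) = 5280 s (88 min) and 60*(70+40) = 6600 s (110 min).  Different
# path+query pairs therefore expire at slightly different times, spreading
# out the re-fetches.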

def _load_content_and_headers(path, query):
    if is_testmode():
        cache_file = "test/proxy-data/data1"
    else:
        cache_file = _cache_file(path, query)
    try:
        return (open(cache_file, 'r').read(),
                json.loads(open(cache_file+".headers", 'r').read()))
    except IOError:
        return None, None

def _touch_empty_file(path, query):
    cache_file = _cache_file(path, query)
    cache_dir = os.path.dirname(cache_file)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    open(cache_file, 'w').write("")

def _save_content_and_headers(path, query, content, headers):
    cache_file = _cache_file(path, query)
    cache_dir = os.path.dirname(cache_file)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    open(cache_file + ".headers", 'w').write(json.dumps(headers))
    open(cache_file, 'wb').write(content)
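
# NOTE: the cache layout is PROXY_CACHEDIR/<timestamp>/<path>/<query>, with
# the response body stored in that file and the HTTP headers stored next to
# it in a "<query>.headers" JSON sidecar file.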

def translate(text, lang):
    """
    Translate `text` into `lang`
    """
    translated = TRANSLATIONS.get(lang, {}).get(text, text)
    if text == translated:
        print("%s: %s" % (lang, text))
    return translated

def cyr(to_translate):
    """
    Transliterate `to_translate` from Latin into Cyrillic
    """
    return cyrtranslit.to_cyrillic(to_translate)

def _patch_greek(original):
    return original.replace(u"Ηλιόλουστη/ο", u"Ηλιόλουστη")

def add_translations(content, lang):
    """
    Add `lang` translation to `content` (JSON)
    returned by the data source
    """
    if content == "{}":
        return {}

    languages_to_translate = TRANSLATIONS.keys()
    try:
        d = json.loads(content)  # pylint: disable=invalid-name
    except (ValueError, TypeError) as exception:
        print("---")
        print(exception)
        print("---")
        return {}

    try:
        weather_condition = \
            d['data']['current_condition'][0]['weatherDesc'][0]['value'].capitalize()
        d['data']['current_condition'][0]['weatherDesc'][0]['value'] = \
            weather_condition
        if lang in languages_to_translate:
            d['data']['current_condition'][0]['lang_%s' % lang] = \
                [{'value': translate(weather_condition, lang)}]
        elif lang == 'sr':
            d['data']['current_condition'][0]['lang_%s' % lang] = \
                [{'value': cyr(
                    d['data']['current_condition'][0]['lang_%s' % lang][0]['value']
                )}]
        elif lang == 'el':
            d['data']['current_condition'][0]['lang_%s' % lang] = \
                [{'value': _patch_greek(
                    d['data']['current_condition'][0]['lang_%s' % lang][0]['value']
                )}]
        elif lang == 'sr-lat':
            d['data']['current_condition'][0]['lang_%s' % lang] = \
                [{'value': d['data']['current_condition'][0]['lang_sr'][0]['value']
                }]

        fixed_weather = []
        for w in d['data']['weather']:  # pylint: disable=invalid-name
            fixed_hourly = []
            for h in w['hourly']:  # pylint: disable=invalid-name
                weather_condition = h['weatherDesc'][0]['value']
                if lang in languages_to_translate:
                    h['lang_%s' % lang] = \
                        [{'value': translate(weather_condition, lang)}]
                elif lang == 'sr':
                    h['lang_%s' % lang] = \
                        [{'value': cyr(h['lang_%s' % lang][0]['value'])}]
                elif lang == 'el':
                    h['lang_%s' % lang] = \
                        [{'value': _patch_greek(h['lang_%s' % lang][0]['value'])}]
                elif lang == 'sr-lat':
                    h['lang_%s' % lang] = \
                        [{'value': h['lang_sr'][0]['value']}]
                fixed_hourly.append(h)
            w['hourly'] = fixed_hourly
            fixed_weather.append(w)

        d['data']['weather'] = fixed_weather
        content = json.dumps(d)
    except (IndexError, ValueError) as exception:
        print(exception)
    return content
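
# NOTE: add_translations() follows the convention of shipping the textual
# description both as "weatherDesc" and as a per-language "lang_<code>"
# array.  The shape below is inferred from the code above; the values are
# illustrative only:
#
#   "current_condition": [{"weatherDesc": [{"value": "Partly cloudy"}],
#                          "lang_de":     [{"value": "Teilweise bewölkt"}]}]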

def _fetch_content_and_headers(path, query_string, **kwargs):
    content, headers = _load_content_and_headers(path, query_string)
    if content is None:
        srv = _find_srv_for_query(path, query_string)
        url = '%s/%s?%s' % (srv, path, query_string)

        attempts = 10
        response = None
        while attempts:
            try:
                response = requests.get(url, timeout=2, **kwargs)
            except requests.ReadTimeout:
                attempts -= 1
                continue
            try:
                json.loads(response.content)
                break
            except ValueError:
                attempts -= 1

        _touch_empty_file(path, query_string)
        if response:
            headers = {}
            headers['Content-Type'] = response.headers['content-type']
            _save_content_and_headers(path, query_string, response.content, headers)
            content = response.content
        else:
            content = "{}"
    else:
        print("cache found")

    return content, headers
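
# NOTE: the fetch loop above makes up to 10 attempts with a 2-second timeout,
# retrying both on read timeouts and on responses that are not valid JSON.
# An empty cache file is touched after the loop, presumably so that the cache
# slot for this path+query exists even when the upstream fetch fails.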

@APP.route("/<path:path>")
def proxy(path):
    """
    Main proxy function. Handles incoming HTTP queries.
    """
    lang = request.args.get('lang', 'en')
    query_string = request.query_string.decode("utf-8")
    query_string = query_string.replace('sr-lat', 'sr')
    query_string = query_string.replace('lang=None', 'lang=en')
    content = ""
    headers = ""
    if _is_metno():
        path, query, days = metno_request(path, query_string)
        if USER_AGENT == '':
            raise ValueError(
                'User agent must be set to adhere to metno ToS: '
                'https://api.met.no/doc/TermsOfService')
        content, headers = _fetch_content_and_headers(path, query, headers={
            'User-Agent': USER_AGENT
        })
        content = create_standard_json_from_metno(content, days)
    else:
        # WWO tweaks
        query_string += "&extra=localObsTime"
        query_string += "&includelocation=yes"
        content, headers = _fetch_content_and_headers(path, query_string)

    content = add_translations(content, lang)
    return content, 200, headers
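
# NOTE: a hypothetical request to the proxy (port and API path are examples
# only; the real port comes from PROXY_PORT in globals):
#
#   curl 'http://127.0.0.1:5001/premium/v1/weather.ashx?q=Berlin&lang=de'
#
# The answer is served from the cache when a fresh entry exists; otherwise it
# is fetched from api.met.no or api.worldweatheronline.com (see USE_METNO)
# and, for supported languages, enriched with a lang_<code> translation.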

if __name__ == "__main__":
    #app.run(host='0.0.0.0', port=5001, debug=False)
    #app.debug = True
    if len(sys.argv) == 1:
        bind_addr = "0.0.0.0"
        SERVER = WSGIServer((bind_addr, PROXY_PORT), APP)
        SERVER.serve_forever()
    else:
        print('running single request from command line arg')
        APP.testing = True
        with APP.test_client() as c:
            resp = c.get(sys.argv[1])
            print('Status: ' + resp.status)
            # print('Headers: ' + dumps(resp.headers))
            print(resp.data.decode('utf-8'))
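
# NOTE: a hypothetical single-request run through the Flask test client,
# without starting the WSGI server (the path is an example only):
#
#   python3 bin/proxy.py '/premium/v1/weather.ashx?q=London'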