django-helpdeskmig/helpdesk/akismet.py

368 lines
13 KiB
Python
Raw Normal View History

# Version 0.2.0
# 2009/06/18
# Copyright Michael Foord 2005-2009
# akismet.py
# Python interface to the akismet API
# E-mail fuzzyman@voidspace.org.uk
# http://www.voidspace.org.uk/python/modules.shtml
# http://akismet.com
# Released subject to the BSD License
# See http://www.voidspace.org.uk/python/license.shtml
"""
A python interface to the `Akismet <http://akismet.com>`_ API.
2016-10-24 10:05:41 +02:00
This is a web service for blocking SPAM comments to blogs - or other online
services.
You will need a Wordpress API key, from `wordpress.com <http://wordpress.com>`_.
You should pass in the keyword argument 'agent' to the name of your program,
when you create an Akismet instance. This sets the ``user-agent`` to a useful
value.
The default is::
Python Interface by Fuzzyman | akismet.py/0.2.0
Whatever you pass in, will replace the *Python Interface by Fuzzyman* part.
**0.2.0** will change with the version of this interface.
Usage example::
2016-10-24 10:05:41 +02:00
from akismet import Akismet
2016-10-24 10:05:41 +02:00
api = Akismet(agent='Test Script')
# if apikey.txt is in place,
# the key will automatically be set
# or you can call api.setAPIKey()
#
if api.key is None:
2011-09-01 13:01:03 +02:00
print >> sys.stderr, "No 'apikey.txt' file."
elif not api.verify_key():
2011-09-01 13:01:03 +02:00
print >> sys.stderr, "The API key is invalid."
else:
# data should be a dictionary of values
# They can all be filled in with defaults
# from a CGI environment
if api.comment_check(comment, data):
2011-09-01 13:01:03 +02:00
print >> sys.stderr, 'This comment is spam.'
else:
2011-09-01 13:01:03 +02:00
print >> sys.stderr, 'This comment is ham.'
"""
2016-10-21 17:14:12 +02:00
import os
from urllib import urlencode
import socket
# Give every socket created after this import a 5 second default timeout.
# This is a process-wide setting, not one scoped to this module.
try:
    socket.setdefaulttimeout(5)
except AttributeError:
    # Same outcome as a ``hasattr`` guard: when the ``socket`` module has no
    # ``setdefaulttimeout`` the default is simply left untouched.
    pass
# Version of this interface; it is interpolated into the User-Agent header.
__version__ = '0.2.0'

# Names exported by ``from akismet import *``.
__all__ = (
    '__version__',
    'Akismet',
    'AkismetError',
    'APIKeyError',
)

__author__ = 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>'
__docformat__ = "restructuredtext en"

# Template for the User-Agent header: (agent name, interface version).
user_agent = "%s | akismet.py/%s"
# Agent name used when the caller supplies none; takes the interface version.
DEFAULTAGENT = 'Python Interface by Fuzzyman/%s'

# Short alias used when checking whether ``apikey.txt`` exists.
isfile = os.path.isfile
# ``urllib2`` doubles as a sentinel: it stays ``None`` only when the Google
# App Engine ``urlfetch`` API could be imported, in which case that API is
# used as the HTTP transport instead of ``urllib2``.
urllib2 = None
try:
    from google.appengine.api import urlfetch
except ImportError:
    import urllib2

if urllib2 is None:
    def _fetch_url(url, data, headers):
        """
        POST ``data`` to ``url`` through App Engine's ``urlfetch``.

        Returns the response content when the status code is 200 and raises
        a plain ``Exception`` naming the URL and status code otherwise.
        """
        req = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=headers)
        if req.status_code == 200:
            return req.content
        raise Exception('Could not fetch Akismet URL: %s Response code: %s' %
                        (url, req.status_code))
else:
    def _fetch_url(url, data, headers):
        """
        POST ``data`` to ``url`` through ``urllib2`` and return the body.

        Errors raised by ``urllib2`` are not caught here.
        """
        req = urllib2.Request(url, data, headers)
        h = urllib2.urlopen(req)
        try:
            return h.read()
        finally:
            # Always release the connection, even when reading fails.
            h.close()
class AkismetError(Exception):
    """Base class for all akismet exceptions."""
class APIKeyError(AkismetError):
    """Invalid API key."""
class Akismet(object):
    """
    A class for working with the akismet API.

    Every request goes through ``_safeRequest``, which re-raises any failure
    of the underlying HTTP call as an ``AkismetError``.
    """

    baseurl = 'rest.akismet.com/1.1/'

    def __init__(self, key=None, blog_url=None, agent=None):
        """
        Build the ``User-Agent`` string and call ``setAPIKey``.

        ``agent`` names the calling program; when it is ``None`` the module
        default (``DEFAULTAGENT`` with the interface version) is used.
        """
        if agent is None:
            agent = DEFAULTAGENT % __version__
        self.user_agent = user_agent % (agent, __version__)
        self.setAPIKey(key, blog_url)

    def _getURL(self):
        """
        Fetch the url to make requests to.

        This consists of the api key plus the baseurl.
        """
        # NOTE(review): requests use plain http with the API key in the host
        # name - confirm whether https should be used instead.
        return 'http://%s.%s' % (self.key, self.baseurl)

    def _safeRequest(self, url, data, headers):
        """
        POST ``data`` to ``url`` and return the response body.

        Any exception raised by the transport is converted into an
        ``AkismetError`` carrying the original message.
        """
        try:
            resp = _fetch_url(url, data, headers)
        except Exception as e:
            raise AkismetError(str(e))
        return resp

    def _require_key(self):
        """Raise ``APIKeyError`` when no API key has been set."""
        if self.key is None:
            raise APIKeyError("Your have not set an API key.")

    def setAPIKey(self, key=None, blog_url=None):
        """
        Set the wordpress API key for all transactions.

        If you don't specify an explicit API ``key`` it will attempt to load
        the key and blog url from a file called ``apikey.txt`` in the current
        directory. Blank lines and lines starting with ``#`` are ignored; the
        first remaining line is the key and the second is the blog url.

        It raises ``APIKeyError`` if the file holds fewer than two such
        lines.

        This method is *usually* called automatically when you create a new
        ``Akismet`` instance.
        """
        if key is None and isfile('apikey.txt'):
            # Read inside a ``with`` block so the handle is always closed.
            with open('apikey.txt') as key_file:
                stripped = [line.strip() for line in key_file]
            entries = [entry for entry in stripped
                       if entry and not entry.startswith('#')]
            try:
                self.key = entries[0]
                self.blog_url = entries[1]
            except IndexError:
                raise APIKeyError("Your 'apikey.txt' is invalid.")
        else:
            self.key = key
            self.blog_url = blog_url

    def verify_key(self):
        """
        This equates to the ``verify-key`` call against the akismet API.

        It returns ``True`` if the key is valid.

        The docs state that you *ought* to call this at the start of the
        transaction.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to akismet fails, the underlying error is
        re-raised as an ``AkismetError``.
        (*akismet.py* uses `urllib2 <http://docs.python.org/lib/module-urllib2.html>`_)
        """
        self._require_key()
        data = {'key': self.key, 'blog': self.blog_url}
        # this function *doesn't* use the key as part of the URL
        url = 'http://%sverify-key' % self.baseurl
        headers = {'User-Agent': self.user_agent}
        resp = self._safeRequest(url, urlencode(data), headers)
        return resp.lower() == 'valid'

    def _build_data(self, comment, data):
        """
        This function builds the data structure required by ``comment_check``,
        ``submit_spam``, and ``submit_ham``.

        It modifies the ``data`` dictionary you give it in place. (and so
        doesn't return anything)

        It raises an ``AkismetError`` if the user IP or user-agent is neither
        in ``data`` nor in ``os.environ``.
        """
        data['comment_content'] = comment

        if 'user_ip' not in data:
            try:
                val = os.environ['REMOTE_ADDR']
            except KeyError:
                raise AkismetError("No 'user_ip' supplied")
            data['user_ip'] = val

        if 'user_agent' not in data:
            try:
                val = os.environ['HTTP_USER_AGENT']
            except KeyError:
                raise AkismetError("No 'user_agent' supplied")
            data['user_agent'] = val

        # Everything below is optional: values already present in ``data``
        # win, otherwise the environment (or a fixed default) is used.
        data.setdefault('referrer', os.environ.get('HTTP_REFERER', 'unknown'))
        data.setdefault('permalink', '')
        data.setdefault('comment_type', 'comment')
        data.setdefault('comment_author', '')
        data.setdefault('comment_author_email', '')
        data.setdefault('comment_author_url', '')
        data.setdefault('SERVER_ADDR', os.environ.get('SERVER_ADDR', ''))
        data.setdefault('SERVER_ADMIN', os.environ.get('SERVER_ADMIN', ''))
        data.setdefault('SERVER_NAME', os.environ.get('SERVER_NAME', ''))
        data.setdefault('SERVER_PORT', os.environ.get('SERVER_PORT', ''))
        data.setdefault('SERVER_SIGNATURE', os.environ.get('SERVER_SIGNATURE', ''))
        data.setdefault('SERVER_SOFTWARE', os.environ.get('SERVER_SOFTWARE', ''))
        data.setdefault('HTTP_ACCEPT', os.environ.get('HTTP_ACCEPT', ''))
        data.setdefault('blog', self.blog_url)

    def comment_check(self, comment, data=None, build_data=True, DEBUG=False):
        """
        This is the function that checks comments.

        It returns ``True`` for spam and ``False`` for ham.

        If you set ``DEBUG=True`` then it will return the text of the response,
        instead of the ``True`` or ``False`` object.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to Akismet fails then the underlying error is
        re-raised as an ``AkismetError``.

        As a minimum it requires the body of the comment. This is the
        ``comment`` argument.

        Akismet requires some other arguments, and allows some optional ones.
        The more information you give it, the more likely it is to be able to
        make an accurate diagnosis.

        You supply these values using a mapping object (dictionary) as the
        ``data`` argument. The dictionary is modified in place.

        If ``build_data`` is ``True`` (the default), then *akismet.py* will
        attempt to fill in as much information as possible, using default
        values where necessary. This is particularly useful for programs
        running in a CGI environment. A lot of useful information
        can be supplied from environment variables (``os.environ``). See below.

        You *only* need supply values for which you don't want defaults filled
        in for. All values must be strings.

        There are a few required values. If they are not supplied, and
        defaults can't be worked out, then an ``AkismetError`` is raised.

        If you set ``build_data=False`` and the response is neither ``true``
        nor ``false`` (for example because a required value is missing), an
        ``AkismetError`` will also be raised.

        The normal values (and defaults) are as follows : ::

            'user_ip':          os.environ['REMOTE_ADDR']       (*)
            'user_agent':       os.environ['HTTP_USER_AGENT']   (*)
            'referrer':         os.environ.get('HTTP_REFERER', 'unknown') [#]_
            'permalink':        ''
            'comment_type':     'comment' [#]_
            'comment_author':   ''
            'comment_author_email': ''
            'comment_author_url': ''
            'SERVER_ADDR':      os.environ.get('SERVER_ADDR', '')
            'SERVER_ADMIN':     os.environ.get('SERVER_ADMIN', '')
            'SERVER_NAME':      os.environ.get('SERVER_NAME', '')
            'SERVER_PORT':      os.environ.get('SERVER_PORT', '')
            'SERVER_SIGNATURE': os.environ.get('SERVER_SIGNATURE', '')
            'SERVER_SOFTWARE':  os.environ.get('SERVER_SOFTWARE', '')
            'HTTP_ACCEPT':      os.environ.get('HTTP_ACCEPT', '')

        (*) Required values

        You may supply as many additional 'HTTP_*' type values as you wish.
        These should correspond to the http headers sent with the request.

        .. [#] Note the spelling "referrer". This is a required value by the
            akismet api - however, referrer information is not always
            supplied by the browser or server. In fact the HTTP protocol
            forbids relying on referrer information for functionality in
            programs.
        .. [#] The `API docs <http://akismet.com/development/api/>`_ state that this value
            can be " *blank, comment, trackback, pingback, or a made up value*
            *like 'registration'* ".
        """
        self._require_key()
        if data is None:
            data = {}
        if build_data:
            self._build_data(comment, data)
        if 'blog' not in data:
            data['blog'] = self.blog_url
        url = '%scomment-check' % self._getURL()
        headers = {'User-Agent': self.user_agent}
        resp = self._safeRequest(url, urlencode(data), headers)
        if DEBUG:
            return resp
        resp = resp.lower()
        if resp == 'true':
            return True
        if resp == 'false':
            return False
        # NOTE: Happens when you get a 'howdy wilbur' response !
        raise AkismetError('missing required argument.')

    def _submit(self, endpoint, comment, data, build_data):
        """
        Shared implementation of ``submit_spam`` and ``submit_ham``.

        ``endpoint`` is the API call name appended to the keyed base URL.
        The response body is discarded.
        """
        self._require_key()
        if data is None:
            data = {}
        if build_data:
            self._build_data(comment, data)
        # Same fallback as ``comment_check``, so that ``build_data=False``
        # callers still send the blog url.
        if 'blog' not in data:
            data['blog'] = self.blog_url
        url = '%s%s' % (self._getURL(), endpoint)
        headers = {'User-Agent': self.user_agent}
        self._safeRequest(url, urlencode(data), headers)

    def submit_spam(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as ham,
        is really spam.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and raises the same ``APIKeyError`` / ``AkismetError``.
        """
        self._submit('submit-spam', comment, data, build_data)

    def submit_ham(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as spam,
        is really ham.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and raises the same ``APIKeyError`` / ``AkismetError``.
        """
        self._submit('submit-ham', comment, data, build_data)