2009-06-25 13:22:53 +02:00
|
|
|
# Version 0.2.0
|
|
|
|
# 2009/06/18
|
|
|
|
|
|
|
|
# Copyright Michael Foord 2005-2009
|
|
|
|
# akismet.py
|
|
|
|
# Python interface to the akismet API
|
|
|
|
# E-mail fuzzyman@voidspace.org.uk
|
|
|
|
|
|
|
|
# http://www.voidspace.org.uk/python/modules.shtml
|
|
|
|
# http://akismet.com
|
|
|
|
|
|
|
|
# Released subject to the BSD License
|
|
|
|
# See http://www.voidspace.org.uk/python/license.shtml
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
A python interface to the `Akismet <http://akismet.com>`_ API.
|
2016-10-24 10:05:41 +02:00
|
|
|
This is a web service for blocking SPAM comments to blogs - or other online
|
2009-06-25 13:22:53 +02:00
|
|
|
services.
|
|
|
|
|
|
|
|
You will need a Wordpress API key, from `wordpress.com <http://wordpress.com>`_.
|
|
|
|
|
|
|
|
You should set the keyword argument 'agent' to the name of your program,
|
|
|
|
when you create an Akismet instance. This sets the ``user-agent`` to a useful
|
|
|
|
value.
|
|
|
|
|
2016-10-26 10:22:15 +02:00
|
|
|
The default is::
|
2009-06-25 13:22:53 +02:00
|
|
|
|
|
|
|
Python Interface by Fuzzyman/0.2.0 | akismet.py/0.2.0
|
|
|
|
|
|
|
|
Whatever you pass in, will replace the *Python Interface by Fuzzyman* part.
|
|
|
|
**0.2.0** will change with the version of this interface.
|
|
|
|
|
2016-10-26 10:22:15 +02:00
|
|
|
Usage example::
|
2016-10-24 10:05:41 +02:00
|
|
|
|
2009-06-25 13:22:53 +02:00
|
|
|
from akismet import Akismet
|
2016-10-24 10:05:41 +02:00
|
|
|
|
2009-06-25 13:22:53 +02:00
|
|
|
api = Akismet(agent='Test Script')
|
|
|
|
# if apikey.txt is in place,
|
|
|
|
# the key will automatically be set
|
|
|
|
# or you can call api.setAPIKey()
|
|
|
|
#
|
|
|
|
if api.key is None:
|
2011-09-01 13:01:03 +02:00
|
|
|
print >> sys.stderr, "No 'apikey.txt' file."
|
2009-06-25 13:22:53 +02:00
|
|
|
elif not api.verify_key():
|
2011-09-01 13:01:03 +02:00
|
|
|
print >> sys.stderr, "The API key is invalid."
|
2009-06-25 13:22:53 +02:00
|
|
|
else:
|
|
|
|
# data should be a dictionary of values
|
|
|
|
# They can all be filled in with defaults
|
|
|
|
# from a CGI environment
|
|
|
|
if api.comment_check(comment, data):
|
2011-09-01 13:01:03 +02:00
|
|
|
print >> sys.stderr, 'This comment is spam.'
|
2009-06-25 13:22:53 +02:00
|
|
|
else:
|
2011-09-01 13:01:03 +02:00
|
|
|
print >> sys.stderr, 'This comment is ham.'
|
2009-06-25 13:22:53 +02:00
|
|
|
"""
|
|
|
|
|
|
|
|
|
2016-10-21 17:14:12 +02:00
|
|
|
import os
|
2009-06-25 13:22:53 +02:00
|
|
|
from urllib import urlencode
|
|
|
|
|
|
|
|
import socket
|
|
|
|
# Install a process-wide default socket timeout of 5 seconds. The lookup is
# guarded so that a ``socket`` module without ``setdefaulttimeout`` is simply
# left untouched.
if getattr(socket, 'setdefaulttimeout', None) is not None:
    socket.setdefaulttimeout(5)
|
|
|
|
|
|
|
|
# Version of this interface; it is interpolated into the User-Agent header.
__version__ = '0.2.0'

# Names exported by ``from akismet import *``.
__all__ = ('__version__', 'Akismet', 'AkismetError', 'APIKeyError')
|
2009-06-25 13:22:53 +02:00
|
|
|
|
|
|
|
__author__ = 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>'
__docformat__ = "restructuredtext en"

# User-Agent template: filled with (agent name, interface version).
user_agent = "%s | akismet.py/%s"
# Agent name used when the caller supplies none; filled with the version.
DEFAULTAGENT = 'Python Interface by Fuzzyman/%s'

# Module-level alias used when looking for ``apikey.txt``.
isfile = os.path.isfile
|
|
|
|
|
|
|
|
# Choose the HTTP backend once, at import time. On Google App Engine the
# ``urlfetch`` service is used and ``urllib2`` stays ``None``; everywhere else
# the standard ``urllib2`` module is imported and used instead.
urllib2 = None
try:
    from google.appengine.api import urlfetch
except ImportError:
    import urllib2

if urllib2 is None:
    def _fetch_url(url, data, headers):
        """POST ``data`` to ``url`` through App Engine's ``urlfetch``.

        Returns the response body when the status code is 200 and raises a
        plain ``Exception`` describing the URL and status code otherwise.
        """
        req = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=headers)
        if req.status_code == 200:
            return req.content
        raise Exception('Could not fetch Akismet URL: %s Response code: %s' %
                        (url, req.status_code))
else:
    def _fetch_url(url, data, headers):
        """POST ``data`` to ``url`` through ``urllib2`` and return the body.

        Any error raised by ``urllib2`` propagates to the caller.
        """
        req = urllib2.Request(url, data, headers)
        h = urllib2.urlopen(req)
        try:
            return h.read()
        finally:
            # Always release the connection, even if read() fails.
            h.close()
|
|
|
|
|
|
|
|
|
|
|
|
class AkismetError(Exception):
    """Root of the exception hierarchy raised by this module."""
|
|
|
|
|
2009-06-25 13:22:53 +02:00
|
|
|
|
|
|
|
class APIKeyError(AkismetError):
    """Raised when no API key is set or ``apikey.txt`` cannot be used."""
|
|
|
|
|
2009-06-25 13:22:53 +02:00
|
|
|
|
|
|
|
class Akismet(object):
    """A class for working with the akismet API.

    An instance holds the API ``key``, the ``blog_url`` it belongs to and the
    ``user_agent`` string sent with every request.
    """

    # Host and path shared by every endpoint; the API key is prefixed as a
    # sub-domain for everything except ``verify-key``.
    baseurl = 'rest.akismet.com/1.1/'

    def __init__(self, key=None, blog_url=None, agent=None):
        """Build the ``User-Agent`` string and call ``setAPIKey``.

        ``agent`` is the name of the calling program; when omitted a default
        based on ``DEFAULTAGENT`` is used.  ``key`` and ``blog_url`` are
        passed straight to ``setAPIKey``.
        """
        if agent is None:
            agent = DEFAULTAGENT % __version__
        self.user_agent = user_agent % (agent, __version__)
        self.setAPIKey(key, blog_url)

    def _getURL(self):
        """
        Fetch the url to make requests to.

        This is composed of the API key (as a sub-domain) plus the baseurl.
        """
        return 'http://%s.%s' % (self.key, self.baseurl)

    def _safeRequest(self, url, data, headers):
        """POST ``data`` to ``url`` and return the response body.

        Every exception raised by the underlying fetch is re-raised as an
        ``AkismetError`` carrying the original message.
        """
        try:
            return _fetch_url(url, data, headers)
        except Exception as e:
            raise AkismetError(str(e))

    def setAPIKey(self, key=None, blog_url=None):
        """
        Set the wordpress API key for all transactions.

        If you don't specify an explicit API ``key`` it will attempt to load
        the key and blog URL from a file called ``apikey.txt`` in the current
        directory.  The first two lines that are neither blank nor start with
        ``#`` are used as the key and the blog URL respectively.

        If ``key`` is ``None`` and the file does not exist, ``key`` and
        ``blog_url`` are stored as given.

        It raises ``APIKeyError`` if ``apikey.txt`` holds fewer than two
        usable lines; in that case the instance is left unchanged.

        This method is *usually* called automatically when you create a new
        ``Akismet`` instance.
        """
        if key is None and isfile('apikey.txt'):
            # Close the file deterministically instead of leaking the handle.
            with open('apikey.txt') as key_file:
                stripped = [line.strip() for line in key_file]
            entries = [entry for entry in stripped
                       if entry and not entry.startswith('#')]
            # Validate before assigning so a bad file cannot leave the
            # instance with a key but no blog URL.
            if len(entries) < 2:
                raise APIKeyError("Your 'apikey.txt' is invalid.")
            self.key = entries[0]
            self.blog_url = entries[1]
        else:
            self.key = key
            self.blog_url = blog_url

    def verify_key(self):
        """
        This equates to the ``verify-key`` call against the akismet API.

        It returns ``True`` if the key is valid, ``False`` otherwise.

        The docs state that you *ought* to call this at the start of the
        transaction.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to akismet fails, the underlying error is re-raised
        as an ``AkismetError``.
        """
        if self.key is None:
            raise APIKeyError("Your have not set an API key.")
        data = {'key': self.key, 'blog': self.blog_url}
        # this function *doesn't* use the key as part of the URL
        url = 'http://%sverify-key' % self.baseurl
        headers = {'User-Agent': self.user_agent}
        resp = self._safeRequest(url, urlencode(data), headers)
        return resp.lower() == 'valid'

    def _build_data(self, comment, data):
        """
        This function builds the data structure required by ``comment_check``,
        ``submit_spam``, and ``submit_ham``.

        It modifies the ``data`` dictionary you give it in place (and so
        doesn't return anything).  Values already present in ``data`` are
        kept, except ``comment_content`` which is always set to ``comment``.

        It raises an ``AkismetError`` if the user IP or user-agent can't be
        worked out.
        """
        data['comment_content'] = comment

        # Required values: taken from the environment when not supplied.
        required = (
            ('user_ip', 'REMOTE_ADDR'),
            ('user_agent', 'HTTP_USER_AGENT'),
        )
        for field, env_name in required:
            if field not in data:
                try:
                    data[field] = os.environ[env_name]
                except KeyError:
                    raise AkismetError("No '%s' supplied" % field)

        # Optional values with fixed or environment-derived defaults.
        data.setdefault('referrer', os.environ.get('HTTP_REFERER', 'unknown'))
        data.setdefault('permalink', '')
        data.setdefault('comment_type', 'comment')
        data.setdefault('comment_author', '')
        data.setdefault('comment_author_email', '')
        data.setdefault('comment_author_url', '')
        passthrough = (
            'SERVER_ADDR',
            'SERVER_ADMIN',
            'SERVER_NAME',
            'SERVER_PORT',
            'SERVER_SIGNATURE',
            'SERVER_SOFTWARE',
            'HTTP_ACCEPT',
        )
        for env_name in passthrough:
            data.setdefault(env_name, os.environ.get(env_name, ''))
        data.setdefault('blog', self.blog_url)

    def _prepare_data(self, comment, data, build_data):
        """Check that a key is set and return the payload dict for a request.

        A new dict is created when ``data`` is ``None``; otherwise ``data`` is
        updated in place.  ``blog`` is always filled in, even when
        ``build_data`` is false.  Raises ``APIKeyError`` when no key is set.
        """
        if self.key is None:
            raise APIKeyError("Your have not set an API key.")
        if data is None:
            data = {}
        if build_data:
            self._build_data(comment, data)
        if 'blog' not in data:
            data['blog'] = self.blog_url
        return data

    def _post(self, endpoint, data):
        """POST the url-encoded ``data`` to ``endpoint`` and return the body."""
        url = '%s%s' % (self._getURL(), endpoint)
        headers = {'User-Agent': self.user_agent}
        return self._safeRequest(url, urlencode(data), headers)

    def comment_check(self, comment, data=None, build_data=True, DEBUG=False):
        """
        This is the function that checks comments.

        It returns ``True`` for spam and ``False`` for ham.

        If you set ``DEBUG=True`` then it will return the text of the response,
        instead of the ``True`` or ``False`` object.

        It raises ``APIKeyError`` if you have not yet set an API key.

        If the connection to Akismet fails then the underlying error is
        re-raised as an ``AkismetError``.

        As a minimum it requires the body of the comment. This is the
        ``comment`` argument.

        Akismet requires some other arguments, and allows some optional ones.
        The more information you give it, the more likely it is to be able to
        make an accurate diagnosis.

        You supply these values using a mapping object (dictionary) as the
        ``data`` argument.  The dictionary is modified in place.

        If ``build_data`` is ``True`` (the default), then *akismet.py* will
        attempt to fill in as much information as possible, using default
        values where necessary. This is particularly useful for programs
        running in a CGI environment. A lot of useful information
        can be supplied from environment variables (``os.environ``). See below.

        You *only* need supply values for which you don't want defaults filled
        in for. All values must be strings.

        There are a few required values. If they are not supplied, and
        defaults can't be worked out, then an ``AkismetError`` is raised.

        If you set ``build_data=False`` the ``comment`` argument is not added
        to ``data``; an ``AkismetError`` is raised if the response is neither
        ``true`` nor ``false``.

        The normal values (and defaults) are as follows : ::

            'user_ip':          os.environ['REMOTE_ADDR']       (*)
            'user_agent':       os.environ['HTTP_USER_AGENT']   (*)
            'referrer':         os.environ.get('HTTP_REFERER', 'unknown') [#]_
            'permalink':        ''
            'comment_type':     'comment' [#]_
            'comment_author':   ''
            'comment_author_email': ''
            'comment_author_url': ''
            'SERVER_ADDR':      os.environ.get('SERVER_ADDR', '')
            'SERVER_ADMIN':     os.environ.get('SERVER_ADMIN', '')
            'SERVER_NAME':      os.environ.get('SERVER_NAME', '')
            'SERVER_PORT':      os.environ.get('SERVER_PORT', '')
            'SERVER_SIGNATURE': os.environ.get('SERVER_SIGNATURE', '')
            'SERVER_SOFTWARE':  os.environ.get('SERVER_SOFTWARE', '')
            'HTTP_ACCEPT':      os.environ.get('HTTP_ACCEPT', '')

        (*) Required values

        You may supply as many additional 'HTTP_*' type values as you wish.
        These should correspond to the http headers sent with the request.

        .. [#] Note the spelling "referrer". This is a required value by the
            akismet api - however, referrer information is not always
            supplied by the browser or server. In fact the HTTP protocol
            forbids relying on referrer information for functionality in
            programs.
        .. [#] The `API docs <http://akismet.com/development/api/>`_ state that this value
            can be " *blank, comment, trackback, pingback, or a made up value*
            *like 'registration'* ".
        """
        data = self._prepare_data(comment, data, build_data)
        resp = self._post('comment-check', data)
        if DEBUG:
            return resp
        resp = resp.lower()
        if resp == 'true':
            return True
        if resp == 'false':
            return False
        # NOTE: Happens when you get a 'howdy wilbur' response !
        raise AkismetError('missing required argument.')

    def submit_spam(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as ham,
        is really spam.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and returns nothing.  It raises ``APIKeyError`` if no API key
        is set and ``AkismetError`` if the request fails.
        """
        data = self._prepare_data(comment, data, build_data)
        self._post('submit-spam', data)

    def submit_ham(self, comment, data=None, build_data=True):
        """
        This function is used to tell akismet that a comment it marked as spam,
        is really ham.

        It takes all the same arguments as ``comment_check``, except for
        *DEBUG*, and returns nothing.  It raises ``APIKeyError`` if no API key
        is set and ``AkismetError`` if the request fails.
        """
        data = self._prepare_data(comment, data, build_data)
        self._post('submit-ham', data)
|