From 762f48a59e454a64f398083f3867dc307eefb893 Mon Sep 17 00:00:00 2001 From: Ross Poulton Date: Thu, 25 Jun 2009 11:22:53 +0000 Subject: [PATCH] Resolves issue #62 - adds spam filtering via akismet.py using either Akismet or TypePad AntiSpam. See the README for configuration instructions. --- LICENSE.3RDPARTY | 41 ++- README | 37 ++- akismet.py | 372 ++++++++++++++++++++++++++++ lib.py | 43 +++- templates/helpdesk/public_spam.html | 13 + views/public.py | 18 +- 6 files changed, 510 insertions(+), 14 deletions(-) create mode 100644 akismet.py create mode 100644 templates/helpdesk/public_spam.html diff --git a/LICENSE.3RDPARTY b/LICENSE.3RDPARTY index d4bc3649..2051225f 100644 --- a/LICENSE.3RDPARTY +++ b/LICENSE.3RDPARTY @@ -3,6 +3,7 @@ distributed with Jutda Helpdesk. 1. License for jQuery & jQuery UI 2. License for jQuery UI 'Smoothness' theme +3. License for akismet.py ---------------------------------------------------------------------- @@ -31,7 +32,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ---------------------------------------------------------------------- -3. License for jQuery UI 'Smoothness' theme +2. License for jQuery UI 'Smoothness' theme /* * jQuery UI screen structure and presentation @@ -39,3 +40,41 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * Author: Scott Jehl, scott@filamentgroup.com, http://www.filamentgroup.com * Visit ThemeRoller.com */ + +---------------------------------------------------------------------- + +3. License for akismet.py + +Copyright (c) 2003-2009, Michael Foord +All rights reserved. +E-mail : fuzzyman AT voidspace DOT org DOT uk + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Michael Foord nor the name of Voidspace + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/README b/README index 544a7cdb..814ec755 100644 --- a/README +++ b/README @@ -11,8 +11,9 @@ Jutda Helpdesk - A Django powered ticket tracker for small enterprise. 3. Upgrading from previous versions 4. Installation 5. Initial Configuration -6. API Usage -7. Thank You +6. Spam filtering +7. API Usage +8. Thank You ######################### 1. Licensing @@ -175,7 +176,35 @@ the current version of Jutda Helpdesk working. You're now up and running! ######################### -6. API Usage +6. Spam filtering +######################### + +Jutda Helpdesk includes a copy of `akismet.py' by Michael Foord, which lets +incoming ticket submissions be automatically checked against either the +Akismet or TypePad Anti-Spam services. 
+ +To enable this functionality, sign up for an API key with one of the following +services: + +Akismet: http://akismet.com/ +Save your API key in settings.py as AKISMET_API_KEY +Note: Akismet is only free for personal use. Paid commercial accounts are +available. + +TypePad AntiSpam: http://antispam.typepad.com/ +Save your API key in settings.py as TYPEPAD_ANTISPAM_API_KEY +This service is free to use, within their terms and conditions. + +If you have either of these settings enabled, the spam filtering will be +done automatically. If you have *both* settings configured, TypePad will +be used instead of Akismet. + +Example configuration in settings.py: + +TYPEPAD_ANTISPAM_API_KEY = 'abc123' + +######################### +7. API Usage ######################### Jutda Helpdesk includes an API accessible via HTTP POST requests, allowing @@ -185,7 +214,7 @@ For usage instructions and command syntax, see the file templates/helpdesk/api_help.html, or visit http://helpdesk/api/help/. ######################### -7. Thank You +8. Thank You ######################### While this started as a project to suit my own needs, since publishing the diff --git a/akismet.py b/akismet.py new file mode 100644 index 00000000..a924ff1e --- /dev/null +++ b/akismet.py @@ -0,0 +1,372 @@ +# Version 0.2.0 +# 2009/06/18 + +# Copyright Michael Foord 2005-2009 +# akismet.py +# Python interface to the akismet API +# E-mail fuzzyman@voidspace.org.uk + +# http://www.voidspace.org.uk/python/modules.shtml +# http://akismet.com + +# Released subject to the BSD License +# See http://www.voidspace.org.uk/python/license.shtml + + +""" +A python interface to the `Akismet `_ API. +This is a web service for blocking SPAM comments to blogs - or other online +services. + +You will need a Wordpress API key, from `wordpress.com `_. + +You should pass in the keyword argument 'agent' to the name of your program, +when you create an Akismet instance. This sets the ``user-agent`` to a useful +value. 
+ +The default is : :: + + Python Interface by Fuzzyman | akismet.py/0.2.0 + +Whatever you pass in, will replace the *Python Interface by Fuzzyman* part. +**0.2.0** will change with the version of this interface. + +Usage example:: + + from akismet import Akismet + + api = Akismet(agent='Test Script') + # if apikey.txt is in place, + # the key will automatically be set + # or you can call api.setAPIKey() + # + if api.key is None: + print "No 'apikey.txt' file." + elif not api.verify_key(): + print "The API key is invalid." + else: + # data should be a dictionary of values + # They can all be filled in with defaults + # from a CGI environment + if api.comment_check(comment, data): + print 'This comment is spam.' + else: + print 'This comment is ham.' +""" + + +import os, sys +from urllib import urlencode + +import socket +if hasattr(socket, 'setdefaulttimeout'): + # Set the default timeout on sockets to 5 seconds + socket.setdefaulttimeout(5) + +__version__ = '0.2.0' + +__all__ = ( + '__version__', + 'Akismet', + 'AkismetError', + 'APIKeyError', + ) + +__author__ = 'Michael Foord ' + +__docformat__ = "restructuredtext en" + +user_agent = "%s | akismet.py/%s" +DEFAULTAGENT = 'Python Interface by Fuzzyman/%s' + +isfile = os.path.isfile + +urllib2 = None +try: + from google.appengine.api import urlfetch +except ImportError: + import urllib2 + +if urllib2 is None: + def _fetch_url(url, data, headers): + req = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=headers) + if req.status_code == 200: + return req.content + raise Exception('Could not fetch Akismet URL: %s Response code: %s' % + (url, req.status_code)) +else: + def _fetch_url(url, data, headers): + req = urllib2.Request(url, data, headers) + h = urllib2.urlopen(req) + resp = h.read() + return resp + + +class AkismetError(Exception): + """Base class for all akismet exceptions.""" + +class APIKeyError(AkismetError): + """Invalid API key.""" + +class Akismet(object): + """A class for working 
with the akismet API""" + + baseurl = 'rest.akismet.com/1.1/' + + def __init__(self, key=None, blog_url=None, agent=None): + """Automatically calls ``setAPIKey``.""" + if agent is None: + agent = DEFAULTAGENT % __version__ + self.user_agent = user_agent % (agent, __version__) + self.setAPIKey(key, blog_url) + + + def _getURL(self): + """ + Fetch the url to make requests to. + + This comprises of api key plus the baseurl. + """ + return 'http://%s.%s' % (self.key, self.baseurl) + + + def _safeRequest(self, url, data, headers): + try: + resp = _fetch_url(url, data, headers) + except Exception, e: + raise AkismetError(str(e)) + return resp + + + def setAPIKey(self, key=None, blog_url=None): + """ + Set the wordpress API key for all transactions. + + If you don't specify an explicit API ``key`` and ``blog_url`` it will + attempt to load them from a file called ``apikey.txt`` in the current + directory. + + This method is *usually* called automatically when you create a new + ``Akismet`` instance. + """ + if key is None and isfile('apikey.txt'): + the_file = [l.strip() for l in open('apikey.txt').readlines() + if l.strip() and not l.strip().startswith('#')] + try: + self.key = the_file[0] + self.blog_url = the_file[1] + except IndexError: + raise APIKeyError("Your 'apikey.txt' is invalid.") + else: + self.key = key + self.blog_url = blog_url + + + def verify_key(self): + """ + This equates to the ``verify-key`` call against the akismet API. + + It returns ``True`` if the key is valid. + + The docs state that you *ought* to call this at the start of the + transaction. + + It raises ``APIKeyError`` if you have not yet set an API key. + + If the connection to akismet fails, it allows the normal ``HTTPError`` + or ``URLError`` to be raised. 
+ (*akismet.py* uses `urllib2 `_) + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + data = { 'key': self.key, 'blog': self.blog_url } + # this function *doesn't* use the key as part of the URL + url = 'http://%sverify-key' % self.baseurl + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + resp = self._safeRequest(url, urlencode(data), headers) + if resp.lower() == 'valid': + return True + else: + return False + + def _build_data(self, comment, data): + """ + This function builds the data structure required by ``comment_check``, + ``submit_spam``, and ``submit_ham``. + + It modifies the ``data`` dictionary you give it in place. (and so + doesn't return anything) + + It raises an ``AkismetError`` if the user IP or user-agent can't be + worked out. + """ + data['comment_content'] = comment + if not 'user_ip' in data: + try: + val = os.environ['REMOTE_ADDR'] + except KeyError: + raise AkismetError("No 'user_ip' supplied") + data['user_ip'] = val + if not 'user_agent' in data: + try: + val = os.environ['HTTP_USER_AGENT'] + except KeyError: + raise AkismetError("No 'user_agent' supplied") + data['user_agent'] = val + # + data.setdefault('referrer', os.environ.get('HTTP_REFERER', 'unknown')) + data.setdefault('permalink', '') + data.setdefault('comment_type', 'comment') + data.setdefault('comment_author', '') + data.setdefault('comment_author_email', '') + data.setdefault('comment_author_url', '') + data.setdefault('SERVER_ADDR', os.environ.get('SERVER_ADDR', '')) + data.setdefault('SERVER_ADMIN', os.environ.get('SERVER_ADMIN', '')) + data.setdefault('SERVER_NAME', os.environ.get('SERVER_NAME', '')) + data.setdefault('SERVER_PORT', os.environ.get('SERVER_PORT', '')) + data.setdefault('SERVER_SIGNATURE', os.environ.get('SERVER_SIGNATURE', + '')) + data.setdefault('SERVER_SOFTWARE', os.environ.get('SERVER_SOFTWARE', + '')) + 
+ data.setdefault('HTTP_ACCEPT', os.environ.get('HTTP_ACCEPT', '')) + data.setdefault('blog', self.blog_url) + + + def comment_check(self, comment, data=None, build_data=True, DEBUG=False): + """ + This is the function that checks comments. + + It returns ``True`` for spam and ``False`` for ham. + + If you set ``DEBUG=True`` then it will return the text of the response, + instead of the ``True`` or ``False`` object. + + It raises ``APIKeyError`` if you have not yet set an API key. + + If the connection to Akismet fails then the ``HTTPError`` or + ``URLError`` will be propagated. + + As a minimum it requires the body of the comment. This is the + ``comment`` argument. + + Akismet requires some other arguments, and allows some optional ones. + The more information you give it, the more likely it is to be able to + make an accurate diagnosis. + + You supply these values using a mapping object (dictionary) as the + ``data`` argument. + + If ``build_data`` is ``True`` (the default), then *akismet.py* will + attempt to fill in as much information as possible, using default + values where necessary. This is particularly useful for programs + running in a {acro;CGI} environment. A lot of useful information + can be supplied from environment variables (``os.environ``). See below. + + You *only* need supply values for which you don't want defaults filled + in for. All values must be strings. + + There are a few required values. If they are not supplied, and + defaults can't be worked out, then an ``AkismetError`` is raised. + + If you set ``build_data=False`` and a required value is missing an + ``AkismetError`` will also be raised. 
+ + The normal values (and defaults) are as follows : :: + + 'user_ip': os.environ['REMOTE_ADDR'] (*) + 'user_agent': os.environ['HTTP_USER_AGENT'] (*) + 'referrer': os.environ.get('HTTP_REFERER', 'unknown') [#]_ + 'permalink': '' + 'comment_type': 'comment' [#]_ + 'comment_author': '' + 'comment_author_email': '' + 'comment_author_url': '' + 'SERVER_ADDR': os.environ.get('SERVER_ADDR', '') + 'SERVER_ADMIN': os.environ.get('SERVER_ADMIN', '') + 'SERVER_NAME': os.environ.get('SERVER_NAME', '') + 'SERVER_PORT': os.environ.get('SERVER_PORT', '') + 'SERVER_SIGNATURE': os.environ.get('SERVER_SIGNATURE', '') + 'SERVER_SOFTWARE': os.environ.get('SERVER_SOFTWARE', '') + 'HTTP_ACCEPT': os.environ.get('HTTP_ACCEPT', '') + + (*) Required values + + You may supply as many additional 'HTTP_*' type values as you wish. + These should correspond to the http headers sent with the request. + + .. [#] Note the spelling "referrer". This is a required value by the + akismet api - however, referrer information is not always + supplied by the browser or server. In fact the HTTP protocol + forbids relying on referrer information for functionality in + programs. + .. [#] The `API docs `_ state that this value + can be " *blank, comment, trackback, pingback, or a made up value* + *like 'registration'* ". + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + if 'blog' not in data: + data['blog'] = self.blog_url + url = '%scomment-check' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + resp = self._safeRequest(url, urlencode(data), headers) + if DEBUG: + return resp + resp = resp.lower() + if resp == 'true': + return True + elif resp == 'false': + return False + else: + # NOTE: Happens when you get a 'howdy wilbur' response ! 
+ raise AkismetError('missing required argument.') + + + def submit_spam(self, comment, data=None, build_data=True): + """ + This function is used to tell akismet that a comment it marked as ham, + is really spam. + + It takes all the same arguments as ``comment_check``, except for + *DEBUG*. + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + url = '%ssubmit-spam' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + self._safeRequest(url, urlencode(data), headers) + + + def submit_ham(self, comment, data=None, build_data=True): + """ + This function is used to tell akismet that a comment it marked as spam, + is really ham. + + It takes all the same arguments as ``comment_check``, except for + *DEBUG*. + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + url = '%ssubmit-ham' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + self._safeRequest(url, urlencode(data), headers) diff --git a/lib.py b/lib.py index 060f84b6..e37494d7 100644 --- a/lib.py +++ b/lib.py @@ -65,7 +65,7 @@ def send_templated_mail(template_name, email_context, recipients, sender=None, b t = EmailTemplate.objects.get(template_name__iexact=template_localized) except EmailTemplate.DoesNotExist: pass - + if not t: t = EmailTemplate.objects.get(template_name__iexact=template_name) @@ -296,9 +296,48 @@ def safe_template_context(ticket): context['ticket'][field] = '%s' % attr() else: context['ticket'][field] = attr - + context['ticket']['queue'] = context['queue'] context['ticket']['assigned_to'] = context['ticket']['_get_assigned_to'] return context + +def 
text_is_spam(text, request): + # Based on a blog post by 'sciyoshi': + # http://sciyoshi.com/blog/2008/aug/27/using-akismet-djangos-new-comments-framework/ + # This will return 'True' if the given text is deemed to be spam, or + # False if it is not spam. If it cannot be checked for some reason, we + # assume it isn't spam. + from django.contrib.sites.models import Site + from django.conf import settings + try: + from helpdesk.akismet import Akismet + except: + return False + + ak = Akismet( + blog_url='http://%s/' % Site.objects.get(pk=settings.SITE_ID).domain, + agent='Jutda Helpdesk', + ) + + if hasattr(settings, 'TYPEPAD_ANTISPAM_API_KEY'): + ak.setAPIKey(key = settings.TYPEPAD_ANTISPAM_API_KEY) + ak.baseurl = 'api.antispam.typepad.com/1.1/' + elif hasattr(settings, 'AKISMET_API_KEY'): + ak.setAPIKey(key = settings.AKISMET_API_KEY) + else: + return False + + if ak.verify_key(): + ak_data = { + 'user_ip': request.META.get('REMOTE_ADDR', '127.0.0.1'), + 'user_agent': request.META.get('HTTP_USER_AGENT', ''), + 'referrer': request.META.get('HTTP_REFERER', ''), + 'comment_type': 'comment', + 'comment_author': '', + } + + return ak.comment_check(text, data=ak_data) + + return False diff --git a/templates/helpdesk/public_spam.html b/templates/helpdesk/public_spam.html new file mode 100644 index 00000000..ed05434c --- /dev/null +++ b/templates/helpdesk/public_spam.html @@ -0,0 +1,13 @@ +{% extends "helpdesk/public_base.html" %}{% load i18n %} + +{% block helpdesk_body %} +

{% trans "Unable To Open Ticket" %}

+ +{% blocktrans %}

Sorry, but there has been an error trying to submit your ticket.

+ +

Our system has marked your submission as spam, so we are unable to save it. If this is not spam, please press back and re-type your message. Be careful to avoid sounding 'spammy', and if you have heaps of links please try removing them if possible.

+ +

We are sorry for any inconvenience, however this check is required to avoid our helpdesk resources being overloaded by spammers.

+{% endblocktrans %} + +{% endblock %} diff --git a/views/public.py b/views/public.py index abdeee92..7b32b82a 100644 --- a/views/public.py +++ b/views/public.py @@ -16,7 +16,7 @@ from django.template import loader, Context, RequestContext from django.utils.translation import ugettext as _ from helpdesk.forms import PublicTicketForm -from helpdesk.lib import send_templated_mail +from helpdesk.lib import send_templated_mail, text_is_spam from helpdesk.models import Ticket, Queue @@ -31,12 +31,16 @@ def homepage(request): form = PublicTicketForm(request.POST, request.FILES) form.fields['queue'].choices = [('', '--------')] + [[q.id, q.title] for q in Queue.objects.filter(allow_public_submission=True)] if form.is_valid(): - ticket = form.save() - return HttpResponseRedirect('%s?ticket=%s&email=%s'% ( - reverse('helpdesk_public_view'), - ticket.ticket_for_url, - ticket.submitter_email) - ) + if text_is_spam(form.cleaned_data['body'], request): + # This submission is spam. Let's not save it. + return render_to_response('helpdesk/public_spam.html', RequestContext(request, {})) + else: + ticket = form.save() + return HttpResponseRedirect('%s?ticket=%s&email=%s'% ( + reverse('helpdesk_public_view'), + ticket.ticket_for_url, + ticket.submitter_email) + ) else: try: queue = Queue.objects.get(slug=request.GET.get('queue', None))