From 514f63841ddab249040bbcaac01af59428661561 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 11:14:30 +0100 Subject: [PATCH 01/11] Trap internal AssertionError from python libraries For some really broken messages, we end up in a cannot-happen codepath. Trap this one and just consider that MIME part empty, and try again later. --- loader/lib/parser.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 8ee25c5..8c98076 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -124,7 +124,14 @@ def clean_charset(self, charset): return charset def get_payload_as_unicode(self, msg): - b = msg.get_payload(decode=True) + try: + b = msg.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore it and hope for a better MIME part later. + b = None + if b: # Find out if there is a charset charset = None From b0f5f31308a28ad9cd6cb613b9d506589ec86e7f Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 11:19:33 +0100 Subject: [PATCH 02/11] Trap the decoding AssertionError in all codepaths --- loader/lib/parser.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 8c98076..b97c8b3 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -310,8 +310,15 @@ def recursive_get_attachments(self, container): return # For now, accept anything not text/plain if container.get_content_type() != 'text/plain': - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return return + # It's a text/plain, it might be worthwhile. # If it has a name, we consider it an attachments if not container.get_params(): @@ -319,19 +326,42 @@ def recursive_get_attachments(self, container): for k,v in container.get_params(): if k=='name' and v != '': # Yes, it has a name - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If it's content-disposition=attachment, we also want to save it if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'): - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If we have already found one text/plain part, make all # further text/plain parts attachments if self.attachments_found_first_plaintext: # However, this will also *always* catch the MIME part added # by majordomo with the footer. So if that one is present, # we need to explicitly exclude it again. - b = container.get_payload(decode=True) + try: + b = container.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + if isinstance(b, str) and not self._re_footer.match(b): # We know there is no name for this one self.attachments.append((None, container.get_content_type(), b)) From 0ea76dc4fe2cbeb8780990f3fb0dc5b54944d51f Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 11:43:30 +0100 Subject: [PATCH 03/11] Continue loading after failures of parsing We continued in the mode where we just generated diffs, but not when making updates. Now continue in both cases, but of course don't do the actual update if the parsing failed. --- loader/reparse_message.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loader/reparse_message.py b/loader/reparse_message.py index df4501a..ed4def2 100755 --- a/loader/reparse_message.py +++ b/loader/reparse_message.py @@ -102,8 +102,9 @@ def ResultIter(cursor): ap.analyze(date_override=opt.force_date) except IgnorableException as e: if opt.update: - raise e - f.write("Exception loading %s: %s" % (id, e)) + print("Exception loading {0}: {1}".format(id, e)) + else: + f.write("Exception loading %s: %s" % (id, e)) continue if opt.update: From 23cd37302b727f38e676d4969a1462f36d61b63b Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 11:59:40 +0100 Subject: [PATCH 04/11] Update django app to python 3 --- django/archives/auth.py | 28 +++++++++---------- django/archives/mailarchives/api.py | 4 +-- .../mailarchives/templatetags/pgfilters.py | 2 +- django/archives/mailarchives/views.py | 20 ++++++------- django/archives/settings.py | 2 +- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/django/archives/auth.py b/django/archives/auth.py index b3af1a5..c24faa1 100644 --- a/django/archives/auth.py +++ b/django/archives/auth.py @@ -28,8 +28,8 @@ import base64 import json import socket -import urlparse -import urllib +from urllib.parse import urlparse, urlencode, parse_qs +import urllib.request from Crypto.Cipher import AES from Crypto.Hash import SHA from Crypto import Random @@ -57,17 +57,17 @@ def login(request): # Put together an url-encoded dict of parameters we're getting back, # including a small nonce at the beginning to make sure it doesn't # encrypt the same way every time. - s = "t=%s&%s" % (int(time.time()), urllib.urlencode({'r': request.GET['next']})) + s = "t=%s&%s" % (int(time.time()), urlencode({'r': request.GET['next']})) # Now encrypt it r = Random.new() iv = r.read(16) - encryptor = AES.new(SHA.new(settings.SECRET_KEY).digest()[:16], AES.MODE_CBC, iv) + encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv) cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes return HttpResponseRedirect("%s?d=%s$%s" % ( settings.PGAUTH_REDIRECT, - base64.b64encode(iv, "-_"), - base64.b64encode(cipher, "-_"), + base64.b64encode(iv, b"-_").decode('utf8'), + base64.b64encode(cipher, b"-_").decode('utf8'), )) else: return HttpResponseRedirect(settings.PGAUTH_REDIRECT) @@ -95,11 +95,11 @@ def auth_receive(request): decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), AES.MODE_CBC, base64.b64decode(str(request.GET['i']), "-_")) - s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(' ') + s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(b' ').decode('utf8') # Now un-urlencode it try: - data = urlparse.parse_qs(s, strict_parsing=True) + data = parse_qs(s, strict_parsing=True) except ValueError: return HttpResponse("Invalid encrypted data received.", status=400) @@ -172,12 +172,12 @@ def auth_receive(request): # redirect the user. if 'd' in data: (ivs, datas) = data['d'][0].split('$') - decryptor = AES.new(SHA.new(settings.SECRET_KEY).digest()[:16], + decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, - base64.b64decode(ivs, "-_")) - s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(' ') + base64.b64decode(ivs, b"-_")) + s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8') try: - rdata = urlparse.parse_qs(s, strict_parsing=True) + rdata = parse_qs(s, strict_parsing=True) except ValueError: return HttpResponse("Invalid encrypted data received.", status=400) if 'r' in rdata: @@ -205,9 +205,9 @@ def user_search(searchterm=None, userid=None): else: q = {'s': searchterm} - u = urllib.urlopen('%ssearch/?%s' % ( + u = urllib.request.urlopen('%ssearch/?%s' % ( settings.PGAUTH_REDIRECT, - urllib.urlencode(q), + urlencode(q), )) (ivs, datas) = u.read().split('&') u.close() diff --git a/django/archives/mailarchives/api.py b/django/archives/mailarchives/api.py index 85eae8e..0ec94e1 100644 --- a/django/archives/mailarchives/api.py +++ b/django/archives/mailarchives/api.py @@ -2,8 +2,8 @@ from django.shortcuts import get_object_or_404 from django.conf import settings -from views import cache -from models import Message, List, ApiClient, ThreadSubscription +from .views import cache +from .models import Message, List, ApiClient, ThreadSubscription import json diff --git a/django/archives/mailarchives/templatetags/pgfilters.py b/django/archives/mailarchives/templatetags/pgfilters.py index 4f85e80..77d5fec 100644 --- a/django/archives/mailarchives/templatetags/pgfilters.py +++ b/django/archives/mailarchives/templatetags/pgfilters.py @@ -48,4 +48,4 @@ def nameonly(value): @register.filter(name='md5') @stringfilter def md5(value): - return hashlib.md5(value).hexdigest() + return hashlib.md5(value.encode('utf8')).hexdigest() diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py index dc5c177..a5c87b3 100644 --- a/django/archives/mailarchives/views.py +++ b/django/archives/mailarchives/views.py @@ -10,20 +10,20 @@ from django.conf import settings import copy -import urllib import re import os import base64 from datetime import datetime, timedelta, date import calendar import email.parser -from StringIO import StringIO +import email.policy +from io import BytesIO import json -from redirecthandler import ERedirect +from .redirecthandler import ERedirect -from models import * +from .models import * # Ensure the user is logged in (if it's not public lists) def ensure_logged_in(request): @@ -117,7 +117,7 @@ def _antispam_auth(request, *_args, **_kwargs): if len(auth) != 2: return HttpResponseForbidden("Invalid authentication") if auth[0].lower() == "basic": - user, pwd = base64.b64decode(auth[1]).split(':') + user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':') if user == 'archives' and pwd == 'antispam': # Actually run the function if auth is correct resp = fn(request, *_args, **_kwargs) @@ -156,7 +156,7 @@ def get_all_groups_and_lists(request, listid=None): 'homelink': 'list/group/%s' % l.group.groupid, } - return (sorted(groups.values(), key=lambda g: g['sortkey']), listgroupid) + return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid) class NavContext(object): @@ -395,7 +395,7 @@ def _get_nextprevious(listmap, dt): WHERE m.date<%(time)s AND lt.listid=l.listid ORDER BY m.date DESC LIMIT 1 ) FROM l""", { - 'lists': listmap.keys(), + 'lists': list(listmap.keys()), 'time': dt, }) retval = {} @@ -525,8 +525,8 @@ def _build_mbox(query, params, msgid=None): def _one_message(raw): # Parse as a message to generate headers - s = StringIO(raw) - parser = email.parser.Parser() + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) msg = parser.parse(s) return msg.as_string(unixfrom=True) @@ -603,7 +603,7 @@ def search(request): # q = query to search for # ln = comma separate list of listnames to search in # d = number of days back to search for, or -1 (or not specified) - # to search the full archives + # to search the full archives # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date] if not request.method == 'POST': raise Http404('I only respond to POST') diff --git a/django/archives/settings.py b/django/archives/settings.py index 4fe7e96..6a42a26 100644 --- a/django/archives/settings.py +++ b/django/archives/settings.py @@ -159,7 +159,7 @@ PUBLIC_ARCHIVES = False try: - from settings_local import * + from .settings_local import * except ImportError: pass From 65b4db1eb5d535b2d8ba82de4fec3aafce16a76a Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 21:15:38 +0100 Subject: [PATCH 05/11] Tabs to 4 spaces pep8 standard for indentation --- django/archives/auth.py | 320 ++--- django/archives/mailarchives/api.py | 212 +-- django/archives/mailarchives/models.py | 200 +-- .../archives/mailarchives/redirecthandler.py | 10 +- .../mailarchives/templatetags/pgfilters.py | 24 +- django/archives/mailarchives/views.py | 1154 ++++++++--------- django/archives/settings.py | 58 +- django/archives/util.py | 60 +- loader/clean_date.py | 100 +- loader/generate_mbox.py | 164 +-- loader/hide_message.py | 158 +-- loader/legacy/scan_old_archives.py | 32 +- loader/lib/exception.py | 2 +- loader/lib/log.py | 44 +- loader/lib/mbox.py | 68 +- loader/lib/parser.py | 1102 ++++++++-------- loader/lib/storage.py | 556 ++++---- loader/lib/varnish.py | 48 +- loader/load_message.py | 314 ++--- loader/pglister_sync.py | 164 +-- loader/purge_frontend_message.py | 64 +- loader/reparse_message.py | 246 ++-- loader/tools/edit_raw.py | 138 +- loader/tools/fix_from.py | 200 +-- 24 files changed, 2719 insertions(+), 2719 deletions(-) diff --git a/django/archives/auth.py b/django/archives/auth.py index c24faa1..fd93790 100644 --- a/django/archives/auth.py +++ b/django/archives/auth.py @@ -36,10 +36,10 @@ import time class AuthBackend(ModelBackend): - # We declare a fake backend that always fails direct authentication - - # since we should never be using direct authentication in the first place! - def authenticate(self, username=None, password=None): - raise Exception("Direct authentication not supported") + # We declare a fake backend that always fails direct authentication - + # since we should never be using direct authentication in the first place! + def authenticate(self, username=None, password=None): + raise Exception("Direct authentication not supported") #### @@ -48,90 +48,90 @@ def authenticate(self, username=None, password=None): # Handle login requests by sending them off to the main site def login(request): - if not hasattr(settings, 'PGAUTH_REDIRECT'): - # No pgauth installed, so allow local installs. - from django.contrib.auth.views import login - return login(request, template_name='admin.html') - - if 'next' in request.GET: - # Put together an url-encoded dict of parameters we're getting back, - # including a small nonce at the beginning to make sure it doesn't - # encrypt the same way every time. - s = "t=%s&%s" % (int(time.time()), urlencode({'r': request.GET['next']})) - # Now encrypt it - r = Random.new() - iv = r.read(16) - encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv) - cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes - - return HttpResponseRedirect("%s?d=%s$%s" % ( - settings.PGAUTH_REDIRECT, - base64.b64encode(iv, b"-_").decode('utf8'), - base64.b64encode(cipher, b"-_").decode('utf8'), - )) - else: - return HttpResponseRedirect(settings.PGAUTH_REDIRECT) + if not hasattr(settings, 'PGAUTH_REDIRECT'): + # No pgauth installed, so allow local installs. + from django.contrib.auth.views import login + return login(request, template_name='admin.html') + + if 'next' in request.GET: + # Put together an url-encoded dict of parameters we're getting back, + # including a small nonce at the beginning to make sure it doesn't + # encrypt the same way every time. + s = "t=%s&%s" % (int(time.time()), urlencode({'r': request.GET['next']})) + # Now encrypt it + r = Random.new() + iv = r.read(16) + encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv) + cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes + + return HttpResponseRedirect("%s?d=%s$%s" % ( + settings.PGAUTH_REDIRECT, + base64.b64encode(iv, b"-_").decode('utf8'), + base64.b64encode(cipher, b"-_").decode('utf8'), + )) + else: + return HttpResponseRedirect(settings.PGAUTH_REDIRECT) # Handle logout requests by logging out of this site and then # redirecting to log out from the main site as well. def logout(request): - if request.user.is_authenticated(): - django_logout(request) - return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT) + if request.user.is_authenticated(): + django_logout(request) + return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT) # Receive an authentication response from the main website and try # to log the user in. def auth_receive(request): - if 's' in request.GET and request.GET['s'] == "logout": - # This was a logout request - return HttpResponseRedirect('/') - - if 'i' not in request.GET: - return HttpResponse("Missing IV in url!", status=400) - if 'd' not in request.GET: - return HttpResponse("Missing data in url!", status=400) - - # Set up an AES object and decrypt the data we received - decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), - AES.MODE_CBC, - base64.b64decode(str(request.GET['i']), "-_")) - s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(b' ').decode('utf8') - - # Now un-urlencode it - try: - data = parse_qs(s, strict_parsing=True) - except ValueError: - return HttpResponse("Invalid encrypted data received.", status=400) - - # Check the timestamp in the authentication - if (int(data['t'][0]) < time.time() - 10): - return HttpResponse("Authentication token too old.", status=400) - - # Update the user record (if any) - try: - user = User.objects.get(username=data['u'][0]) - # User found, let's see if any important fields have changed - changed = False - if user.first_name != data['f'][0]: - user.first_name = data['f'][0] - changed = True - if user.last_name != data['l'][0]: - user.last_name = data['l'][0] - changed = True - if user.email != data['e'][0]: - user.email = data['e'][0] - changed= True - if changed: - user.save() - except User.DoesNotExist: - # User not found, create it! - - # NOTE! We have some legacy users where there is a user in - # the database with a different userid. Instead of trying to - # somehow fix that live, give a proper error message and - # have somebody look at it manually. - if User.objects.filter(email=data['e'][0]).exists(): - return HttpResponse("""A user with email %s already exists, but with + if 's' in request.GET and request.GET['s'] == "logout": + # This was a logout request + return HttpResponseRedirect('/') + + if 'i' not in request.GET: + return HttpResponse("Missing IV in url!", status=400) + if 'd' not in request.GET: + return HttpResponse("Missing data in url!", status=400) + + # Set up an AES object and decrypt the data we received + decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), + AES.MODE_CBC, + base64.b64decode(str(request.GET['i']), "-_")) + s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(b' ').decode('utf8') + + # Now un-urlencode it + try: + data = parse_qs(s, strict_parsing=True) + except ValueError: + return HttpResponse("Invalid encrypted data received.", status=400) + + # Check the timestamp in the authentication + if (int(data['t'][0]) < time.time() - 10): + return HttpResponse("Authentication token too old.", status=400) + + # Update the user record (if any) + try: + user = User.objects.get(username=data['u'][0]) + # User found, let's see if any important fields have changed + changed = False + if user.first_name != data['f'][0]: + user.first_name = data['f'][0] + changed = True + if user.last_name != data['l'][0]: + user.last_name = data['l'][0] + changed = True + if user.email != data['e'][0]: + user.email = data['e'][0] + changed= True + if changed: + user.save() + except User.DoesNotExist: + # User not found, create it! + + # NOTE! We have some legacy users where there is a user in + # the database with a different userid. Instead of trying to + # somehow fix that live, give a proper error message and + # have somebody look at it manually. + if User.objects.filter(email=data['e'][0]).exists(): + return HttpResponse("""A user with email %s already exists, but with a different username than %s. This is almost certainly caused by some legacy data in our database. @@ -142,51 +142,51 @@ def auth_receive(request): We apologize for the inconvenience. """ % (data['e'][0], data['u'][0]), content_type='text/plain') - if hasattr(settings, 'PGAUTH_CREATEUSER_CALLBACK'): - res = getattr(settings, 'PGAUTH_CREATEUSER_CALLBACK')( - data['u'][0], - data['e'][0], - ['f'][0], - data['l'][0], - ) - # If anything is returned, we'll return that as our result. - # If None is returned, it means go ahead and create the user. - if res: - return res - - user = User(username=data['u'][0], - first_name=data['f'][0], - last_name=data['l'][0], - email=data['e'][0], - password='setbypluginnotasha1', - ) - user.save() - - # Ok, we have a proper user record. Now tell django that - # we're authenticated so it persists it in the session. Before - # we do that, we have to annotate it with the backend information. - user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__) - django_login(request, user) - - # Finally, check of we have a data package that tells us where to - # redirect the user. - if 'd' in data: - (ivs, datas) = data['d'][0].split('$') - decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], - AES.MODE_CBC, - base64.b64decode(ivs, b"-_")) - s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8') - try: - rdata = parse_qs(s, strict_parsing=True) - except ValueError: - return HttpResponse("Invalid encrypted data received.", status=400) - if 'r' in rdata: - # Redirect address - return HttpResponseRedirect(rdata['r'][0]) - # No redirect specified, see if we have it in our settings - if hasattr(settings, 'PGAUTH_REDIRECT_SUCCESS'): - return HttpResponseRedirect(settings.PGAUTH_REDIRECT_SUCCESS) - return HttpResponse("Authentication successful, but don't know where to redirect!", status=500) + if hasattr(settings, 'PGAUTH_CREATEUSER_CALLBACK'): + res = getattr(settings, 'PGAUTH_CREATEUSER_CALLBACK')( + data['u'][0], + data['e'][0], + ['f'][0], + data['l'][0], + ) + # If anything is returned, we'll return that as our result. + # If None is returned, it means go ahead and create the user. + if res: + return res + + user = User(username=data['u'][0], + first_name=data['f'][0], + last_name=data['l'][0], + email=data['e'][0], + password='setbypluginnotasha1', + ) + user.save() + + # Ok, we have a proper user record. Now tell django that + # we're authenticated so it persists it in the session. Before + # we do that, we have to annotate it with the backend information. + user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__) + django_login(request, user) + + # Finally, check of we have a data package that tells us where to + # redirect the user. + if 'd' in data: + (ivs, datas) = data['d'][0].split('$') + decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], + AES.MODE_CBC, + base64.b64decode(ivs, b"-_")) + s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8') + try: + rdata = parse_qs(s, strict_parsing=True) + except ValueError: + return HttpResponse("Invalid encrypted data received.", status=400) + if 'r' in rdata: + # Redirect address + return HttpResponseRedirect(rdata['r'][0]) + # No redirect specified, see if we have it in our settings + if hasattr(settings, 'PGAUTH_REDIRECT_SUCCESS'): + return HttpResponseRedirect(settings.PGAUTH_REDIRECT_SUCCESS) + return HttpResponse("Authentication successful, but don't know where to redirect!", status=500) # Perform a search in the central system. Note that the results are returned as an @@ -197,29 +197,29 @@ def auth_receive(request): # Unlike the authentication, searching does not involve the browser - we just make # a direct http call. def user_search(searchterm=None, userid=None): - # If upsteam isn't responding quickly, it's not going to respond at all, and - # 10 seconds is already quite long. - socket.setdefaulttimeout(10) - if userid: - q = {'u': userid} - else: - q = {'s': searchterm} - - u = urllib.request.urlopen('%ssearch/?%s' % ( - settings.PGAUTH_REDIRECT, - urlencode(q), - )) - (ivs, datas) = u.read().split('&') - u.close() - - # Decryption time - decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), - AES.MODE_CBC, - base64.b64decode(ivs, "-_")) - s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(' ') - j = json.loads(s) - - return j + # If upsteam isn't responding quickly, it's not going to respond at all, and + # 10 seconds is already quite long. + socket.setdefaulttimeout(10) + if userid: + q = {'u': userid} + else: + q = {'s': searchterm} + + u = urllib.request.urlopen('%ssearch/?%s' % ( + settings.PGAUTH_REDIRECT, + urlencode(q), + )) + (ivs, datas) = u.read().split('&') + u.close() + + # Decryption time + decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), + AES.MODE_CBC, + base64.b64decode(ivs, "-_")) + s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(' ') + j = json.loads(s) + + return j # Import a user into the local authentication system. Will initially # make a search for it, and if anything other than one entry is returned @@ -230,18 +230,18 @@ def user_search(searchterm=None, userid=None): # The call to this function should normally be wrapped in a transaction, # and this function itself will make no attempt to do anything about that. def user_import(uid): - u = user_search(userid=uid) - if len(u) != 1: - raise Exception("Internal error, duplicate or no user found") + u = user_search(userid=uid) + if len(u) != 1: + raise Exception("Internal error, duplicate or no user found") - u = u[0] + u = u[0] - if User.objects.filter(username=u['u']).exists(): - raise Exception("User already exists") + if User.objects.filter(username=u['u']).exists(): + raise Exception("User already exists") - User(username=u['u'], - first_name=u['f'], - last_name=u['l'], - email=u['e'], - password='setbypluginnotsha1', - ).save() + User(username=u['u'], + first_name=u['f'], + last_name=u['l'], + email=u['e'], + password='setbypluginnotsha1', + ).save() diff --git a/django/archives/mailarchives/api.py b/django/archives/mailarchives/api.py index 0ec94e1..e8e0891 100644 --- a/django/archives/mailarchives/api.py +++ b/django/archives/mailarchives/api.py @@ -10,126 +10,126 @@ @cache(hours=4) def listinfo(request): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') - resp = HttpResponse(content_type='application/json') - json.dump([{ - 'name': l.listname, - 'shortdesc': l.shortdesc, - 'description': l.description, - 'active': l.active, - 'group': l.group.groupname, - } for l in List.objects.select_related('group').all()], resp) + resp = HttpResponse(content_type='application/json') + json.dump([{ + 'name': l.listname, + 'shortdesc': l.shortdesc, + 'description': l.description, + 'active': l.active, + 'group': l.group.groupname, + } for l in List.objects.select_related('group').all()], resp) - return resp + return resp @cache(hours=4) def latest(request, listname): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') - - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') - - # Return the latest messages on this list. - # If is not specified, return 50. Max value for is 100. - if 'n' in request.GET: - try: - limit = int(request.GET['n']) - except: - limit = 0 - else: - limit = 50 - if limit <= 0 or limit > 100: - limit = 50 - - extrawhere=[] - extraparams=[] - - # Return only messages that have attachments? - if 'a' in request.GET: - if request.GET['a'] == '1': - extrawhere.append("has_attachment") - - # Restrict by full text search - if 's' in request.GET and request.GET['s']: - extrawhere.append("fti @@ plainto_tsquery('public.pg', %s)") - extraparams.append(request.GET['s']) - - if listname != '*': - list = get_object_or_404(List, listname=listname) - extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid) - else: - list = None - extrawhere='' - - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit] - allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) - - resp = HttpResponse(content_type='application/json') - json.dump([ - {'msgid': m.messageid, - 'date': m.date.isoformat(), - 'from': m.mailfrom, - 'subj': m.subject,} - for m in mlist], resp) - - # Make sure this expires from the varnish cache when new entries show - # up in this month. - # XXX: need to deal with the global view, but for now API callers come in directly - if list: - resp['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (list.listid, year, month) for year, month in allyearmonths])) - return resp + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') + + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') + + # Return the latest messages on this list. + # If is not specified, return 50. Max value for is 100. + if 'n' in request.GET: + try: + limit = int(request.GET['n']) + except: + limit = 0 + else: + limit = 50 + if limit <= 0 or limit > 100: + limit = 50 + + extrawhere=[] + extraparams=[] + + # Return only messages that have attachments? + if 'a' in request.GET: + if request.GET['a'] == '1': + extrawhere.append("has_attachment") + + # Restrict by full text search + if 's' in request.GET and request.GET['s']: + extrawhere.append("fti @@ plainto_tsquery('public.pg', %s)") + extraparams.append(request.GET['s']) + + if listname != '*': + list = get_object_or_404(List, listname=listname) + extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid) + else: + list = None + extrawhere='' + + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit] + allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) + + resp = HttpResponse(content_type='application/json') + json.dump([ + {'msgid': m.messageid, + 'date': m.date.isoformat(), + 'from': m.mailfrom, + 'subj': m.subject,} + for m in mlist], resp) + + # Make sure this expires from the varnish cache when new entries show + # up in this month. + # XXX: need to deal with the global view, but for now API callers come in directly + if list: + resp['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (list.listid, year, month) for year, month in allyearmonths])) + return resp @cache(hours=4) def thread(request, msgid): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') - - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') - - # Return metadata about a single thread. A list of all the emails - # that are in the thread with their basic attributes are included. - msg = get_object_or_404(Message, messageid=msgid) - mlist = Message.objects.defer('bodytxt', 'cc', 'to').filter(threadid=msg.threadid) - - resp = HttpResponse(content_type='application/json') - json.dump([ - {'msgid': m.messageid, - 'date': m.date.isoformat(), - 'from': m.mailfrom, - 'subj': m.subject, - 'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()], - } - for m in mlist], resp) - resp['X-pgthread'] = m.threadid - return resp + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') + + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') + + # Return metadata about a single thread. A list of all the emails + # that are in the thread with their basic attributes are included. + msg = get_object_or_404(Message, messageid=msgid) + mlist = Message.objects.defer('bodytxt', 'cc', 'to').filter(threadid=msg.threadid) + + resp = HttpResponse(content_type='application/json') + json.dump([ + {'msgid': m.messageid, + 'date': m.date.isoformat(), + 'from': m.mailfrom, + 'subj': m.subject, + 'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()], + } + for m in mlist], resp) + resp['X-pgthread'] = m.threadid + return resp def thread_subscribe(request, msgid): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') - if 'HTTP_X_APIKEY' not in request.META: - return HttpResponseForbidden('No API key') + if 'HTTP_X_APIKEY' not in request.META: + return HttpResponseForbidden('No API key') - if request.method != 'PUT': - return HttpResponseForbidden('Invalid HTTP verb') + if request.method != 'PUT': + return HttpResponseForbidden('Invalid HTTP verb') - apiclient = get_object_or_404(ApiClient, apikey=request.META['HTTP_X_APIKEY']) - msg = get_object_or_404(Message, messageid=msgid) + apiclient = get_object_or_404(ApiClient, apikey=request.META['HTTP_X_APIKEY']) + msg = get_object_or_404(Message, messageid=msgid) - (obj, created) = ThreadSubscription.objects.get_or_create(apiclient=apiclient, - threadid=msg.threadid) - if created: - return HttpResponse(status=201) - else: - return HttpResponse(status=200) + (obj, created) = ThreadSubscription.objects.get_or_create(apiclient=apiclient, + threadid=msg.threadid) + if created: + return HttpResponse(status=201) + else: + return HttpResponse(status=200) diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py index 6270974..ca8e88f 100644 --- a/django/archives/mailarchives/models.py +++ b/django/archives/mailarchives/models.py @@ -4,128 +4,128 @@ # We're intentionally putting the prefix text in the array here, since # we might need that flexibility in the future. hide_reasons = [ - None, # placeholder for 0 - 'This message has been hidden because a virus was found in the message.', # 1 - 'This message has been hidden because the message violated policies.', # 2 - 'This message has been hidden because for privacy reasons.', # 3 - 'This message was corrupt', # 4 - ] + None, # placeholder for 0 + 'This message has been hidden because a virus was found in the message.', # 1 + 'This message has been hidden because the message violated policies.', # 2 + 'This message has been hidden because for privacy reasons.', # 3 + 'This message was corrupt', # 4 + ] class Message(models.Model): - threadid = models.IntegerField(null=False, blank=False) - mailfrom = models.TextField(null=False, db_column='_from') - to = models.TextField(null=False, db_column='_to') - cc = models.TextField(null=False) - subject = models.TextField(null=False) - date = models.DateTimeField(null=False) - messageid = models.TextField(null=False) - bodytxt = models.TextField(null=False) - # rawtxt is a bytea field, which django doesn't support (easily) - parentid = models.IntegerField(null=False, blank=False) - has_attachment = models.BooleanField(null=False, default=False) - hiddenstatus = models.IntegerField(null=True) - # fti is a tsvector field, which django doesn't support (easily) - - class Meta: - db_table = 'messages' - - @property - def printdate(self): - return self.date.strftime("%Y-%m-%d %H:%M:%S") - - @property - def shortdate(self): - return self.date.strftime("%Y%m%d%H%M") - - # We explicitly cache the attachments here, so we can use them - # multiple times from templates without generating multiple queries - # to the database. - _attachments = None - @property - def attachments(self): - if not self._attachments: - self._attachments = self.attachment_set.extra(select={'len': 'length(attachment)'}).all() - return self._attachments - - @property - def hiddenreason(self): - if not self.hiddenstatus: return None - try: - return hide_reasons[self.hiddenstatus] - except: - # Weird value - return 'This message has been hidden.' + threadid = models.IntegerField(null=False, blank=False) + mailfrom = models.TextField(null=False, db_column='_from') + to = models.TextField(null=False, db_column='_to') + cc = models.TextField(null=False) + subject = models.TextField(null=False) + date = models.DateTimeField(null=False) + messageid = models.TextField(null=False) + bodytxt = models.TextField(null=False) + # rawtxt is a bytea field, which django doesn't support (easily) + parentid = models.IntegerField(null=False, blank=False) + has_attachment = models.BooleanField(null=False, default=False) + hiddenstatus = models.IntegerField(null=True) + # fti is a tsvector field, which django doesn't support (easily) + + class Meta: + db_table = 'messages' + + @property + def printdate(self): + return self.date.strftime("%Y-%m-%d %H:%M:%S") + + @property + def shortdate(self): + return self.date.strftime("%Y%m%d%H%M") + + # We explicitly cache the attachments here, so we can use them + # multiple times from templates without generating multiple queries + # to the database. + _attachments = None + @property + def attachments(self): + if not self._attachments: + self._attachments = self.attachment_set.extra(select={'len': 'length(attachment)'}).all() + return self._attachments + + @property + def hiddenreason(self): + if not self.hiddenstatus: return None + try: + return hide_reasons[self.hiddenstatus] + except: + # Weird value + return 'This message has been hidden.' class ListGroup(models.Model): - groupid = models.IntegerField(null=False, primary_key=True) - groupname = models.CharField(max_length=200, null=False, blank=False) - sortkey = models.IntegerField(null=False) + groupid = models.IntegerField(null=False, primary_key=True) + groupname = models.CharField(max_length=200, null=False, blank=False) + sortkey = models.IntegerField(null=False) - class Meta: - db_table = 'listgroups' + class Meta: + db_table = 'listgroups' class List(models.Model): - listid = models.IntegerField(null=False, primary_key=True) - listname = models.CharField(max_length=200, null=False, blank=False, unique=True) - shortdesc = models.TextField(null=False, blank=False) - description = models.TextField(null=False, blank=False) - active = models.BooleanField(null=False, blank=False) - group = models.ForeignKey(ListGroup, db_column='groupid') - subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)") + listid = models.IntegerField(null=False, primary_key=True) + listname = models.CharField(max_length=200, null=False, blank=False, unique=True) + shortdesc = models.TextField(null=False, blank=False) + description = models.TextField(null=False, blank=False) + active = models.BooleanField(null=False, blank=False) + group = models.ForeignKey(ListGroup, db_column='groupid') + subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)") - @property - def maybe_shortdesc(self): - if self.shortdesc: - return self.shortdesc - return self.listname + @property + def maybe_shortdesc(self): + if self.shortdesc: + return self.shortdesc + return self.listname - class Meta: - db_table = 'lists' + class Meta: + db_table = 'lists' class Attachment(models.Model): - message = models.ForeignKey(Message, null=False, blank=False, db_column='message') - filename = models.CharField(max_length=1000, null=False, blank=False) - contenttype = models.CharField(max_length=1000, null=False, blank=False) - # attachment = bytea, not supported by django at this point + message = models.ForeignKey(Message, null=False, blank=False, db_column='message') + filename = models.CharField(max_length=1000, null=False, blank=False) + contenttype = models.CharField(max_length=1000, null=False, blank=False) + # attachment = bytea, not supported by django at this point - class Meta: - db_table = 'attachments' - # Predictable same-as-insert order - ordering = ('id',) + class Meta: + db_table = 'attachments' + # Predictable same-as-insert order + ordering = ('id',) - def inlineable(self): - # Return True if this image should be inlined - if self.contenttype in ('image/png', 'image/gif', 'image/jpg', 'image/jpeg'): - # Note! len needs to be set with extra(select=) - if self.len < 75000: - return True - return False + def inlineable(self): + # Return True if this image should be inlined + if self.contenttype in ('image/png', 'image/gif', 'image/jpg', 'image/jpeg'): + # Note! len needs to be set with extra(select=) + if self.len < 75000: + return True + return False class ListSubscriber(models.Model): - # Only used when public access is not allowed. - # We set the username of the community account instead of a - # foreign key, because the user might not exist. - list = models.ForeignKey(List, null=False, blank=False) - username = models.CharField(max_length=30, null=False, blank=False) + # Only used when public access is not allowed. + # We set the username of the community account instead of a + # foreign key, because the user might not exist. + list = models.ForeignKey(List, null=False, blank=False) + username = models.CharField(max_length=30, null=False, blank=False) - class Meta: - unique_together = (('list', 'username'), ) - db_table = 'listsubscribers' + class Meta: + unique_together = (('list', 'username'), ) + db_table = 'listsubscribers' class ApiClient(models.Model): - apikey = models.CharField(max_length=100, null=False, blank=False) - postback = models.URLField(max_length=500, null=False, blank=False) + apikey = models.CharField(max_length=100, null=False, blank=False) + postback = models.URLField(max_length=500, null=False, blank=False) - class Meta: - db_table = 'apiclients' + class Meta: + db_table = 'apiclients' class ThreadSubscription(models.Model): - apiclient = models.ForeignKey(ApiClient, null=False, blank=False) - threadid = models.IntegerField(null=False, blank=False) + apiclient = models.ForeignKey(ApiClient, null=False, blank=False) + threadid = models.IntegerField(null=False, blank=False) - class Meta: - db_table = 'threadsubscriptions' - unique_together = (('apiclient', 'threadid'),) + class Meta: + db_table = 'threadsubscriptions' + unique_together = (('apiclient', 'threadid'),) diff --git a/django/archives/mailarchives/redirecthandler.py b/django/archives/mailarchives/redirecthandler.py index 030b43f..f91a773 100644 --- a/django/archives/mailarchives/redirecthandler.py +++ b/django/archives/mailarchives/redirecthandler.py @@ -1,10 +1,10 @@ from django import shortcuts class ERedirect(Exception): - def __init__(self, url): - self.url = url + def __init__(self, url): + self.url = url class RedirectMiddleware(object): - def process_exception(self, request, exception): - if isinstance(exception, ERedirect): - return shortcuts.redirect(exception.url) + def process_exception(self, request, exception): + if isinstance(exception, ERedirect): + return shortcuts.redirect(exception.url) diff --git a/django/archives/mailarchives/templatetags/pgfilters.py b/django/archives/mailarchives/templatetags/pgfilters.py index 77d5fec..c7d121b 100644 --- a/django/archives/mailarchives/templatetags/pgfilters.py +++ b/django/archives/mailarchives/templatetags/pgfilters.py @@ -8,12 +8,12 @@ register = template.Library() def _rewrite_email(value): - return value.replace('@', '(at)').replace('.','(dot)') + return value.replace('@', '(at)').replace('.','(dot)') @register.filter(name='hidemail') @stringfilter def hidemail(value): - return _rewrite_email(value) + return _rewrite_email(value) # A regular expression and replacement function to mangle email addresses. # @@ -27,25 +27,25 @@ def hidemail(value): # are mangled. _re_mail = re.compile('(/m(essage-id)?/)?[^()<>@,;:\/\s"\'&|]+@[^()<>@,;:\/\s"\'&|]+') def _rewrite_email_match(match): - if match.group(1): - return match.group(0) # was preceded by /message-id/ - else: - return _rewrite_email(match.group(0)) + if match.group(1): + return match.group(0) # was preceded by /message-id/ + else: + return _rewrite_email(match.group(0)) @register.filter(name='hideallemail') @stringfilter def hideallemail(value): - return _re_mail.sub(lambda x: _rewrite_email_match(x), value) + return _re_mail.sub(lambda x: _rewrite_email_match(x), value) @register.filter(name='nameonly') @stringfilter def nameonly(value): - (name, email) = parseaddr(value) - if name: - return name - return email.split('@')[0] + (name, email) = parseaddr(value) + if name: + return name + return email.split('@')[0] @register.filter(name='md5') @stringfilter def md5(value): - return hashlib.md5(value.encode('utf8')).hexdigest() + return hashlib.md5(value.encode('utf8')).hexdigest() diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py index a5c87b3..c82ea04 100644 --- a/django/archives/mailarchives/views.py +++ b/django/archives/mailarchives/views.py @@ -27,40 +27,40 @@ # Ensure the user is logged in (if it's not public lists) def ensure_logged_in(request): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - return - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + return + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) # Ensure the user has permissions to access a list. If not, raise # a permissions exception. def ensure_list_permissions(request, l): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - if request.user.is_superuser: - return - if l.subscriber_access and ListSubscriber.objects.filter(list=l, username=request.user.username).exists(): - return - # Logged in but no access - raise PermissionDenied("Access denied.") - - # Redirect to a login page - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + if request.user.is_superuser: + return + if l.subscriber_access and ListSubscriber.objects.filter(list=l, username=request.user.username).exists(): + return + # Logged in but no access + raise PermissionDenied("Access denied.") + + # Redirect to a login page + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) # Ensure the user has permissions to access a message. In order to view # a message, the user must have permissions on *all* lists the thread # appears on. def ensure_message_permissions(request, msgid): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - if request.user.is_superuser: - return - - curs = connection.cursor() - curs.execute("""SELECT EXISTS ( + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + if request.user.is_superuser: + return + + curs = connection.cursor() + curs.execute("""SELECT EXISTS ( SELECT 1 FROM list_threads INNER JOIN messages ON messages.threadid=list_threads.threadid WHERE messages.messageid=%(msgid)s @@ -70,302 +70,302 @@ def ensure_message_permissions(request, msgid): AND listsubscribers.username=%(username)s ) )""", { - 'msgid': msgid, - 'username': request.user.username, - }) - if not curs.fetchone()[0]: - # This thread is not on any list that the user does not have permissions on. - return + 'msgid': msgid, + 'username': request.user.username, + }) + if not curs.fetchone()[0]: + # This thread is not on any list that the user does not have permissions on. + return - # Logged in but no access - raise PermissionDenied("Access denied.") + # Logged in but no access + raise PermissionDenied("Access denied.") - # Redirect to a login page - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Redirect to a login page + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) # Decorator to set cache age def cache(days=0, hours=0, minutes=0, seconds=0): - "Set the server to cache object a specified time. td must be a timedelta object" - def _cache(fn): - def __cache(request, *_args, **_kwargs): - resp = fn(request, *_args, **_kwargs) - if settings.PUBLIC_ARCHIVES: - # Only set cache headers on public archives - td = timedelta(hours=hours, minutes=minutes, seconds=seconds) - resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds) - return resp - return __cache - return _cache + "Set the server to cache object a specified time. td must be a timedelta object" + def _cache(fn): + def __cache(request, *_args, **_kwargs): + resp = fn(request, *_args, **_kwargs) + if settings.PUBLIC_ARCHIVES: + # Only set cache headers on public archives + td = timedelta(hours=hours, minutes=minutes, seconds=seconds) + resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds) + return resp + return __cache + return _cache def nocache(fn): - def _nocache(request, *_args, **_kwargs): - resp = fn(request, *_args, **_kwargs) - if settings.PUBLIC_ARCHIVES: - # Only set cache headers on public archives - resp['Cache-Control'] = 's-maxage=0' - return resp - return _nocache + def _nocache(request, *_args, **_kwargs): + resp = fn(request, *_args, **_kwargs) + if settings.PUBLIC_ARCHIVES: + # Only set cache headers on public archives + resp['Cache-Control'] = 's-maxage=0' + return resp + return _nocache # Decorator to require http auth def antispam_auth(fn): - def _antispam_auth(request, *_args, **_kwargs): - if not settings.PUBLIC_ARCHIVES: - return fn(request, *_args, **_kwargs) - - if 'HTTP_AUTHORIZATION' in request.META: - auth = request.META['HTTP_AUTHORIZATION'].split() - if len(auth) != 2: - return HttpResponseForbidden("Invalid authentication") - if auth[0].lower() == "basic": - user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':') - if user == 'archives' and pwd == 'antispam': - # Actually run the function if auth is correct - resp = fn(request, *_args, **_kwargs) - return resp - # Require authentication - response = HttpResponse() - response.status_code = 401 - response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"' - return response - - return _antispam_auth + def _antispam_auth(request, *_args, **_kwargs): + if not settings.PUBLIC_ARCHIVES: + return fn(request, *_args, **_kwargs) + + if 'HTTP_AUTHORIZATION' in request.META: + auth = request.META['HTTP_AUTHORIZATION'].split() + if len(auth) != 2: + return HttpResponseForbidden("Invalid authentication") + if auth[0].lower() == "basic": + user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':') + if user == 'archives' and pwd == 'antispam': + # Actually run the function if auth is correct + resp = fn(request, *_args, **_kwargs) + return resp + # Require authentication + response = HttpResponse() + response.status_code = 401 + response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"' + return response + + return _antispam_auth def get_all_groups_and_lists(request, listid=None): - # Django doesn't (yet) support traversing the reverse relationship, - # so we'll get all the lists and rebuild it backwards. - if settings.PUBLIC_ARCHIVES or request.user.is_superuser: - lists = List.objects.select_related('group').all().order_by('listname') - else: - lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname') - listgroupid = None - groups = {} - for l in lists: - if l.listid == listid: - listgroupid = l.group.groupid - - if l.group.groupid in groups: - groups[l.group.groupid]['lists'].append(l) - else: - groups[l.group.groupid] = { - 'groupid': l.group.groupid, - 'groupname': l.group.groupname, - 'sortkey': l.group.sortkey, - 'lists': [l,], - 'homelink': 'list/group/%s' % l.group.groupid, - } - - return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid) + # Django doesn't (yet) support traversing the reverse relationship, + # so we'll get all the lists and rebuild it backwards. + if settings.PUBLIC_ARCHIVES or request.user.is_superuser: + lists = List.objects.select_related('group').all().order_by('listname') + else: + lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname') + listgroupid = None + groups = {} + for l in lists: + if l.listid == listid: + listgroupid = l.group.groupid + + if l.group.groupid in groups: + groups[l.group.groupid]['lists'].append(l) + else: + groups[l.group.groupid] = { + 'groupid': l.group.groupid, + 'groupname': l.group.groupname, + 'sortkey': l.group.sortkey, + 'lists': [l,], + 'homelink': 'list/group/%s' % l.group.groupid, + } + + return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid) class NavContext(object): - def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None): - self.request = request - self.ctx = {} - - if all_groups: - groups = copy.deepcopy(all_groups) - if expand_groupid: - listgroupid = int(expand_groupid) - else: - (groups, listgroupid) = get_all_groups_and_lists(request, listid) - - for g in groups: - # On the root page, remove *all* entries - # On other lists, remove the entries in all groups other than our - # own. - if (not listid and not expand_groupid) or listgroupid != g['groupid']: - # Root page, so remove *all* entries - g['lists'] = [] - - self.ctx.update({'listgroups': groups}) - if listname: - self.ctx.update({'searchform_listname': listname}) + def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None): + self.request = request + self.ctx = {} + + if all_groups: + groups = copy.deepcopy(all_groups) + if expand_groupid: + listgroupid = int(expand_groupid) + else: + (groups, listgroupid) = get_all_groups_and_lists(request, listid) + + for g in groups: + # On the root page, remove *all* entries + # On other lists, remove the entries in all groups other than our + # own. + if (not listid and not expand_groupid) or listgroupid != g['groupid']: + # Root page, so remove *all* entries + g['lists'] = [] + + self.ctx.update({'listgroups': groups}) + if listname: + self.ctx.update({'searchform_listname': listname}) def render_nav(navcontext, template, ctx): - ctx.update(navcontext.ctx) - return render(navcontext.request, template, ctx) + ctx.update(navcontext.ctx) + return render(navcontext.request, template, ctx) @cache(hours=4) def index(request): - ensure_logged_in(request) + ensure_logged_in(request) - (groups, listgroupid) = get_all_groups_and_lists(request) - return render_nav(NavContext(request, all_groups=groups), 'index.html', { - 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups], - }) + (groups, listgroupid) = get_all_groups_and_lists(request) + return render_nav(NavContext(request, all_groups=groups), 'index.html', { + 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups], + }) @cache(hours=8) def groupindex(request, groupid): - (groups, listgroupid) = get_all_groups_and_lists(request) - mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)] - if len(mygroups) == 0: - raise Http404('List group does not exist') + (groups, listgroupid) = get_all_groups_and_lists(request) + mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)] + if len(mygroups) == 0: + raise Http404('List group does not exist') - return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', { - 'groups': mygroups, - }) + return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', { + 'groups': mygroups, + }) @cache(hours=8) def monthlist(request, listname): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - curs = connection.cursor() - curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid}) - months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()] + curs = connection.cursor() + curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid}) + months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()] - return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', { - 'list': l, - 'months': months, - }) + return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', { + 'list': l, + 'months': months, + }) def get_monthday_info(mlist, l, d): - allmonths = set([m.date.month for m in mlist]) - monthdate = None - daysinmonth = None - if len(allmonths) == 1: - # All hits are from one month, so generate month links - monthdate = mlist[0].date - elif len(allmonths) == 0: - # No hits at all, so generate month links from the specified date - monthdate = d - - if monthdate: - curs = connection.cursor() - curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", { - 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1), - 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]), - 'listid': l.listid, - }) - daysinmonth = [int(r[0]) for r in curs.fetchall()] - - yearmonth = None - if monthdate: - yearmonth = "%s%02d" % (monthdate.year, monthdate.month) - return (yearmonth, daysinmonth) + allmonths = set([m.date.month for m in mlist]) + monthdate = None + daysinmonth = None + if len(allmonths) == 1: + # All hits are from one month, so generate month links + monthdate = mlist[0].date + elif len(allmonths) == 0: + # No hits at all, so generate month links from the specified date + monthdate = d + + if monthdate: + curs = connection.cursor() + curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", { + 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1), + 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]), + 'listid': l.listid, + }) + daysinmonth = [int(r[0]) for r in curs.fetchall()] + + yearmonth = None + if monthdate: + yearmonth = "%s%02d" % (monthdate.year, monthdate.month) + return (yearmonth, daysinmonth) def _render_datelist(request, l, d, datefilter, title, queryproc): - # NOTE! Basic permissions checks must be done before calling this function! - - if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra( - where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"], - params=(l.listid, request.user.username), - ) - else: - # Else we return everything - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]) - mlist = queryproc(mlist) - - allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) - (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d) - - r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', { - 'list': l, - 'messages': mlist, - 'title': title, - 'daysinmonth': daysinmonth, - 'yearmonth': yearmonth, - }) - r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths])) - return r + # NOTE! Basic permissions checks must be done before calling this function! + + if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra( + where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"], + params=(l.listid, request.user.username), + ) + else: + # Else we return everything + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]) + mlist = queryproc(mlist) + + allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) + (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d) + + r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', { + 'list': l, + 'messages': mlist, + 'title': title, + 'daysinmonth': daysinmonth, + 'yearmonth': yearmonth, + }) + r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths])) + return r def render_datelist_from(request, l, d, title, to=None): - # NOTE! Basic permissions checks must be done before calling this function! - datefilter = Q(date__gte=d) - if to: - datefilter.add(Q(date__lt=to), Q.AND) + # NOTE! Basic permissions checks must be done before calling this function! + datefilter = Q(date__gte=d) + if to: + datefilter.add(Q(date__lt=to), Q.AND) - return _render_datelist(request, l, d, datefilter, title, - lambda x: list(x.order_by('date')[:200])) + return _render_datelist(request, l, d, datefilter, title, + lambda x: list(x.order_by('date')[:200])) def render_datelist_to(request, l, d, title): - # NOTE! Basic permissions checks must be done before calling this function! + # NOTE! Basic permissions checks must be done before calling this function! - # Need to sort this backwards in the database to get the LIMIT applied - # properly, and then manually resort it in the correct order. We can do - # the second sort safely in python since it's not a lot of items.. + # Need to sort this backwards in the database to get the LIMIT applied + # properly, and then manually resort it in the correct order. We can do + # the second sort safely in python since it's not a lot of items.. - return _render_datelist(request, l, d, Q(date__lte=d), title, - lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date)) + return _render_datelist(request, l, d, Q(date__lte=d), title, + lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date)) @cache(hours=2) def datelistsince(request, listname, msgid): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - msg = get_object_or_404(Message, messageid=msgid) - return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) + msg = get_object_or_404(Message, messageid=msgid) + return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) # Longer cache since this will be used for the fixed date links @cache(hours=4) def datelistsincetime(request, listname, year, month, day, hour, minute): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - try: - d = datetime(int(year), int(month), int(day), int(hour), int(minute)) - except ValueError: - raise Http404("Invalid date format, not found") - return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) + try: + d = datetime(int(year), int(month), int(day), int(hour), int(minute)) + except ValueError: + raise Http404("Invalid date format, not found") + return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) @cache(hours=2) def datelistbefore(request, listname, msgid): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - msg = get_object_or_404(Message, messageid=msgid) - return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) + msg = get_object_or_404(Message, messageid=msgid) + return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) @cache(hours=2) def datelistbeforetime(request, listname, year, month, day, hour, minute): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - try: - d = datetime(int(year), int(month), int(day), int(hour), int(minute)) - except ValueError: - raise Http404("Invalid date format, not found") - return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) + try: + d = datetime(int(year), int(month), int(day), int(hour), int(minute)) + except ValueError: + raise Http404("Invalid date format, not found") + return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) @cache(hours=4) def datelist(request, listname, year, month): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) - try: - d = datetime(int(year), int(month), 1) - except ValueError: - raise Http404("Malformatted date, month not found") + try: + d = datetime(int(year), int(month), 1) + except ValueError: + raise Http404("Malformatted date, month not found") - enddate = d+timedelta(days=31) - enddate = datetime(enddate.year, enddate.month, 1) - return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate) + enddate = d+timedelta(days=31) + enddate = datetime(enddate.year, enddate.month, 1) + return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate) @cache(hours=4) def attachment(request, attid): - # Use a direct query instead of django, since it has bad support for - # bytea - # XXX: minor information leak, because we load the whole attachment before we check - # the thread permissions. Is that OK? - curs = connection.cursor() - curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)}) - r = curs.fetchall() - if len(r) != 1: - return HttpResponse("Attachment not found") + # Use a direct query instead of django, since it has bad support for + # bytea + # XXX: minor information leak, because we load the whole attachment before we check + # the thread permissions. Is that OK? + curs = connection.cursor() + curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)}) + r = curs.fetchall() + if len(r) != 1: + return HttpResponse("Attachment not found") - ensure_message_permissions(request, r[0][2]) + ensure_message_permissions(request, r[0][2]) - return HttpResponse(r[0][3], content_type=r[0][1]) + return HttpResponse(r[0][3], content_type=r[0][1]) def _build_thread_structure(threadid): - # Yeah, this is *way* too complicated for the django ORM - curs = connection.cursor() - curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS( + # Yeah, this is *way* too complicated for the django ORM + curs = connection.cursor() + curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS( SELECT id,_from,subject,date,messageid,has_attachment,parentid,array[]::timestamptz[] FROM messages m WHERE m.threadid=%(threadid)s AND parentid IS NULL UNION ALL SELECT m.id,m._from,m.subject,m.date,m.messageid,m.has_attachment,m.parentid,t.datepath||t.date FROM messages m INNER JOIN t ON t.id=m.parentid WHERE m.threadid=%(threadid)s @@ -373,395 +373,395 @@ def _build_thread_structure(threadid): SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t ORDER BY datepath||date """, {'threadid': threadid}) - for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall(): - yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)} + for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall(): + yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)} def _get_nextprevious(listmap, dt): - curs = connection.cursor() - curs.execute("""WITH l(listid) AS ( + curs = connection.cursor() + curs.execute("""WITH l(listid) AS ( SELECT unnest(%(lists)s) ) SELECT l.listid,1, (SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m - INNER JOIN list_threads lt ON lt.threadid=m.threadid - WHERE m.date>%(time)s AND lt.listid=l.listid - ORDER BY m.date LIMIT 1 + INNER JOIN list_threads lt ON lt.threadid=m.threadid + WHERE m.date>%(time)s AND lt.listid=l.listid + ORDER BY m.date LIMIT 1 ) FROM l UNION ALL SELECT l.listid,0, (SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m - INNER JOIN list_threads lt ON lt.threadid=m.threadid - WHERE m.date<%(time)s AND lt.listid=l.listid - ORDER BY m.date DESC LIMIT 1 + INNER JOIN list_threads lt ON lt.threadid=m.threadid + WHERE m.date<%(time)s AND lt.listid=l.listid + ORDER BY m.date DESC LIMIT 1 ) FROM l""", { - 'lists': list(listmap.keys()), - 'time': dt, - }) - retval = {} - for listid, isnext, data in curs.fetchall(): - if data: - # Can be NULL, but if not, it will always have all fields - listname = listmap[listid] - d = { - 'msgid': data[0], - 'date': data[1], - 'subject': data[2], - 'from': data[3], - } - if listname in retval: - retval[listname][isnext and 'next' or 'prev'] = d - else: - retval[listname] = { - isnext and 'next' or 'prev': d - } - return retval + 'lists': list(listmap.keys()), + 'time': dt, + }) + retval = {} + for listid, isnext, data in curs.fetchall(): + if data: + # Can be NULL, but if not, it will always have all fields + listname = listmap[listid] + d = { + 'msgid': data[0], + 'date': data[1], + 'subject': data[2], + 'from': data[3], + } + if listname in retval: + retval[listname][isnext and 'next' or 'prev'] = d + else: + retval[listname] = { + isnext and 'next' or 'prev': d + } + return retval @cache(hours=4) def message(request, msgid): - ensure_message_permissions(request, msgid) - - try: - m = Message.objects.get(messageid=msgid) - except Message.DoesNotExist: - raise Http404('Message does not exist') - - lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname') - listmap = dict([(l.listid, l.listname) for l in lists]) - threadstruct = list(_build_thread_structure(m.threadid)) - newest = calendar.timegm(max(threadstruct, key=lambda x: x['date'])['date'].utctimetuple()) - if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: - ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE")) - if ims >= newest: - return HttpResponseNotModified() - - responses = [t for t in threadstruct if t['parentid']==m.id] - - if m.parentid: - for t in threadstruct: - if t['id'] == m.parentid: - parent = t - break - else: - parent = None - nextprev = _get_nextprevious(listmap, m.date) - - r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', { - 'msg': m, - 'threadstruct': threadstruct, - 'responses': responses, - 'parent': parent, - 'lists': lists, - 'nextprev': nextprev, - }) - r['X-pgthread'] = ":%s:" % m.threadid - r['Last-Modified'] = http_date(newest) - return r + ensure_message_permissions(request, msgid) + + try: + m = Message.objects.get(messageid=msgid) + except Message.DoesNotExist: + raise Http404('Message does not exist') + + lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname') + listmap = dict([(l.listid, l.listname) for l in lists]) + threadstruct = list(_build_thread_structure(m.threadid)) + newest = calendar.timegm(max(threadstruct, key=lambda x: x['date'])['date'].utctimetuple()) + if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: + ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE")) + if ims >= newest: + return HttpResponseNotModified() + + responses = [t for t in threadstruct if t['parentid']==m.id] + + if m.parentid: + for t in threadstruct: + if t['id'] == m.parentid: + parent = t + break + else: + parent = None + nextprev = _get_nextprevious(listmap, m.date) + + r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', { + 'msg': m, + 'threadstruct': threadstruct, + 'responses': responses, + 'parent': parent, + 'lists': lists, + 'nextprev': nextprev, + }) + r['X-pgthread'] = ":%s:" % m.threadid + r['Last-Modified'] = http_date(newest) + return r @cache(hours=4) def message_flat(request, msgid): - ensure_message_permissions(request, msgid) - - try: - msg = Message.objects.get(messageid=msgid) - except Message.DoesNotExist: - raise Http404('Message does not exist') - allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date')) - lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname') - - isfirst = (msg == allmsg[0]) - - newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple()) - if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: - ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE')) - if ims >= newest: - return HttpResponseNotModified() - - r = render_nav(NavContext(request), 'message_flat.html', { - 'msg': msg, - 'allmsg': allmsg, - 'lists': lists, - 'isfirst': isfirst, - }) - r['X-pgthread'] = ":%s:" % msg.threadid - r['Last-Modified'] = http_date(newest) - return r + ensure_message_permissions(request, msgid) + + try: + msg = Message.objects.get(messageid=msgid) + except Message.DoesNotExist: + raise Http404('Message does not exist') + allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date')) + lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname') + + isfirst = (msg == allmsg[0]) + + newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple()) + if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: + ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE')) + if ims >= newest: + return HttpResponseNotModified() + + r = render_nav(NavContext(request), 'message_flat.html', { + 'msg': msg, + 'allmsg': allmsg, + 'lists': lists, + 'isfirst': isfirst, + }) + r['X-pgthread'] = ":%s:" % msg.threadid + r['Last-Modified'] = http_date(newest) + return r @nocache @antispam_auth def message_raw(request, msgid): - ensure_message_permissions(request, msgid) + ensure_message_permissions(request, msgid) - curs = connection.cursor() - curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", { - 'messageid': msgid, - }) - row = curs.fetchall() - if len(row) != 1: - raise Http404('Message does not exist') + curs = connection.cursor() + curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", { + 'messageid': msgid, + }) + row = curs.fetchall() + if len(row) != 1: + raise Http404('Message does not exist') - if row[0][1]: - r = HttpResponse('This message has been hidden.', content_type='text/plain') - else: - r = HttpResponse(row[0][2], content_type='text/plain') - r['X-pgthread'] = ":%s:" % row[0][0] - return r + if row[0][1]: + r = HttpResponse('This message has been hidden.', content_type='text/plain') + else: + r = HttpResponse(row[0][2], content_type='text/plain') + r['X-pgthread'] = ":%s:" % row[0][0] + return r def _build_mbox(query, params, msgid=None): - connection.ensure_connection() + connection.ensure_connection() - # Rawmsg is not in the django model, so we have to query it separately - curs = connection.connection.cursor(name='mbox', withhold=True) - curs.itersize = 50 - curs.execute(query, params) + # Rawmsg is not in the django model, so we have to query it separately + curs = connection.connection.cursor(name='mbox', withhold=True) + curs.itersize = 50 + curs.execute(query, params) - firstmsg = curs.fetchone() - if msgid and firstmsg[0] != msgid: - # Always redirect to the first message in the thread when building - # the mbox, to not generate potentially multiple copies in - # the cache. - return HttpResponsePermanentRedirect(firstmsg[0]) + firstmsg = curs.fetchone() + if msgid and firstmsg[0] != msgid: + # Always redirect to the first message in the thread when building + # the mbox, to not generate potentially multiple copies in + # the cache. + return HttpResponsePermanentRedirect(firstmsg[0]) - def _one_message(raw): - # Parse as a message to generate headers - s = BytesIO(raw) - parser = email.parser.BytesParser(policy=email.policy.compat32) - msg = parser.parse(s) - return msg.as_string(unixfrom=True) + def _one_message(raw): + # Parse as a message to generate headers + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) + msg = parser.parse(s) + return msg.as_string(unixfrom=True) - def _message_stream(first): - yield _one_message(first[1]) + def _message_stream(first): + yield _one_message(first[1]) - for mid, raw in curs: - yield _one_message(raw) + for mid, raw in curs: + yield _one_message(raw) - # Close must be done inside this function. If we close it in the - # main function, it won't let the iterator run to completion. - curs.close() + # Close must be done inside this function. If we close it in the + # main function, it won't let the iterator run to completion. + curs.close() - r = StreamingHttpResponse(_message_stream(firstmsg)) - r['Content-type'] = 'application/mbox' - return r + r = StreamingHttpResponse(_message_stream(firstmsg)) + r['Content-type'] = 'application/mbox' + return r @nocache @antispam_auth def message_mbox(request, msgid): - ensure_message_permissions(request, msgid) + ensure_message_permissions(request, msgid) - msg = get_object_or_404(Message, messageid=msgid) + msg = get_object_or_404(Message, messageid=msgid) - return _build_mbox( - "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date", - { - 'thread': msg.threadid, - }, - msgid) + return _build_mbox( + "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date", + { + 'thread': msg.threadid, + }, + msgid) @nocache @antispam_auth def mbox(request, listname, listname2, mboxyear, mboxmonth): - if (listname != listname2): - raise Http404('List name mismatch') - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) - - mboxyear = int(mboxyear) - mboxmonth = int(mboxmonth) - - query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date" - params = { - 'listid': l.listid, - 'startdate': date(mboxyear, mboxmonth, 1), - 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59), - } - - if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: - # Restrict to only view messages that the user has permissions on all threads they're on - query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))') - params['username'] = request.user.username - else: - # Just return the whole thing - query = query.replace('%%%', '') - return _build_mbox(query, params) + if (listname != listname2): + raise Http404('List name mismatch') + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + mboxyear = int(mboxyear) + mboxmonth = int(mboxmonth) + + query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date" + params = { + 'listid': l.listid, + 'startdate': date(mboxyear, mboxmonth, 1), + 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59), + } + + if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: + # Restrict to only view messages that the user has permissions on all threads they're on + query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))') + params['username'] = request.user.username + else: + # Just return the whole thing + query = query.replace('%%%', '') + return _build_mbox(query, params) def search(request): - if not settings.PUBLIC_ARCHIVES: - # We don't support searching of non-public archives at all at this point. - # XXX: room for future improvement - return HttpResponseForbidden('Not public archives') - - # Only certain hosts are allowed to call the search API - if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS: - return HttpResponseForbidden('Invalid host') - - curs = connection.cursor() - - # Perform a search of the archives and return a JSON document. - # Expects the following (optional) POST parameters: - # q = query to search for - # ln = comma separate list of listnames to search in - # d = number of days back to search for, or -1 (or not specified) - # to search the full archives - # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date] - if not request.method == 'POST': - raise Http404('I only respond to POST') - - if 'q' not in request.POST: - raise Http404('No search query specified') - query = request.POST['q'] - - if 'ln' in request.POST: - try: - curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", { - 'names': request.POST['ln'].split(','), - }) - lists = [x for x, in curs.fetchall()] - except: - # If failing to parse list of lists, just search all - lists = None - else: - lists = None - - if 'd' in request.POST: - days = int(request.POST['d']) - if days < 1 or days > 365: - firstdate = None - else: - firstdate = datetime.now() - timedelta(days=days) - else: - firstdate = None - - if 's' in request.POST: - list_sort = request.POST['s'] - if not list_sort in ('d', 'r', 'i'): - list_stort = 'r' - else: - list_sort = 'r' - - # Ok, we have all we need to do the search - - if query.find('@') > 0: - # This could be a messageid. So try to get that one specifically first. - # We don't do a more specific check if it's a messageid because doing - # a key lookup is cheap... - curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", { - 'q': query, - }) - a = curs.fetchall() - if len(a) == 1: - # Yup, this was a messageid - resp = HttpResponse(content_type='application/json') - - json.dump({'messageidmatch': 1}, resp) - return resp - # If not found, fall through to a regular search - - curs.execute("SET gin_fuzzy_search_limit=10000") - qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)" - params = { - 'q': query, - } - if lists: - qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))" - params['lists'] = lists - if firstdate: - qstr += " AND m.date > %(date)s" - params['date'] = firstdate - if list_sort == 'r': - qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000" - elif list_sort == 'd': - qstr += " ORDER BY date DESC LIMIT 1000" - else: - qstr += " ORDER BY date ASC LIMIT 1000" - - curs.execute(qstr, params) - - resp = HttpResponse(content_type='application/json') - - json.dump([{ - 'm': messageid, - 'd': date.isoformat(), - 's': subject, - 'f': mailfrom, - 'r': rank, - 'a': abstract.replace("[[[[[[", "").replace("]]]]]]",""), - - } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()], - resp) - return resp + if not settings.PUBLIC_ARCHIVES: + # We don't support searching of non-public archives at all at this point. + # XXX: room for future improvement + return HttpResponseForbidden('Not public archives') + + # Only certain hosts are allowed to call the search API + if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS: + return HttpResponseForbidden('Invalid host') + + curs = connection.cursor() + + # Perform a search of the archives and return a JSON document. + # Expects the following (optional) POST parameters: + # q = query to search for + # ln = comma separate list of listnames to search in + # d = number of days back to search for, or -1 (or not specified) + # to search the full archives + # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date] + if not request.method == 'POST': + raise Http404('I only respond to POST') + + if 'q' not in request.POST: + raise Http404('No search query specified') + query = request.POST['q'] + + if 'ln' in request.POST: + try: + curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", { + 'names': request.POST['ln'].split(','), + }) + lists = [x for x, in curs.fetchall()] + except: + # If failing to parse list of lists, just search all + lists = None + else: + lists = None + + if 'd' in request.POST: + days = int(request.POST['d']) + if days < 1 or days > 365: + firstdate = None + else: + firstdate = datetime.now() - timedelta(days=days) + else: + firstdate = None + + if 's' in request.POST: + list_sort = request.POST['s'] + if not list_sort in ('d', 'r', 'i'): + list_stort = 'r' + else: + list_sort = 'r' + + # Ok, we have all we need to do the search + + if query.find('@') > 0: + # This could be a messageid. So try to get that one specifically first. + # We don't do a more specific check if it's a messageid because doing + # a key lookup is cheap... + curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", { + 'q': query, + }) + a = curs.fetchall() + if len(a) == 1: + # Yup, this was a messageid + resp = HttpResponse(content_type='application/json') + + json.dump({'messageidmatch': 1}, resp) + return resp + # If not found, fall through to a regular search + + curs.execute("SET gin_fuzzy_search_limit=10000") + qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)" + params = { + 'q': query, + } + if lists: + qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))" + params['lists'] = lists + if firstdate: + qstr += " AND m.date > %(date)s" + params['date'] = firstdate + if list_sort == 'r': + qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000" + elif list_sort == 'd': + qstr += " ORDER BY date DESC LIMIT 1000" + else: + qstr += " ORDER BY date ASC LIMIT 1000" + + curs.execute(qstr, params) + + resp = HttpResponse(content_type='application/json') + + json.dump([{ + 'm': messageid, + 'd': date.isoformat(), + 's': subject, + 'f': mailfrom, + 'r': rank, + 'a': abstract.replace("[[[[[[", "").replace("]]]]]]",""), + + } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()], + resp) + return resp @cache(seconds=10) def web_sync_timestamp(request): - s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n") - r = HttpResponse(s, content_type='text/plain') - r['Content-Length'] = len(s) - return r + s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n") + r = HttpResponse(s, content_type='text/plain') + r['Content-Length'] = len(s) + return r @cache(hours=8) def legacy(request, listname, year, month, msgnum): - curs = connection.cursor() - curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", { - 'list': listname, - 'year': year, - 'month': month, - 'msgnum': msgnum, - }) - r = curs.fetchall() - if len(r) != 1: - raise Http404('Message does not exist') - return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0]) + curs = connection.cursor() + curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", { + 'list': listname, + 'year': year, + 'month': month, + 'msgnum': msgnum, + }) + r = curs.fetchall() + if len(r) != 1: + raise Http404('Message does not exist') + return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0]) # dynamic CSS serving, meaning we merge a number of different CSS into a # single one, making sure it turns into a single http response. We do this # dynamically, since the output will be cached. _dynamic_cssmap = { - 'base': ['media/css/main.css', - 'media/css/normalize.css',], - 'docs': ['media/css/global.css', - 'media/css/table.css', - 'media/css/text.css', - 'media/css/docs.css'], - } + 'base': ['media/css/main.css', + 'media/css/normalize.css',], + 'docs': ['media/css/global.css', + 'media/css/table.css', + 'media/css/text.css', + 'media/css/docs.css'], + } @cache(hours=8) def dynamic_css(request, css): - if css not in _dynamic_cssmap: - raise Http404('CSS not found') - files = _dynamic_cssmap[css] - resp = HttpResponse(content_type='text/css') - - # We honor if-modified-since headers by looking at the most recently - # touched CSS file. - latestmod = 0 - for fn in files: - try: - stime = os.stat(fn).st_mtime - if latestmod < stime: - latestmod = stime - except OSError: - # If we somehow referred to a file that didn't exist, or - # one that we couldn't access. - raise Http404('CSS (sub) not found') - if 'HTTP_IF_MODIFIED_SINCE' in request.META: - # This code is mostly stolen from django :) - matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", - request.META.get('HTTP_IF_MODIFIED_SINCE'), - re.IGNORECASE) - header_mtime = parse_http_date_safe(matches.group(1)) - # We don't do length checking, just the date - if int(latestmod) <= header_mtime: - return HttpResponseNotModified(content_type='text/css') - resp['Last-Modified'] = http_date(latestmod) - - for fn in files: - with open(fn) as f: - resp.write("/* %s */\n" % fn) - resp.write(f.read()) - resp.write("\n") - - return resp + if css not in _dynamic_cssmap: + raise Http404('CSS not found') + files = _dynamic_cssmap[css] + resp = HttpResponse(content_type='text/css') + + # We honor if-modified-since headers by looking at the most recently + # touched CSS file. + latestmod = 0 + for fn in files: + try: + stime = os.stat(fn).st_mtime + if latestmod < stime: + latestmod = stime + except OSError: + # If we somehow referred to a file that didn't exist, or + # one that we couldn't access. + raise Http404('CSS (sub) not found') + if 'HTTP_IF_MODIFIED_SINCE' in request.META: + # This code is mostly stolen from django :) + matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", + request.META.get('HTTP_IF_MODIFIED_SINCE'), + re.IGNORECASE) + header_mtime = parse_http_date_safe(matches.group(1)) + # We don't do length checking, just the date + if int(latestmod) <= header_mtime: + return HttpResponseNotModified(content_type='text/css') + resp['Last-Modified'] = http_date(latestmod) + + for fn in files: + with open(fn) as f: + resp.write("/* %s */\n" % fn) + resp.write(f.read()) + resp.write("\n") + + return resp # Redirect to the requested url, with a slash first. This is used to remove # trailing slashes on messageid links by doing a permanent redirect. This is @@ -769,11 +769,11 @@ def dynamic_css(request, css): # in the cache. @cache(hours=8) def slash_redirect(request, url): - return HttpResponsePermanentRedirect("/%s" % url) + return HttpResponsePermanentRedirect("/%s" % url) # Redirect the requested URL to whatever happens to be in the regexp capture. # This is used for user agents that generate broken URLs that are easily # captured using regexp. @cache(hours=8) def re_redirect(request, prefix, msgid): - return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid)) + return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid)) diff --git a/django/archives/settings.py b/django/archives/settings.py index 6a42a26..5d214d7 100644 --- a/django/archives/settings.py +++ b/django/archives/settings.py @@ -96,18 +96,18 @@ ROOT_URLCONF = 'archives.urls' TEMPLATES = [{ - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.request', - 'django.contrib.messages.context_processors.messages', - 'archives.util.PGWebContextProcessor', - ], - 'loaders': [ - 'django.template.loaders.filesystem.Loader', - 'django.template.loaders.app_directories.Loader', - ], - }, + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.request', + 'django.contrib.messages.context_processors.messages', + 'archives.util.PGWebContextProcessor', + ], + 'loaders': [ + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', + ], + }, }] @@ -122,7 +122,7 @@ # 'django.contrib.admin', # Uncomment the next line to enable admin documentation: # 'django.contrib.admindocs', - 'archives.mailarchives', + 'archives.mailarchives', ] # A sample logging configuration. The only tangible logging @@ -159,23 +159,23 @@ PUBLIC_ARCHIVES = False try: - from .settings_local import * + from .settings_local import * except ImportError: - pass + pass # If this is a non-public site, enable middleware for handling logins etc if not PUBLIC_ARCHIVES: - MIDDLEWARE_CLASSES = [ - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - ] + MIDDLEWARE_CLASSES - MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware') - - INSTALLED_APPS = [ - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - ] + INSTALLED_APPS - - from archives.util import validate_new_user - PGAUTH_CREATEUSER_CALLBACK=validate_new_user + MIDDLEWARE_CLASSES = [ + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + ] + MIDDLEWARE_CLASSES + MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware') + + INSTALLED_APPS = [ + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + ] + INSTALLED_APPS + + from archives.util import validate_new_user + PGAUTH_CREATEUSER_CALLBACK=validate_new_user diff --git a/django/archives/util.py b/django/archives/util.py index 425fffe..4ed9730 100644 --- a/django/archives/util.py +++ b/django/archives/util.py @@ -3,42 +3,42 @@ from django.utils.functional import SimpleLazyObject def validate_new_user(username, email, firstname, lastname): - # Only allow user creation if they are already a subscriber - curs = connection.cursor() - curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", { - 'username': username, - }) - if curs.fetchone()[0]: - # User is subscribed to something, so allow creation - return None + # Only allow user creation if they are already a subscriber + curs = connection.cursor() + curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", { + 'username': username, + }) + if curs.fetchone()[0]: + # User is subscribed to something, so allow creation + return None - return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.") + return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.") def _get_gitrev(): - # Return the current git revision, that is used for - # cache-busting URLs. - try: - with open('../.git/refs/heads/master') as f: - return f.readline()[:8] - except IOError: - # A "git gc" will remove the ref and replace it with a packed-refs. - try: - with open('../.git/packed-refs') as f: - for l in f.readlines(): - if l.endswith("refs/heads/master\n"): - return l[:8] - # Not found in packed-refs. Meh, just make one up. - return 'ffffffff' - except IOError: - # If packed-refs also can't be read, just give up - return 'eeeeeeee' + # Return the current git revision, that is used for + # cache-busting URLs. + try: + with open('../.git/refs/heads/master') as f: + return f.readline()[:8] + except IOError: + # A "git gc" will remove the ref and replace it with a packed-refs. + try: + with open('../.git/packed-refs') as f: + for l in f.readlines(): + if l.endswith("refs/heads/master\n"): + return l[:8] + # Not found in packed-refs. Meh, just make one up. + return 'ffffffff' + except IOError: + # If packed-refs also can't be read, just give up + return 'eeeeeeee' # Template context processor to add information about the root link and # the current git revision. git revision is returned as a lazy object so # we don't spend effort trying to load it if we don't need it (though # all general pages will need it since it's used to render the css urls) def PGWebContextProcessor(request): - gitrev = SimpleLazyObject(_get_gitrev) - return { - 'gitrev': gitrev, - } + gitrev = SimpleLazyObject(_get_gitrev) + return { + 'gitrev': gitrev, + } diff --git a/loader/clean_date.py b/loader/clean_date.py index 4ea2951..e94d9cb 100755 --- a/loader/clean_date.py +++ b/loader/clean_date.py @@ -16,62 +16,62 @@ import psycopg2 def scan_message(messageid, olddate, curs): - u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid - print("Scanning message at %s (date reported as %s)..." % (u, olddate)) + u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid + print("Scanning message at %s (date reported as %s)..." % (u, olddate)) - f = urlopen(u) - p = Parser() - msg = p.parse(f) - f.close() + f = urlopen(u) + p = Parser() + msg = p.parse(f) + f.close() - # Can be either one of them, but we really don't care... - ds = None - for k,r in list(msg.items()): - if k != 'Received': continue + # Can be either one of them, but we really don't care... + ds = None + for k,r in list(msg.items()): + if k != 'Received': continue - print("Trying on %s" % r) - m = re.search(';\s*(.*)$', r) - if m: - ds = m.group(1) - break - m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r) - if m: - ds = m.group(1) - break + print("Trying on %s" % r) + m = re.search(';\s*(.*)$', r) + if m: + ds = m.group(1) + break + m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r) + if m: + ds = m.group(1) + break - if not ds: - print("Could not find date. Sorry.") - return False - d = None - try: - d = dateutil.parser.parse(ds) - except: - print("Could not parse date '%s', sorry." % ds) - return + if not ds: + print("Could not find date. Sorry.") + return False + d = None + try: + d = dateutil.parser.parse(ds) + except: + print("Could not parse date '%s', sorry." % ds) + return - while True: - x = input("Parsed this as date %s. Update? " % d) - if x.upper() == 'Y': - curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { - 'd': d, - 'm': messageid, - }) - print("Updated.") - break - elif x.upper() == 'N': - break - + while True: + x = input("Parsed this as date %s. Update? " % d) + if x.upper() == 'Y': + curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { + 'd': d, + 'm': messageid, + }) + print("Updated.") + break + elif x.upper() == 'N': + break + if __name__ == "__main__": - cfg = ConfigParser() - cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) - connstr = cfg.get('db','connstr') + cfg = ConfigParser() + cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) + connstr = cfg.get('db','connstr') - conn = psycopg2.connect(connstr) + conn = psycopg2.connect(connstr) - curs = conn.cursor() - curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'") - for messageid, date in curs.fetchall(): - scan_message(messageid, date, curs) + curs = conn.cursor() + curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'") + for messageid, date in curs.fetchall(): + scan_message(messageid, date, curs) - conn.commit() - print("Done.") + conn.commit() + print("Done.") diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py index c2299e1..3ac1b97 100755 --- a/loader/generate_mbox.py +++ b/loader/generate_mbox.py @@ -21,96 +21,96 @@ def generate_single_mbox(conn, listid, year, month, destination): - curs = conn.cursor() - curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { - 'listid': listid, - 'startdate': date(year, month, 1), - 'enddate': date(year, month, calendar.monthrange(year, month)[1]), - }) - with open(destination, 'w', encoding='utf8') as f: - for id, raw, in curs: - s = BytesIO(raw) - parser = email.parser.BytesParser(policy=email.policy.compat32) - msg = parser.parse(s) - try: - x = msg.as_string(unixfrom=True) - f.write(x) - except UnicodeEncodeError as e: - print("Not including {0}, unicode error".format(msg['message-id'])) - except Exception as e: - print("Not including {0}, exception {1}".format(msg['message-id'], e)) + curs = conn.cursor() + curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { + 'listid': listid, + 'startdate': date(year, month, 1), + 'enddate': date(year, month, calendar.monthrange(year, month)[1]), + }) + with open(destination, 'w', encoding='utf8') as f: + for id, raw, in curs: + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) + msg = parser.parse(s) + try: + x = msg.as_string(unixfrom=True) + f.write(x) + except UnicodeEncodeError as e: + print("Not including {0}, unicode error".format(msg['message-id'])) + except Exception as e: + print("Not including {0}, exception {1}".format(msg['message-id'], e)) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate mbox file(s)") - parser.add_argument('--list', type=str, help='List to generate for') - parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02') - parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True) - parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists') - parser.add_argument('--quiet', action='store_true', help='Run quiet') + parser = argparse.ArgumentParser(description="Generate mbox file(s)") + parser.add_argument('--list', type=str, help='List to generate for') + parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02') + parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True) + parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists') + parser.add_argument('--quiet', action='store_true', help='Run quiet') - args = parser.parse_args() + args = parser.parse_args() - if args.auto: - if (args.list or args.month): - print("Must not specify list and month when auto-generating!") - sys.exit(1) - if not os.path.isdir(args.destination): - print("Destination must be a directory, and exist, when auto-generating") - sys.exit(1) - else: - if not (args.list and args.month and args.destination): - print("Must specify list, month and destination when generating a single mailbox") - parser.print_help() - sys.exit(1) + if args.auto: + if (args.list or args.month): + print("Must not specify list and month when auto-generating!") + sys.exit(1) + if not os.path.isdir(args.destination): + print("Destination must be a directory, and exist, when auto-generating") + sys.exit(1) + else: + if not (args.list and args.month and args.destination): + print("Must specify list, month and destination when generating a single mailbox") + parser.print_help() + sys.exit(1) - # Arguments OK, now connect - cfg = ConfigParser() - cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini')) - try: - connstr = cfg.get('db','connstr') - except: - connstr = 'need_connstr' + # Arguments OK, now connect + cfg = ConfigParser() + cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini')) + try: + connstr = cfg.get('db','connstr') + except: + connstr = 'need_connstr' - conn = psycopg2.connect(connstr) - curs = conn.cursor() + conn = psycopg2.connect(connstr) + curs = conn.cursor() - if args.auto: - curs.execute("SELECT listid, listname FROM lists WHERE active ORDER BY listname") - all_lists = curs.fetchall() - today = date.today() - yesterday = today - timedelta(days=1) - if today.month == yesterday.month: - # Same month, so do it - monthrange = ((today.year, today.month),) - else: - monthrange = ((today.year, today.month),(yesterday.year, yesterday.month)) - for lid, lname in all_lists: - for year, month in monthrange: - fullpath = os.path.join(args.destination, lname, 'files/public/archive') - if not os.path.isdir(fullpath): - os.makedirs(fullpath) - if not args.quiet: - print("Generating {0}-{1} for {2}".format(year, month, lname)) - generate_single_mbox(conn, lid, year, month, - os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) - else: - # Parse year and month - m = re.match('^(\d{4})-(\d{2})$', args.month) - if not m: - print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) - sys.exit(1) - year = int(m.group(1)) - month = int(m.group(2)) + if args.auto: + curs.execute("SELECT listid, listname FROM lists WHERE active ORDER BY listname") + all_lists = curs.fetchall() + today = date.today() + yesterday = today - timedelta(days=1) + if today.month == yesterday.month: + # Same month, so do it + monthrange = ((today.year, today.month),) + else: + monthrange = ((today.year, today.month),(yesterday.year, yesterday.month)) + for lid, lname in all_lists: + for year, month in monthrange: + fullpath = os.path.join(args.destination, lname, 'files/public/archive') + if not os.path.isdir(fullpath): + os.makedirs(fullpath) + if not args.quiet: + print("Generating {0}-{1} for {2}".format(year, month, lname)) + generate_single_mbox(conn, lid, year, month, + os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) + else: + # Parse year and month + m = re.match('^(\d{4})-(\d{2})$', args.month) + if not m: + print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) + sys.exit(1) + year = int(m.group(1)) + month = int(m.group(2)) - curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", { - 'name': args.list, - }) - if curs.rowcount != 1: - print("List {0} not found.".format(args.list)) - sys.exit(1) + curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", { + 'name': args.list, + }) + if curs.rowcount != 1: + print("List {0} not found.".format(args.list)) + sys.exit(1) - if not args.quiet: - print("Generating {0}-{1} for {2}".format(year, month, args.list)) - generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) + if not args.quiet: + print("Generating {0}-{1} for {2}".format(year, month, args.list)) + generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) diff --git a/loader/hide_message.py b/loader/hide_message.py index 8bb9359..7a0f524 100755 --- a/loader/hide_message.py +++ b/loader/hide_message.py @@ -15,85 +15,85 @@ from lib.varnish import VarnishPurger reasons = [ - None, # Placeholder for 0 - "virus", - "violates policies", - "privacy", - "corrupt", + None, # Placeholder for 0 + "virus", + "violates policies", + "privacy", + "corrupt", ] if __name__ == "__main__": - optparser = OptionParser() - optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide') - - (opt, args) = optparser.parse_args() - - if (len(args)): - print("No bare arguments accepted") - optparser.print_help() - sys.exit(1) - - if not opt.msgid: - print("Message-id must be specified") - optparser.print_help() - sys.exit(1) - - cfg = ConfigParser() - cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) - try: - connstr = cfg.get('db','connstr') - except: - connstr = 'need_connstr' - - conn = psycopg2.connect(connstr) - curs = conn.cursor() - - curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", { - 'msgid': opt.msgid, - }) - if curs.rowcount <= 0: - print("Message not found.") - sys.exit(1) - - id, threadid, previous = curs.fetchone() - - # Message found, ask for reason - reason = 0 - print("Current status: %s" % reasons[previous or 0]) - print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) - while True: - reason = input('Reason for hiding message? ') - try: - reason = int(reason) - except ValueError: - continue - - if reason == 0: - print("Un-hiding message") - reason = None - break - else: - try: - print("Hiding message for reason: %s" % reasons[reason]) - except: - continue - break - if previous == reason: - print("No change in status, not updating") - conn.close() - sys.exit(0) - - curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", { - 'new': reason, - 'id': id, - }) - if curs.rowcount != 1: - print("Failed to update! Not hiding!") - conn.rollback() - sys.exit(0) - conn.commit() - - VarnishPurger(cfg).purge([int(threadid), ]) - conn.close() - - print("Message hidden and varnish purge triggered.") + optparser = OptionParser() + optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide') + + (opt, args) = optparser.parse_args() + + if (len(args)): + print("No bare arguments accepted") + optparser.print_help() + sys.exit(1) + + if not opt.msgid: + print("Message-id must be specified") + optparser.print_help() + sys.exit(1) + + cfg = ConfigParser() + cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) + try: + connstr = cfg.get('db','connstr') + except: + connstr = 'need_connstr' + + conn = psycopg2.connect(connstr) + curs = conn.cursor() + + curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", { + 'msgid': opt.msgid, + }) + if curs.rowcount <= 0: + print("Message not found.") + sys.exit(1) + + id, threadid, previous = curs.fetchone() + + # Message found, ask for reason + reason = 0 + print("Current status: %s" % reasons[previous or 0]) + print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) + while True: + reason = input('Reason for hiding message? ') + try: + reason = int(reason) + except ValueError: + continue + + if reason == 0: + print("Un-hiding message") + reason = None + break + else: + try: + print("Hiding message for reason: %s" % reasons[reason]) + except: + continue + break + if previous == reason: + print("No change in status, not updating") + conn.close() + sys.exit(0) + + curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", { + 'new': reason, + 'id': id, + }) + if curs.rowcount != 1: + print("Failed to update! Not hiding!") + conn.rollback() + sys.exit(0) + conn.commit() + + VarnishPurger(cfg).purge([int(threadid), ]) + conn.close() + + print("Message hidden and varnish purge triggered.") diff --git a/loader/legacy/scan_old_archives.py b/loader/legacy/scan_old_archives.py index 5d9a242..53d339a 100755 --- a/loader/legacy/scan_old_archives.py +++ b/loader/legacy/scan_old_archives.py @@ -101,23 +101,23 @@ hp = HTMLParser() def get_messageid(fn): - with open(fn) as f: - for l in f: - if l.startswith('