diff --git a/django/archives/auth.py b/django/archives/auth.py index b3af1a5..6ccf131 100644 --- a/django/archives/auth.py +++ b/django/archives/auth.py @@ -28,18 +28,19 @@ import base64 import json import socket -import urlparse -import urllib +from urllib.parse import urlparse, urlencode, parse_qs +import urllib.request from Crypto.Cipher import AES from Crypto.Hash import SHA from Crypto import Random import time + class AuthBackend(ModelBackend): - # We declare a fake backend that always fails direct authentication - - # since we should never be using direct authentication in the first place! - def authenticate(self, username=None, password=None): - raise Exception("Direct authentication not supported") + # We declare a fake backend that always fails direct authentication - + # since we should never be using direct authentication in the first place! + def authenticate(self, username=None, password=None): + raise Exception("Direct authentication not supported") #### @@ -48,90 +49,92 @@ def authenticate(self, username=None, password=None): # Handle login requests by sending them off to the main site def login(request): - if not hasattr(settings, 'PGAUTH_REDIRECT'): - # No pgauth installed, so allow local installs. - from django.contrib.auth.views import login - return login(request, template_name='admin.html') - - if 'next' in request.GET: - # Put together an url-encoded dict of parameters we're getting back, - # including a small nonce at the beginning to make sure it doesn't - # encrypt the same way every time. - s = "t=%s&%s" % (int(time.time()), urllib.urlencode({'r': request.GET['next']})) - # Now encrypt it - r = Random.new() - iv = r.read(16) - encryptor = AES.new(SHA.new(settings.SECRET_KEY).digest()[:16], AES.MODE_CBC, iv) - cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes - - return HttpResponseRedirect("%s?d=%s$%s" % ( - settings.PGAUTH_REDIRECT, - base64.b64encode(iv, "-_"), - base64.b64encode(cipher, "-_"), - )) - else: - return HttpResponseRedirect(settings.PGAUTH_REDIRECT) + if not hasattr(settings, 'PGAUTH_REDIRECT'): + # No pgauth installed, so allow local installs. + from django.contrib.auth.views import login + return login(request, template_name='admin.html') + + if 'next' in request.GET: + # Put together an url-encoded dict of parameters we're getting back, + # including a small nonce at the beginning to make sure it doesn't + # encrypt the same way every time. + s = "t=%s&%s" % (int(time.time()), urlencode({'r': request.GET['next']})) + # Now encrypt it + r = Random.new() + iv = r.read(16) + encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv) + cipher = encryptor.encrypt((s + ' ' * (16 - (len(s) % 16))).encode('utf8')) # pad to 16 bytes + + return HttpResponseRedirect("%s?d=%s$%s" % ( + settings.PGAUTH_REDIRECT, + base64.b64encode(iv, b"-_").decode('utf8'), + base64.b64encode(cipher, b"-_").decode('utf8'), + )) + else: + return HttpResponseRedirect(settings.PGAUTH_REDIRECT) + # Handle logout requests by logging out of this site and then # redirecting to log out from the main site as well. def logout(request): - if request.user.is_authenticated(): - django_logout(request) - return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT) + if request.user.is_authenticated(): + django_logout(request) + return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT) + # Receive an authentication response from the main website and try # to log the user in.
def auth_receive(request): - if 's' in request.GET and request.GET['s'] == "logout": - # This was a logout request - return HttpResponseRedirect('/') - - if 'i' not in request.GET: - return HttpResponse("Missing IV in url!", status=400) - if 'd' not in request.GET: - return HttpResponse("Missing data in url!", status=400) - - # Set up an AES object and decrypt the data we received - decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), - AES.MODE_CBC, - base64.b64decode(str(request.GET['i']), "-_")) - s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(' ') - - # Now un-urlencode it - try: - data = urlparse.parse_qs(s, strict_parsing=True) - except ValueError: - return HttpResponse("Invalid encrypted data received.", status=400) - - # Check the timestamp in the authentication - if (int(data['t'][0]) < time.time() - 10): - return HttpResponse("Authentication token too old.", status=400) - - # Update the user record (if any) - try: - user = User.objects.get(username=data['u'][0]) - # User found, let's see if any important fields have changed - changed = False - if user.first_name != data['f'][0]: - user.first_name = data['f'][0] - changed = True - if user.last_name != data['l'][0]: - user.last_name = data['l'][0] - changed = True - if user.email != data['e'][0]: - user.email = data['e'][0] - changed= True - if changed: - user.save() - except User.DoesNotExist: - # User not found, create it! - - # NOTE! We have some legacy users where there is a user in - # the database with a different userid. Instead of trying to - # somehow fix that live, give a proper error message and - # have somebody look at it manually. - if User.objects.filter(email=data['e'][0]).exists(): - return HttpResponse("""A user with email %s already exists, but with + if 's' in request.GET and request.GET['s'] == "logout": + # This was a logout request + return HttpResponseRedirect('/') + + if 'i' not in request.GET: + return HttpResponse("Missing IV in url!", status=400) + if 'd' not in request.GET: + return HttpResponse("Missing data in url!", status=400) + + # Set up an AES object and decrypt the data we received + decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), + AES.MODE_CBC, + base64.b64decode(str(request.GET['i']), "-_")) + s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(b' ').decode('utf8') + + # Now un-urlencode it + try: + data = parse_qs(s, strict_parsing=True) + except ValueError: + return HttpResponse("Invalid encrypted data received.", status=400) + + # Check the timestamp in the authentication + if (int(data['t'][0]) < time.time() - 10): + return HttpResponse("Authentication token too old.", status=400) + + # Update the user record (if any) + try: + user = User.objects.get(username=data['u'][0]) + # User found, let's see if any important fields have changed + changed = False + if user.first_name != data['f'][0]: + user.first_name = data['f'][0] + changed = True + if user.last_name != data['l'][0]: + user.last_name = data['l'][0] + changed = True + if user.email != data['e'][0]: + user.email = data['e'][0] + changed = True + if changed: + user.save() + except User.DoesNotExist: + # User not found, create it! + + # NOTE! We have some legacy users where there is a user in + # the database with a different userid. Instead of trying to + # somehow fix that live, give a proper error message and + # have somebody look at it manually. 
+ if User.objects.filter(email=data['e'][0]).exists(): + return HttpResponse("""A user with email %s already exists, but with a different username than %s. This is almost certainly caused by some legacy data in our database. @@ -142,51 +145,51 @@ def auth_receive(request): We apologize for the inconvenience. """ % (data['e'][0], data['u'][0]), content_type='text/plain') - if hasattr(settings, 'PGAUTH_CREATEUSER_CALLBACK'): - res = getattr(settings, 'PGAUTH_CREATEUSER_CALLBACK')( - data['u'][0], - data['e'][0], - ['f'][0], - data['l'][0], - ) - # If anything is returned, we'll return that as our result. - # If None is returned, it means go ahead and create the user. - if res: - return res - - user = User(username=data['u'][0], - first_name=data['f'][0], - last_name=data['l'][0], - email=data['e'][0], - password='setbypluginnotasha1', - ) - user.save() - - # Ok, we have a proper user record. Now tell django that - # we're authenticated so it persists it in the session. Before - # we do that, we have to annotate it with the backend information. - user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__) - django_login(request, user) - - # Finally, check of we have a data package that tells us where to - # redirect the user. - if 'd' in data: - (ivs, datas) = data['d'][0].split('$') - decryptor = AES.new(SHA.new(settings.SECRET_KEY).digest()[:16], - AES.MODE_CBC, - base64.b64decode(ivs, "-_")) - s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(' ') - try: - rdata = urlparse.parse_qs(s, strict_parsing=True) - except ValueError: - return HttpResponse("Invalid encrypted data received.", status=400) - if 'r' in rdata: - # Redirect address - return HttpResponseRedirect(rdata['r'][0]) - # No redirect specified, see if we have it in our settings - if hasattr(settings, 'PGAUTH_REDIRECT_SUCCESS'): - return HttpResponseRedirect(settings.PGAUTH_REDIRECT_SUCCESS) - return HttpResponse("Authentication successful, but don't know where to redirect!", status=500) + if hasattr(settings, 'PGAUTH_CREATEUSER_CALLBACK'): + res = getattr(settings, 'PGAUTH_CREATEUSER_CALLBACK')( + data['u'][0], + data['e'][0], + data['f'][0], + data['l'][0], + ) + # If anything is returned, we'll return that as our result. + # If None is returned, it means go ahead and create the user. + if res: + return res + + user = User(username=data['u'][0], + first_name=data['f'][0], + last_name=data['l'][0], + email=data['e'][0], + password='setbypluginnotasha1', + ) + user.save() + + # Ok, we have a proper user record. Now tell django that + # we're authenticated so it persists it in the session. Before + # we do that, we have to annotate it with the backend information. + user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__) + django_login(request, user) + + # Finally, check if we have a data package that tells us where to + # redirect the user.
+ if 'd' in data: + (ivs, datas) = data['d'][0].split('$') + decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], + AES.MODE_CBC, + base64.b64decode(ivs, b"-_")) + s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8') + try: + rdata = parse_qs(s, strict_parsing=True) + except ValueError: + return HttpResponse("Invalid encrypted data received.", status=400) + if 'r' in rdata: + # Redirect address + return HttpResponseRedirect(rdata['r'][0]) + # No redirect specified, see if we have it in our settings + if hasattr(settings, 'PGAUTH_REDIRECT_SUCCESS'): + return HttpResponseRedirect(settings.PGAUTH_REDIRECT_SUCCESS) + return HttpResponse("Authentication successful, but don't know where to redirect!", status=500) # Perform a search in the central system. Note that the results are returned as an @@ -197,29 +200,30 @@ def auth_receive(request): # Unlike the authentication, searching does not involve the browser - we just make # a direct http call. def user_search(searchterm=None, userid=None): - # If upsteam isn't responding quickly, it's not going to respond at all, and - # 10 seconds is already quite long. - socket.setdefaulttimeout(10) - if userid: - q = {'u': userid} - else: - q = {'s': searchterm} - - u = urllib.urlopen('%ssearch/?%s' % ( - settings.PGAUTH_REDIRECT, - urllib.urlencode(q), - )) - (ivs, datas) = u.read().split('&') - u.close() - - # Decryption time - decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), - AES.MODE_CBC, - base64.b64decode(ivs, "-_")) - s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(' ') - j = json.loads(s) - - return j + # If upstream isn't responding quickly, it's not going to respond at all, and + # 10 seconds is already quite long. + socket.setdefaulttimeout(10) + if userid: + q = {'u': userid} + else: + q = {'s': searchterm} + + u = urllib.request.urlopen('%ssearch/?%s' % ( + settings.PGAUTH_REDIRECT, + urlencode(q), + )) + (ivs, datas) = u.read().split(b'&') + u.close() + + # Decryption time + decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY), + AES.MODE_CBC, + base64.b64decode(ivs, "-_")) + s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8') + j = json.loads(s) + + return j + # Import a user into the local authentication system. Will initially # make a search for it, and if anything other than one entry is returned @@ -230,18 +234,18 @@ def user_search(searchterm=None, userid=None): # The call to this function should normally be wrapped in a transaction, # and this function itself will make no attempt to do anything about that.
def user_import(uid): - u = user_search(userid=uid) - if len(u) != 1: - raise Exception("Internal error, duplicate or no user found") + u = user_search(userid=uid) + if len(u) != 1: + raise Exception("Internal error, duplicate or no user found") - u = u[0] + u = u[0] - if User.objects.filter(username=u['u']).exists(): - raise Exception("User already exists") + if User.objects.filter(username=u['u']).exists(): + raise Exception("User already exists") - User(username=u['u'], - first_name=u['f'], - last_name=u['l'], - email=u['e'], - password='setbypluginnotsha1', - ).save() + User(username=u['u'], + first_name=u['f'], + last_name=u['l'], + email=u['e'], + password='setbypluginnotsha1', + ).save() diff --git a/django/archives/mailarchives/api.py b/django/archives/mailarchives/api.py index 85eae8e..b53e08e 100644 --- a/django/archives/mailarchives/api.py +++ b/django/archives/mailarchives/api.py @@ -2,134 +2,139 @@ from django.shortcuts import get_object_or_404 from django.conf import settings -from views import cache -from models import Message, List, ApiClient, ThreadSubscription +from .views import cache +from .models import Message, List, ApiClient, ThreadSubscription import json @cache(hours=4) def listinfo(request): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') - resp = HttpResponse(content_type='application/json') - json.dump([{ - 'name': l.listname, - 'shortdesc': l.shortdesc, - 'description': l.description, - 'active': l.active, - 'group': l.group.groupname, - } for l in List.objects.select_related('group').all()], resp) + resp = HttpResponse(content_type='application/json') + json.dump([{ + 'name': l.listname, + 'shortdesc': l.shortdesc, + 'description': l.description, + 'active': l.active, + 'group': l.group.groupname, + } for l in List.objects.select_related('group').all()], resp) + + return resp - return resp @cache(hours=4) def latest(request, listname): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') - - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') - - # Return the latest messages on this list. - # If is not specified, return 50. Max value for is 100. - if 'n' in request.GET: - try: - limit = int(request.GET['n']) - except: - limit = 0 - else: - limit = 50 - if limit <= 0 or limit > 100: - limit = 50 - - extrawhere=[] - extraparams=[] - - # Return only messages that have attachments? 
- if 'a' in request.GET: - if request.GET['a'] == '1': - extrawhere.append("has_attachment") - - # Restrict by full text search - if 's' in request.GET and request.GET['s']: - extrawhere.append("fti @@ plainto_tsquery('public.pg', %s)") - extraparams.append(request.GET['s']) - - if listname != '*': - list = get_object_or_404(List, listname=listname) - extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid) - else: - list = None - extrawhere='' - - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit] - allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) - - resp = HttpResponse(content_type='application/json') - json.dump([ - {'msgid': m.messageid, - 'date': m.date.isoformat(), - 'from': m.mailfrom, - 'subj': m.subject,} - for m in mlist], resp) - - # Make sure this expires from the varnish cache when new entries show - # up in this month. - # XXX: need to deal with the global view, but for now API callers come in directly - if list: - resp['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (list.listid, year, month) for year, month in allyearmonths])) - return resp + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') + + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') + + # Return the latest messages on this list. + # If is not specified, return 50. Max value for is 100. + if 'n' in request.GET: + try: + limit = int(request.GET['n']) + except: + limit = 0 + else: + limit = 50 + if limit <= 0 or limit > 100: + limit = 50 + + extrawhere = [] + extraparams = [] + + # Return only messages that have attachments? + if 'a' in request.GET: + if request.GET['a'] == '1': + extrawhere.append("has_attachment") + + # Restrict by full text search + if 's' in request.GET and request.GET['s']: + extrawhere.append("fti @@ plainto_tsquery('public.pg', %s)") + extraparams.append(request.GET['s']) + + if listname != '*': + list = get_object_or_404(List, listname=listname) + extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid) + else: + list = None + extrawhere = '' + + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit] + allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) + + resp = HttpResponse(content_type='application/json') + json.dump([ + { + 'msgid': m.messageid, + 'date': m.date.isoformat(), + 'from': m.mailfrom, + 'subj': m.subject, + } + for m in mlist], resp) + + # Make sure this expires from the varnish cache when new entries show + # up in this month. + # XXX: need to deal with the global view, but for now API callers come in directly + if list: + resp['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (list.listid, year, month) for year, month in allyearmonths])) + return resp @cache(hours=4) def thread(request, msgid): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') - - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') - - # Return metadata about a single thread. A list of all the emails - # that are in the thread with their basic attributes are included. 
- msg = get_object_or_404(Message, messageid=msgid) - mlist = Message.objects.defer('bodytxt', 'cc', 'to').filter(threadid=msg.threadid) - - resp = HttpResponse(content_type='application/json') - json.dump([ - {'msgid': m.messageid, - 'date': m.date.isoformat(), - 'from': m.mailfrom, - 'subj': m.subject, - 'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()], - } - for m in mlist], resp) - resp['X-pgthread'] = m.threadid - return resp + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') + + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') + + # Return metadata about a single thread. A list of all the emails + # that are in the thread with their basic attributes is included. + msg = get_object_or_404(Message, messageid=msgid) + mlist = Message.objects.defer('bodytxt', 'cc', 'to').filter(threadid=msg.threadid) + + resp = HttpResponse(content_type='application/json') + json.dump([ + { + 'msgid': m.messageid, + 'date': m.date.isoformat(), + 'from': m.mailfrom, + 'subj': m.subject, + 'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()], + } + for m in mlist], resp) + resp['X-pgthread'] = msg.threadid + return resp + def thread_subscribe(request, msgid): - if not settings.PUBLIC_ARCHIVES: - return HttpResponseForbidden('No API access on private archives for now') + if not settings.PUBLIC_ARCHIVES: + return HttpResponseForbidden('No API access on private archives for now') - if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: - return HttpResponseForbidden('Invalid host') + if not request.META['REMOTE_ADDR'] in settings.API_CLIENTS: + return HttpResponseForbidden('Invalid host') - if 'HTTP_X_APIKEY' not in request.META: - return HttpResponseForbidden('No API key') + if 'HTTP_X_APIKEY' not in request.META: + return HttpResponseForbidden('No API key') - if request.method != 'PUT': - return HttpResponseForbidden('Invalid HTTP verb') + if request.method != 'PUT': + return HttpResponseForbidden('Invalid HTTP verb') - apiclient = get_object_or_404(ApiClient, apikey=request.META['HTTP_X_APIKEY']) - msg = get_object_or_404(Message, messageid=msgid) + apiclient = get_object_or_404(ApiClient, apikey=request.META['HTTP_X_APIKEY']) + msg = get_object_or_404(Message, messageid=msgid) - (obj, created) = ThreadSubscription.objects.get_or_create(apiclient=apiclient, - threadid=msg.threadid) - if created: - return HttpResponse(status=201) - else: - return HttpResponse(status=200) + (obj, created) = ThreadSubscription.objects.get_or_create(apiclient=apiclient, + threadid=msg.threadid) + if created: + return HttpResponse(status=201) + else: + return HttpResponse(status=200) diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py index 6270974..88137f8 100644 --- a/django/archives/mailarchives/models.py +++ b/django/archives/mailarchives/models.py @@ -4,128 +4,134 @@ # We're intentionally putting the prefix text in the array here, since # we might need that flexibility in the future.
hide_reasons = [ - None, # placeholder for 0 - 'This message has been hidden because a virus was found in the message.', # 1 - 'This message has been hidden because the message violated policies.', # 2 - 'This message has been hidden because for privacy reasons.', # 3 - 'This message was corrupt', # 4 - ] + None, # placeholder for 0 + 'This message has been hidden because a virus was found in the message.', # 1 + 'This message has been hidden because the message violated policies.', # 2 + 'This message has been hidden because for privacy reasons.', # 3 + 'This message was corrupt', # 4 +] class Message(models.Model): - threadid = models.IntegerField(null=False, blank=False) - mailfrom = models.TextField(null=False, db_column='_from') - to = models.TextField(null=False, db_column='_to') - cc = models.TextField(null=False) - subject = models.TextField(null=False) - date = models.DateTimeField(null=False) - messageid = models.TextField(null=False) - bodytxt = models.TextField(null=False) - # rawtxt is a bytea field, which django doesn't support (easily) - parentid = models.IntegerField(null=False, blank=False) - has_attachment = models.BooleanField(null=False, default=False) - hiddenstatus = models.IntegerField(null=True) - # fti is a tsvector field, which django doesn't support (easily) - - class Meta: - db_table = 'messages' - - @property - def printdate(self): - return self.date.strftime("%Y-%m-%d %H:%M:%S") - - @property - def shortdate(self): - return self.date.strftime("%Y%m%d%H%M") - - # We explicitly cache the attachments here, so we can use them - # multiple times from templates without generating multiple queries - # to the database. - _attachments = None - @property - def attachments(self): - if not self._attachments: - self._attachments = self.attachment_set.extra(select={'len': 'length(attachment)'}).all() - return self._attachments - - @property - def hiddenreason(self): - if not self.hiddenstatus: return None - try: - return hide_reasons[self.hiddenstatus] - except: - # Weird value - return 'This message has been hidden.' + threadid = models.IntegerField(null=False, blank=False) + mailfrom = models.TextField(null=False, db_column='_from') + to = models.TextField(null=False, db_column='_to') + cc = models.TextField(null=False) + subject = models.TextField(null=False) + date = models.DateTimeField(null=False) + messageid = models.TextField(null=False) + bodytxt = models.TextField(null=False) + # rawtxt is a bytea field, which django doesn't support (easily) + parentid = models.IntegerField(null=False, blank=False) + has_attachment = models.BooleanField(null=False, default=False) + hiddenstatus = models.IntegerField(null=True) + # fti is a tsvector field, which django doesn't support (easily) + + class Meta: + db_table = 'messages' + + @property + def printdate(self): + return self.date.strftime("%Y-%m-%d %H:%M:%S") + + @property + def shortdate(self): + return self.date.strftime("%Y%m%d%H%M") + + # We explicitly cache the attachments here, so we can use them + # multiple times from templates without generating multiple queries + # to the database. + _attachments = None + + @property + def attachments(self): + if not self._attachments: + self._attachments = self.attachment_set.extra(select={'len': 'length(attachment)'}).all() + return self._attachments + + @property + def hiddenreason(self): + if not self.hiddenstatus: + return None + try: + return hide_reasons[self.hiddenstatus] + except: + # Weird value + return 'This message has been hidden.' 
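For reference, the point of the cached attachments property above is that Attachment.inlineable() (further down in this file) reads a len attribute which only exists when the queryset was built with extra(select={'len': 'length(attachment)'}). A minimal usage sketch, assuming the archives.mailarchives.models module path and a hypothetical helper name:

    from archives.mailarchives.models import Message

    def inlineable_attachments(messageid):
        # Hypothetical helper, for illustration only.
        msg = Message.objects.get(messageid=messageid)
        # msg.attachments is cached after the first access, so templates can
        # iterate it repeatedly without extra queries, and each attachment
        # carries the extra 'len' column that inlineable() needs.
        return [a for a in msg.attachments if a.inlineable()]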
+ class ListGroup(models.Model): - groupid = models.IntegerField(null=False, primary_key=True) - groupname = models.CharField(max_length=200, null=False, blank=False) - sortkey = models.IntegerField(null=False) + groupid = models.IntegerField(null=False, primary_key=True) + groupname = models.CharField(max_length=200, null=False, blank=False) + sortkey = models.IntegerField(null=False) + + class Meta: + db_table = 'listgroups' - class Meta: - db_table = 'listgroups' class List(models.Model): - listid = models.IntegerField(null=False, primary_key=True) - listname = models.CharField(max_length=200, null=False, blank=False, unique=True) - shortdesc = models.TextField(null=False, blank=False) - description = models.TextField(null=False, blank=False) - active = models.BooleanField(null=False, blank=False) - group = models.ForeignKey(ListGroup, db_column='groupid') - subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)") + listid = models.IntegerField(null=False, primary_key=True) + listname = models.CharField(max_length=200, null=False, blank=False, unique=True) + shortdesc = models.TextField(null=False, blank=False) + description = models.TextField(null=False, blank=False) + active = models.BooleanField(null=False, blank=False) + group = models.ForeignKey(ListGroup, db_column='groupid') + subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)") + @property + def maybe_shortdesc(self): + if self.shortdesc: + return self.shortdesc + return self.listname - @property - def maybe_shortdesc(self): - if self.shortdesc: - return self.shortdesc - return self.listname + class Meta: + db_table = 'lists' - class Meta: - db_table = 'lists' class Attachment(models.Model): - message = models.ForeignKey(Message, null=False, blank=False, db_column='message') - filename = models.CharField(max_length=1000, null=False, blank=False) - contenttype = models.CharField(max_length=1000, null=False, blank=False) - # attachment = bytea, not supported by django at this point + message = models.ForeignKey(Message, null=False, blank=False, db_column='message') + filename = models.CharField(max_length=1000, null=False, blank=False) + contenttype = models.CharField(max_length=1000, null=False, blank=False) + # attachment = bytea, not supported by django at this point - class Meta: - db_table = 'attachments' - # Predictable same-as-insert order - ordering = ('id',) + class Meta: + db_table = 'attachments' + # Predictable same-as-insert order + ordering = ('id',) - def inlineable(self): - # Return True if this image should be inlined - if self.contenttype in ('image/png', 'image/gif', 'image/jpg', 'image/jpeg'): - # Note! len needs to be set with extra(select=) - if self.len < 75000: - return True - return False + def inlineable(self): + # Return True if this image should be inlined + if self.contenttype in ('image/png', 'image/gif', 'image/jpg', 'image/jpeg'): + # Note! len needs to be set with extra(select=) + if self.len < 75000: + return True + return False class ListSubscriber(models.Model): - # Only used when public access is not allowed. - # We set the username of the community account instead of a - # foreign key, because the user might not exist. - list = models.ForeignKey(List, null=False, blank=False) - username = models.CharField(max_length=30, null=False, blank=False) + # Only used when public access is not allowed. 
+ # We set the username of the community account instead of a + # foreign key, because the user might not exist. + list = models.ForeignKey(List, null=False, blank=False) + username = models.CharField(max_length=30, null=False, blank=False) + + class Meta: + unique_together = (('list', 'username'), ) + db_table = 'listsubscribers' - class Meta: - unique_together = (('list', 'username'), ) - db_table = 'listsubscribers' class ApiClient(models.Model): - apikey = models.CharField(max_length=100, null=False, blank=False) - postback = models.URLField(max_length=500, null=False, blank=False) + apikey = models.CharField(max_length=100, null=False, blank=False) + postback = models.URLField(max_length=500, null=False, blank=False) + + class Meta: + db_table = 'apiclients' - class Meta: - db_table = 'apiclients' class ThreadSubscription(models.Model): - apiclient = models.ForeignKey(ApiClient, null=False, blank=False) - threadid = models.IntegerField(null=False, blank=False) + apiclient = models.ForeignKey(ApiClient, null=False, blank=False) + threadid = models.IntegerField(null=False, blank=False) - class Meta: - db_table = 'threadsubscriptions' - unique_together = (('apiclient', 'threadid'),) + class Meta: + db_table = 'threadsubscriptions' + unique_together = (('apiclient', 'threadid'),) diff --git a/django/archives/mailarchives/redirecthandler.py b/django/archives/mailarchives/redirecthandler.py index 030b43f..fc6a575 100644 --- a/django/archives/mailarchives/redirecthandler.py +++ b/django/archives/mailarchives/redirecthandler.py @@ -1,10 +1,12 @@ from django import shortcuts + class ERedirect(Exception): - def __init__(self, url): - self.url = url + def __init__(self, url): + self.url = url + class RedirectMiddleware(object): - def process_exception(self, request, exception): - if isinstance(exception, ERedirect): - return shortcuts.redirect(exception.url) + def process_exception(self, request, exception): + if isinstance(exception, ERedirect): + return shortcuts.redirect(exception.url) diff --git a/django/archives/mailarchives/templatetags/pgfilters.py b/django/archives/mailarchives/templatetags/pgfilters.py index 4f85e80..83bb477 100644 --- a/django/archives/mailarchives/templatetags/pgfilters.py +++ b/django/archives/mailarchives/templatetags/pgfilters.py @@ -7,13 +7,16 @@ register = template.Library() + def _rewrite_email(value): - return value.replace('@', '(at)').replace('.','(dot)') + return value.replace('@', '(at)').replace('.', '(dot)') + @register.filter(name='hidemail') @stringfilter def hidemail(value): - return _rewrite_email(value) + return _rewrite_email(value) + # A regular expression and replacement function to mangle email addresses. # @@ -26,26 +29,31 @@ def hidemail(value): # Those are not email addresses, so ignore them. The links won't work if they # are mangled. 
_re_mail = re.compile('(/m(essage-id)?/)?[^()<>@,;:\/\s"\'&|]+@[^()<>@,;:\/\s"\'&|]+') + + def _rewrite_email_match(match): - if match.group(1): - return match.group(0) # was preceded by /message-id/ - else: - return _rewrite_email(match.group(0)) + if match.group(1): + return match.group(0) # was preceded by /message-id/ + else: + return _rewrite_email(match.group(0)) + @register.filter(name='hideallemail') @stringfilter def hideallemail(value): - return _re_mail.sub(lambda x: _rewrite_email_match(x), value) + return _re_mail.sub(lambda x: _rewrite_email_match(x), value) + @register.filter(name='nameonly') @stringfilter def nameonly(value): - (name, email) = parseaddr(value) - if name: - return name - return email.split('@')[0] + (name, email) = parseaddr(value) + if name: + return name + return email.split('@')[0] + @register.filter(name='md5') @stringfilter def md5(value): - return hashlib.md5(value).hexdigest() + return hashlib.md5(value.encode('utf8')).hexdigest() diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py index dc5c177..dfb4874 100644 --- a/django/archives/mailarchives/views.py +++ b/django/archives/mailarchives/views.py @@ -10,57 +10,60 @@ from django.conf import settings import copy -import urllib import re import os import base64 from datetime import datetime, timedelta, date import calendar import email.parser -from StringIO import StringIO +import email.policy +from io import BytesIO import json -from redirecthandler import ERedirect +from .redirecthandler import ERedirect + +from .models import * -from models import * # Ensure the user is logged in (if it's not public lists) def ensure_logged_in(request): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - return - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + return + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Ensure the user has permissions to access a list. If not, raise # a permissions exception. def ensure_list_permissions(request, l): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - if request.user.is_superuser: - return - if l.subscriber_access and ListSubscriber.objects.filter(list=l, username=request.user.username).exists(): - return - # Logged in but no access - raise PermissionDenied("Access denied.") - - # Redirect to a login page - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + if request.user.is_superuser: + return + if l.subscriber_access and ListSubscriber.objects.filter(list=l, username=request.user.username).exists(): + return + # Logged in but no access + raise PermissionDenied("Access denied.") + + # Redirect to a login page + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Ensure the user has permissions to access a message. In order to view # a message, the user must have permissions on *all* lists the thread # appears on. 
def ensure_message_permissions(request, msgid): - if settings.PUBLIC_ARCHIVES: - return - if hasattr(request, 'user') and request.user.is_authenticated(): - if request.user.is_superuser: - return - - curs = connection.cursor() - curs.execute("""SELECT EXISTS ( + if settings.PUBLIC_ARCHIVES: + return + if hasattr(request, 'user') and request.user.is_authenticated(): + if request.user.is_superuser: + return + + curs = connection.cursor() + curs.execute("""SELECT EXISTS ( SELECT 1 FROM list_threads INNER JOIN messages ON messages.threadid=list_threads.threadid WHERE messages.messageid=%(msgid)s @@ -70,302 +73,317 @@ def ensure_message_permissions(request, msgid): AND listsubscribers.username=%(username)s ) )""", { - 'msgid': msgid, - 'username': request.user.username, - }) - if not curs.fetchone()[0]: - # This thread is not on any list that the user does not have permissions on. - return + 'msgid': msgid, + 'username': request.user.username, + }) + if not curs.fetchone()[0]: + # This thread is not on any list that the user does not have permissions on. + return - # Logged in but no access - raise PermissionDenied("Access denied.") + # Logged in but no access + raise PermissionDenied("Access denied.") + + # Redirect to a login page + raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) - # Redirect to a login page - raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) # Decorator to set cache age def cache(days=0, hours=0, minutes=0, seconds=0): - "Set the server to cache object a specified time. td must be a timedelta object" - def _cache(fn): - def __cache(request, *_args, **_kwargs): - resp = fn(request, *_args, **_kwargs) - if settings.PUBLIC_ARCHIVES: - # Only set cache headers on public archives - td = timedelta(hours=hours, minutes=minutes, seconds=seconds) - resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds) - return resp - return __cache - return _cache + "Set the server to cache object a specified time. 
td must be a timedelta object" + def _cache(fn): + def __cache(request, *_args, **_kwargs): + resp = fn(request, *_args, **_kwargs) + if settings.PUBLIC_ARCHIVES: + # Only set cache headers on public archives + td = timedelta(hours=hours, minutes=minutes, seconds=seconds) + resp['Cache-Control'] = 's-maxage=%s' % (td.days * 3600 * 24 + td.seconds) + return resp + return __cache + return _cache + def nocache(fn): - def _nocache(request, *_args, **_kwargs): - resp = fn(request, *_args, **_kwargs) - if settings.PUBLIC_ARCHIVES: - # Only set cache headers on public archives - resp['Cache-Control'] = 's-maxage=0' - return resp - return _nocache + def _nocache(request, *_args, **_kwargs): + resp = fn(request, *_args, **_kwargs) + if settings.PUBLIC_ARCHIVES: + # Only set cache headers on public archives + resp['Cache-Control'] = 's-maxage=0' + return resp + return _nocache + # Decorator to require http auth def antispam_auth(fn): - def _antispam_auth(request, *_args, **_kwargs): - if not settings.PUBLIC_ARCHIVES: - return fn(request, *_args, **_kwargs) - - if 'HTTP_AUTHORIZATION' in request.META: - auth = request.META['HTTP_AUTHORIZATION'].split() - if len(auth) != 2: - return HttpResponseForbidden("Invalid authentication") - if auth[0].lower() == "basic": - user, pwd = base64.b64decode(auth[1]).split(':') - if user == 'archives' and pwd == 'antispam': - # Actually run the function if auth is correct - resp = fn(request, *_args, **_kwargs) - return resp - # Require authentication - response = HttpResponse() - response.status_code = 401 - response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"' - return response - - return _antispam_auth - + def _antispam_auth(request, *_args, **_kwargs): + if not settings.PUBLIC_ARCHIVES: + return fn(request, *_args, **_kwargs) + + if 'HTTP_AUTHORIZATION' in request.META: + auth = request.META['HTTP_AUTHORIZATION'].split() + if len(auth) != 2: + return HttpResponseForbidden("Invalid authentication") + if auth[0].lower() == "basic": + user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':') + if user == 'archives' and pwd == 'antispam': + # Actually run the function if auth is correct + resp = fn(request, *_args, **_kwargs) + return resp + # Require authentication + response = HttpResponse() + response.status_code = 401 + response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"' + return response + + return _antispam_auth def get_all_groups_and_lists(request, listid=None): - # Django doesn't (yet) support traversing the reverse relationship, - # so we'll get all the lists and rebuild it backwards. 
- if settings.PUBLIC_ARCHIVES or request.user.is_superuser: - lists = List.objects.select_related('group').all().order_by('listname') - else: - lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname') - listgroupid = None - groups = {} - for l in lists: - if l.listid == listid: - listgroupid = l.group.groupid - - if l.group.groupid in groups: - groups[l.group.groupid]['lists'].append(l) - else: - groups[l.group.groupid] = { - 'groupid': l.group.groupid, - 'groupname': l.group.groupname, - 'sortkey': l.group.sortkey, - 'lists': [l,], - 'homelink': 'list/group/%s' % l.group.groupid, - } - - return (sorted(groups.values(), key=lambda g: g['sortkey']), listgroupid) + # Django doesn't (yet) support traversing the reverse relationship, + # so we'll get all the lists and rebuild it backwards. + if settings.PUBLIC_ARCHIVES or request.user.is_superuser: + lists = List.objects.select_related('group').all().order_by('listname') + else: + lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname') + listgroupid = None + groups = {} + for l in lists: + if l.listid == listid: + listgroupid = l.group.groupid + + if l.group.groupid in groups: + groups[l.group.groupid]['lists'].append(l) + else: + groups[l.group.groupid] = { + 'groupid': l.group.groupid, + 'groupname': l.group.groupname, + 'sortkey': l.group.sortkey, + 'lists': [l, ], + 'homelink': 'list/group/%s' % l.group.groupid, + } + + return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid) class NavContext(object): - def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None): - self.request = request - self.ctx = {} - - if all_groups: - groups = copy.deepcopy(all_groups) - if expand_groupid: - listgroupid = int(expand_groupid) - else: - (groups, listgroupid) = get_all_groups_and_lists(request, listid) - - for g in groups: - # On the root page, remove *all* entries - # On other lists, remove the entries in all groups other than our - # own. - if (not listid and not expand_groupid) or listgroupid != g['groupid']: - # Root page, so remove *all* entries - g['lists'] = [] - - self.ctx.update({'listgroups': groups}) - if listname: - self.ctx.update({'searchform_listname': listname}) + def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None): + self.request = request + self.ctx = {} + + if all_groups: + groups = copy.deepcopy(all_groups) + if expand_groupid: + listgroupid = int(expand_groupid) + else: + (groups, listgroupid) = get_all_groups_and_lists(request, listid) + + for g in groups: + # On the root page, remove *all* entries + # On other lists, remove the entries in all groups other than our + # own. 
+ if (not listid and not expand_groupid) or listgroupid != g['groupid']: + # Root page, so remove *all* entries + g['lists'] = [] + + self.ctx.update({'listgroups': groups}) + if listname: + self.ctx.update({'searchform_listname': listname}) + def render_nav(navcontext, template, ctx): - ctx.update(navcontext.ctx) - return render(navcontext.request, template, ctx) + ctx.update(navcontext.ctx) + return render(navcontext.request, template, ctx) + @cache(hours=4) def index(request): - ensure_logged_in(request) + ensure_logged_in(request) - (groups, listgroupid) = get_all_groups_and_lists(request) - return render_nav(NavContext(request, all_groups=groups), 'index.html', { - 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups], - }) + (groups, listgroupid) = get_all_groups_and_lists(request) + return render_nav(NavContext(request, all_groups=groups), 'index.html', { + 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups], + }) @cache(hours=8) def groupindex(request, groupid): - (groups, listgroupid) = get_all_groups_and_lists(request) - mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)] - if len(mygroups) == 0: - raise Http404('List group does not exist') + (groups, listgroupid) = get_all_groups_and_lists(request) + mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid'] == int(groupid)] + if len(mygroups) == 0: + raise Http404('List group does not exist') + + return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', { + 'groups': mygroups, + }) - return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', { - 'groups': mygroups, - }) @cache(hours=8) def monthlist(request, listname): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + curs = connection.cursor() + curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid}) + months = [{'year': r[0], 'month': r[1], 'date': datetime(r[0], r[1], 1)} for r in curs.fetchall()] - curs = connection.cursor() - curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid}) - months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()] + return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', { + 'list': l, + 'months': months, + }) - return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', { - 'list': l, - 'months': months, - }) def get_monthday_info(mlist, l, d): - allmonths = set([m.date.month for m in mlist]) - monthdate = None - daysinmonth = None - if len(allmonths) == 1: - # All hits are from one month, so generate month links - monthdate = mlist[0].date - elif len(allmonths) == 0: - # No hits at all, so generate month links from the specified date - monthdate = d - - if monthdate: - curs = connection.cursor() - curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", { - 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1), - 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]), - 'listid': 
l.listid, - }) - daysinmonth = [int(r[0]) for r in curs.fetchall()] - - yearmonth = None - if monthdate: - yearmonth = "%s%02d" % (monthdate.year, monthdate.month) - return (yearmonth, daysinmonth) + allmonths = set([m.date.month for m in mlist]) + monthdate = None + daysinmonth = None + if len(allmonths) == 1: + # All hits are from one month, so generate month links + monthdate = mlist[0].date + elif len(allmonths) == 0: + # No hits at all, so generate month links from the specified date + monthdate = d + + if monthdate: + curs = connection.cursor() + curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", { + 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1), + 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]), + 'listid': l.listid, + }) + daysinmonth = [int(r[0]) for r in curs.fetchall()] + + yearmonth = None + if monthdate: + yearmonth = "%s%02d" % (monthdate.year, monthdate.month) + return (yearmonth, daysinmonth) def _render_datelist(request, l, d, datefilter, title, queryproc): - # NOTE! Basic permissions checks must be done before calling this function! - - if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra( - where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"], - params=(l.listid, request.user.username), - ) - else: - # Else we return everything - mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]) - mlist = queryproc(mlist) - - allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) - (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d) - - r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', { - 'list': l, - 'messages': mlist, - 'title': title, - 'daysinmonth': daysinmonth, - 'yearmonth': yearmonth, - }) - r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths])) - return r + # NOTE! Basic permissions checks must be done before calling this function! 
+ + if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra( + where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"], + params=(l.listid, request.user.username), + ) + else: + # Else we return everything + mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]) + mlist = queryproc(mlist) + + allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) + (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d) + + r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', { + 'list': l, + 'messages': mlist, + 'title': title, + 'daysinmonth': daysinmonth, + 'yearmonth': yearmonth, + }) + r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year, month in allyearmonths])) + return r + def render_datelist_from(request, l, d, title, to=None): - # NOTE! Basic permissions checks must be done before calling this function! - datefilter = Q(date__gte=d) - if to: - datefilter.add(Q(date__lt=to), Q.AND) + # NOTE! Basic permissions checks must be done before calling this function! + datefilter = Q(date__gte=d) + if to: + datefilter.add(Q(date__lt=to), Q.AND) + + return _render_datelist(request, l, d, datefilter, title, + lambda x: list(x.order_by('date')[:200])) - return _render_datelist(request, l, d, datefilter, title, - lambda x: list(x.order_by('date')[:200])) def render_datelist_to(request, l, d, title): - # NOTE! Basic permissions checks must be done before calling this function! + # NOTE! Basic permissions checks must be done before calling this function! - # Need to sort this backwards in the database to get the LIMIT applied - # properly, and then manually resort it in the correct order. We can do - # the second sort safely in python since it's not a lot of items.. + # Need to sort this backwards in the database to get the LIMIT applied + # properly, and then manually resort it in the correct order. We can do + # the second sort safely in python since it's not a lot of items.. 
+ + return _render_datelist(request, l, d, Q(date__lte=d), title, + lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date)) - return _render_datelist(request, l, d, Q(date__lte=d), title, - lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date)) @cache(hours=2) def datelistsince(request, listname, msgid): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + msg = get_object_or_404(Message, messageid=msgid) + return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) - msg = get_object_or_404(Message, messageid=msgid) - return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) # Longer cache since this will be used for the fixed date links @cache(hours=4) def datelistsincetime(request, listname, year, month, day, hour, minute): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + try: + d = datetime(int(year), int(month), int(day), int(hour), int(minute)) + except ValueError: + raise Http404("Invalid date format, not found") + return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) - try: - d = datetime(int(year), int(month), int(day), int(hour), int(minute)) - except ValueError: - raise Http404("Invalid date format, not found") - return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) @cache(hours=2) def datelistbefore(request, listname, msgid): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + msg = get_object_or_404(Message, messageid=msgid) + return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) - msg = get_object_or_404(Message, messageid=msgid) - return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) @cache(hours=2) def datelistbeforetime(request, listname, year, month, day, hour, minute): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + try: + d = datetime(int(year), int(month), int(day), int(hour), int(minute)) + except ValueError: + raise Http404("Invalid date format, not found") + return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) - try: - d = datetime(int(year), int(month), int(day), int(hour), int(minute)) - except ValueError: - raise Http404("Invalid date format, not found") - return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) @cache(hours=4) def datelist(request, listname, year, month): - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + try: + d = datetime(int(year), int(month), 1) + except ValueError: + raise Http404("Malformatted date, month not found") - try: - d = datetime(int(year), int(month), 1) - except ValueError: - raise Http404("Malformatted date, month not found") + enddate = d + 
timedelta(days=31) + enddate = datetime(enddate.year, enddate.month, 1) + return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate) - enddate = d+timedelta(days=31) - enddate = datetime(enddate.year, enddate.month, 1) - return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate) @cache(hours=4) def attachment(request, attid): - # Use a direct query instead of django, since it has bad support for - # bytea - # XXX: minor information leak, because we load the whole attachment before we check - # the thread permissions. Is that OK? - curs = connection.cursor() - curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)}) - r = curs.fetchall() - if len(r) != 1: - return HttpResponse("Attachment not found") + # Use a direct query instead of django, since it has bad support for + # bytea + # XXX: minor information leak, because we load the whole attachment before we check + # the thread permissions. Is that OK? + curs = connection.cursor() + curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)}) + r = curs.fetchall() + if len(r) != 1: + return HttpResponse("Attachment not found") - ensure_message_permissions(request, r[0][2]) + ensure_message_permissions(request, r[0][2]) + + return HttpResponse(r[0][3], content_type=r[0][1]) - return HttpResponse(r[0][3], content_type=r[0][1]) def _build_thread_structure(threadid): - # Yeah, this is *way* too complicated for the django ORM - curs = connection.cursor() - curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS( + # Yeah, this is *way* too complicated for the django ORM + curs = connection.cursor() + curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS( SELECT id,_from,subject,date,messageid,has_attachment,parentid,array[]::timestamptz[] FROM messages m WHERE m.threadid=%(threadid)s AND parentid IS NULL UNION ALL SELECT m.id,m._from,m.subject,m.date,m.messageid,m.has_attachment,m.parentid,t.datepath||t.date FROM messages m INNER JOIN t ON t.id=m.parentid WHERE m.threadid=%(threadid)s @@ -373,395 +391,417 @@ def _build_thread_structure(threadid): SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t ORDER BY datepath||date """, {'threadid': threadid}) - for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall(): - yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)} + for id, _from, subject, date, messageid, has_attachment, parentid, parentpath in curs.fetchall(): + yield { + 'id': id, + 'mailfrom': _from, + 'subject': subject, + 'date': date, + 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), + 'messageid': messageid, + 'hasattachment': has_attachment, + 'parentid': parentid, + 'indent': " " * len(parentpath), + } def _get_nextprevious(listmap, dt): - curs = connection.cursor() - curs.execute("""WITH l(listid) AS ( + curs = connection.cursor() + curs.execute(""" +WITH l(listid) AS ( SELECT 
unnest(%(lists)s) ) SELECT l.listid,1, (SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m - INNER JOIN list_threads lt ON lt.threadid=m.threadid - WHERE m.date>%(time)s AND lt.listid=l.listid - ORDER BY m.date LIMIT 1 + INNER JOIN list_threads lt ON lt.threadid=m.threadid + WHERE m.date>%(time)s AND lt.listid=l.listid + ORDER BY m.date LIMIT 1 ) FROM l UNION ALL SELECT l.listid,0, (SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m - INNER JOIN list_threads lt ON lt.threadid=m.threadid - WHERE m.date<%(time)s AND lt.listid=l.listid - ORDER BY m.date DESC LIMIT 1 - ) FROM l""", { - 'lists': listmap.keys(), - 'time': dt, - }) - retval = {} - for listid, isnext, data in curs.fetchall(): - if data: - # Can be NULL, but if not, it will always have all fields - listname = listmap[listid] - d = { - 'msgid': data[0], - 'date': data[1], - 'subject': data[2], - 'from': data[3], - } - if listname in retval: - retval[listname][isnext and 'next' or 'prev'] = d - else: - retval[listname] = { - isnext and 'next' or 'prev': d - } - return retval + INNER JOIN list_threads lt ON lt.threadid=m.threadid + WHERE m.date<%(time)s AND lt.listid=l.listid + ORDER BY m.date DESC LIMIT 1 + ) FROM l""", + { + 'lists': list(listmap.keys()), + 'time': dt, + }) + retval = {} + for listid, isnext, data in curs.fetchall(): + if data: + # Can be NULL, but if not, it will always have all fields + listname = listmap[listid] + d = { + 'msgid': data[0], + 'date': data[1], + 'subject': data[2], + 'from': data[3], + } + if listname in retval: + retval[listname][isnext and 'next' or 'prev'] = d + else: + retval[listname] = { + isnext and 'next' or 'prev': d + } + return retval + @cache(hours=4) def message(request, msgid): - ensure_message_permissions(request, msgid) - - try: - m = Message.objects.get(messageid=msgid) - except Message.DoesNotExist: - raise Http404('Message does not exist') - - lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname') - listmap = dict([(l.listid, l.listname) for l in lists]) - threadstruct = list(_build_thread_structure(m.threadid)) - newest = calendar.timegm(max(threadstruct, key=lambda x: x['date'])['date'].utctimetuple()) - if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: - ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE")) - if ims >= newest: - return HttpResponseNotModified() - - responses = [t for t in threadstruct if t['parentid']==m.id] - - if m.parentid: - for t in threadstruct: - if t['id'] == m.parentid: - parent = t - break - else: - parent = None - nextprev = _get_nextprevious(listmap, m.date) - - r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', { - 'msg': m, - 'threadstruct': threadstruct, - 'responses': responses, - 'parent': parent, - 'lists': lists, - 'nextprev': nextprev, - }) - r['X-pgthread'] = ":%s:" % m.threadid - r['Last-Modified'] = http_date(newest) - return r + ensure_message_permissions(request, msgid) + + try: + m = Message.objects.get(messageid=msgid) + except Message.DoesNotExist: + raise Http404('Message does not exist') + + lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname') + listmap = dict([(l.listid, l.listname) for l in lists]) + threadstruct = list(_build_thread_structure(m.threadid)) + newest = calendar.timegm(max(threadstruct, key=lambda x: 
x['date'])['date'].utctimetuple()) + if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: + ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE")) + if ims >= newest: + return HttpResponseNotModified() + + responses = [t for t in threadstruct if t['parentid'] == m.id] + + if m.parentid: + for t in threadstruct: + if t['id'] == m.parentid: + parent = t + break + else: + parent = None + nextprev = _get_nextprevious(listmap, m.date) + + r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', { + 'msg': m, + 'threadstruct': threadstruct, + 'responses': responses, + 'parent': parent, + 'lists': lists, + 'nextprev': nextprev, + }) + r['X-pgthread'] = ":%s:" % m.threadid + r['Last-Modified'] = http_date(newest) + return r + @cache(hours=4) def message_flat(request, msgid): - ensure_message_permissions(request, msgid) - - try: - msg = Message.objects.get(messageid=msgid) - except Message.DoesNotExist: - raise Http404('Message does not exist') - allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date')) - lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname') - - isfirst = (msg == allmsg[0]) - - newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple()) - if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: - ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE')) - if ims >= newest: - return HttpResponseNotModified() - - r = render_nav(NavContext(request), 'message_flat.html', { - 'msg': msg, - 'allmsg': allmsg, - 'lists': lists, - 'isfirst': isfirst, - }) - r['X-pgthread'] = ":%s:" % msg.threadid - r['Last-Modified'] = http_date(newest) - return r + ensure_message_permissions(request, msgid) + + try: + msg = Message.objects.get(messageid=msgid) + except Message.DoesNotExist: + raise Http404('Message does not exist') + allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date')) + lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname') + + isfirst = (msg == allmsg[0]) + + newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple()) + if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG: + ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE')) + if ims >= newest: + return HttpResponseNotModified() + + r = render_nav(NavContext(request), 'message_flat.html', { + 'msg': msg, + 'allmsg': allmsg, + 'lists': lists, + 'isfirst': isfirst, + }) + r['X-pgthread'] = ":%s:" % msg.threadid + r['Last-Modified'] = http_date(newest) + return r + @nocache @antispam_auth def message_raw(request, msgid): - ensure_message_permissions(request, msgid) + ensure_message_permissions(request, msgid) - curs = connection.cursor() - curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", { - 'messageid': msgid, - }) - row = curs.fetchall() - if len(row) != 1: - raise Http404('Message does not exist') + curs = connection.cursor() + curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", { + 'messageid': msgid, + }) + row = curs.fetchall() + if len(row) != 1: + raise Http404('Message does not exist') - if row[0][1]: - r = HttpResponse('This message has been hidden.', content_type='text/plain') - else: - r = HttpResponse(row[0][2], content_type='text/plain') - r['X-pgthread'] = ":%s:" % 
row[0][0] - return r + if row[0][1]: + r = HttpResponse('This message has been hidden.', content_type='text/plain') + else: + r = HttpResponse(row[0][2], content_type='text/plain') + r['X-pgthread'] = ":%s:" % row[0][0] + return r def _build_mbox(query, params, msgid=None): - connection.ensure_connection() + connection.ensure_connection() - # Rawmsg is not in the django model, so we have to query it separately - curs = connection.connection.cursor(name='mbox', withhold=True) - curs.itersize = 50 - curs.execute(query, params) + # Rawmsg is not in the django model, so we have to query it separately + curs = connection.connection.cursor(name='mbox', withhold=True) + curs.itersize = 50 + curs.execute(query, params) - firstmsg = curs.fetchone() - if msgid and firstmsg[0] != msgid: - # Always redirect to the first message in the thread when building - # the mbox, to not generate potentially multiple copies in - # the cache. - return HttpResponsePermanentRedirect(firstmsg[0]) + firstmsg = curs.fetchone() + if msgid and firstmsg[0] != msgid: + # Always redirect to the first message in the thread when building + # the mbox, to not generate potentially multiple copies in + # the cache. + return HttpResponsePermanentRedirect(firstmsg[0]) - def _one_message(raw): - # Parse as a message to generate headers - s = StringIO(raw) - parser = email.parser.Parser() - msg = parser.parse(s) - return msg.as_string(unixfrom=True) + def _one_message(raw): + # Parse as a message to generate headers + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) + msg = parser.parse(s) + return msg.as_string(unixfrom=True) + def _message_stream(first): + yield _one_message(first[1]) - def _message_stream(first): - yield _one_message(first[1]) + for mid, raw in curs: + yield _one_message(raw) - for mid, raw in curs: - yield _one_message(raw) + # Close must be done inside this function. If we close it in the + # main function, it won't let the iterator run to completion. + curs.close() - # Close must be done inside this function. If we close it in the - # main function, it won't let the iterator run to completion. 
- curs.close() + r = StreamingHttpResponse(_message_stream(firstmsg)) + r['Content-type'] = 'application/mbox' + return r - r = StreamingHttpResponse(_message_stream(firstmsg)) - r['Content-type'] = 'application/mbox' - return r @nocache @antispam_auth def message_mbox(request, msgid): - ensure_message_permissions(request, msgid) + ensure_message_permissions(request, msgid) - msg = get_object_or_404(Message, messageid=msgid) + msg = get_object_or_404(Message, messageid=msgid) + + return _build_mbox( + "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date", + { + 'thread': msg.threadid, + }, + msgid) - return _build_mbox( - "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date", - { - 'thread': msg.threadid, - }, - msgid) @nocache @antispam_auth def mbox(request, listname, listname2, mboxyear, mboxmonth): - if (listname != listname2): - raise Http404('List name mismatch') - l = get_object_or_404(List, listname=listname) - ensure_list_permissions(request, l) - - mboxyear = int(mboxyear) - mboxmonth = int(mboxmonth) - - query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date" - params = { - 'listid': l.listid, - 'startdate': date(mboxyear, mboxmonth, 1), - 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59), - } - - if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: - # Restrict to only view messages that the user has permissions on all threads they're on - query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))') - params['username'] = request.user.username - else: - # Just return the whole thing - query = query.replace('%%%', '') - return _build_mbox(query, params) + if (listname != listname2): + raise Http404('List name mismatch') + l = get_object_or_404(List, listname=listname) + ensure_list_permissions(request, l) + + mboxyear = int(mboxyear) + mboxmonth = int(mboxmonth) + + query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date" + params = { + 'listid': l.listid, + 'startdate': date(mboxyear, mboxmonth, 1), + 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59), + } + + if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser: + # Restrict to only view messages that the user has permissions on all threads they're on + query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))') + params['username'] = request.user.username + else: + # Just return the whole thing + query = query.replace('%%%', '') + return _build_mbox(query, params) + def search(request): - if not settings.PUBLIC_ARCHIVES: - # We don't support searching of non-public archives at all at this point. 
- # XXX: room for future improvement - return HttpResponseForbidden('Not public archives') - - # Only certain hosts are allowed to call the search API - if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS: - return HttpResponseForbidden('Invalid host') - - curs = connection.cursor() - - # Perform a search of the archives and return a JSON document. - # Expects the following (optional) POST parameters: - # q = query to search for - # ln = comma separate list of listnames to search in - # d = number of days back to search for, or -1 (or not specified) - # to search the full archives - # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date] - if not request.method == 'POST': - raise Http404('I only respond to POST') - - if 'q' not in request.POST: - raise Http404('No search query specified') - query = request.POST['q'] - - if 'ln' in request.POST: - try: - curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", { - 'names': request.POST['ln'].split(','), - }) - lists = [x for x, in curs.fetchall()] - except: - # If failing to parse list of lists, just search all - lists = None - else: - lists = None - - if 'd' in request.POST: - days = int(request.POST['d']) - if days < 1 or days > 365: - firstdate = None - else: - firstdate = datetime.now() - timedelta(days=days) - else: - firstdate = None - - if 's' in request.POST: - list_sort = request.POST['s'] - if not list_sort in ('d', 'r', 'i'): - list_stort = 'r' - else: - list_sort = 'r' - - # Ok, we have all we need to do the search - - if query.find('@') > 0: - # This could be a messageid. So try to get that one specifically first. - # We don't do a more specific check if it's a messageid because doing - # a key lookup is cheap... - curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", { - 'q': query, - }) - a = curs.fetchall() - if len(a) == 1: - # Yup, this was a messageid - resp = HttpResponse(content_type='application/json') - - json.dump({'messageidmatch': 1}, resp) - return resp - # If not found, fall through to a regular search - - curs.execute("SET gin_fuzzy_search_limit=10000") - qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)" - params = { - 'q': query, - } - if lists: - qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))" - params['lists'] = lists - if firstdate: - qstr += " AND m.date > %(date)s" - params['date'] = firstdate - if list_sort == 'r': - qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000" - elif list_sort == 'd': - qstr += " ORDER BY date DESC LIMIT 1000" - else: - qstr += " ORDER BY date ASC LIMIT 1000" - - curs.execute(qstr, params) - - resp = HttpResponse(content_type='application/json') - - json.dump([{ - 'm': messageid, - 'd': date.isoformat(), - 's': subject, - 'f': mailfrom, - 'r': rank, - 'a': abstract.replace("[[[[[[", "").replace("]]]]]]",""), - - } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()], - resp) - return resp + if not settings.PUBLIC_ARCHIVES: + # We don't support searching of non-public archives at all at this point. 
+        # XXX: room for future improvement
+        return HttpResponseForbidden('Not public archives')
+
+    # Only certain hosts are allowed to call the search API
+    if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS:
+        return HttpResponseForbidden('Invalid host')
+
+    curs = connection.cursor()
+
+    # Perform a search of the archives and return a JSON document.
+    # Expects the following (optional) POST parameters:
+    # q = query to search for
+    # ln = comma-separated list of listnames to search in
+    # d = number of days back to search for, or -1 (or not specified)
+    #     to search the full archives
+    # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date]
+    if not request.method == 'POST':
+        raise Http404('I only respond to POST')
+
+    if 'q' not in request.POST:
+        raise Http404('No search query specified')
+    query = request.POST['q']
+
+    if 'ln' in request.POST:
+        try:
+            curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", {
+                'names': request.POST['ln'].split(','),
+            })
+            lists = [x for x, in curs.fetchall()]
+        except:
+            # If failing to parse list of lists, just search all
+            lists = None
+    else:
+        lists = None
+
+    if 'd' in request.POST:
+        days = int(request.POST['d'])
+        if days < 1 or days > 365:
+            firstdate = None
+        else:
+            firstdate = datetime.now() - timedelta(days=days)
+    else:
+        firstdate = None
+
+    if 's' in request.POST:
+        list_sort = request.POST['s']
+        if list_sort not in ('d', 'r', 'i'):
+            list_sort = 'r'
+    else:
+        list_sort = 'r'
+
+    # Ok, we have all we need to do the search
+
+    if query.find('@') > 0:
+        # This could be a messageid. So try to get that one specifically first.
+        # We don't do a more specific check if it's a messageid because doing
+        # a key lookup is cheap...
+        curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", {
+            'q': query,
+        })
+        a = curs.fetchall()
+        if len(a) == 1:
+            # Yup, this was a messageid
+            resp = HttpResponse(content_type='application/json')
+
+            json.dump({'messageidmatch': 1}, resp)
+            return resp
+        # If not found, fall through to a regular search
+
+    curs.execute("SET gin_fuzzy_search_limit=10000")
+    qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)"
+    params = {
+        'q': query,
+    }
+    if lists:
+        qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))"
+        params['lists'] = lists
+    if firstdate:
+        qstr += " AND m.date > %(date)s"
+        params['date'] = firstdate
+    if list_sort == 'r':
+        qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000"
+    elif list_sort == 'd':
+        qstr += " ORDER BY date DESC LIMIT 1000"
+    else:
+        qstr += " ORDER BY date ASC LIMIT 1000"
+
+    curs.execute(qstr, params)
+
+    resp = HttpResponse(content_type='application/json')
+
+    json.dump([
+        {
+            'm': messageid,
+            'd': date.isoformat(),
+            's': subject,
+            'f': mailfrom,
+            'r': rank,
+            'a': abstract.replace("[[[[[[", "").replace("]]]]]]", ""),
+        } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()],
+        resp)
+    return resp
+
 @cache(seconds=10)
 def web_sync_timestamp(request):
-	s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n")
-	r = HttpResponse(s, content_type='text/plain')
-	r['Content-Length'] = len(s)
-	return r
+    s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n")
+    r = HttpResponse(s, content_type='text/plain')
+    r['Content-Length'] = len(s)
+    
return r + @cache(hours=8) def legacy(request, listname, year, month, msgnum): - curs = connection.cursor() - curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", { - 'list': listname, - 'year': year, - 'month': month, - 'msgnum': msgnum, - }) - r = curs.fetchall() - if len(r) != 1: - raise Http404('Message does not exist') - return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0]) + curs = connection.cursor() + curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", { + 'list': listname, + 'year': year, + 'month': month, + 'msgnum': msgnum, + }) + r = curs.fetchall() + if len(r) != 1: + raise Http404('Message does not exist') + return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0]) + # dynamic CSS serving, meaning we merge a number of different CSS into a # single one, making sure it turns into a single http response. We do this # dynamically, since the output will be cached. _dynamic_cssmap = { - 'base': ['media/css/main.css', - 'media/css/normalize.css',], - 'docs': ['media/css/global.css', - 'media/css/table.css', - 'media/css/text.css', - 'media/css/docs.css'], - } + 'base': ['media/css/main.css', + 'media/css/normalize.css', ], + 'docs': ['media/css/global.css', + 'media/css/table.css', + 'media/css/text.css', + 'media/css/docs.css'], +} + @cache(hours=8) def dynamic_css(request, css): - if css not in _dynamic_cssmap: - raise Http404('CSS not found') - files = _dynamic_cssmap[css] - resp = HttpResponse(content_type='text/css') - - # We honor if-modified-since headers by looking at the most recently - # touched CSS file. - latestmod = 0 - for fn in files: - try: - stime = os.stat(fn).st_mtime - if latestmod < stime: - latestmod = stime - except OSError: - # If we somehow referred to a file that didn't exist, or - # one that we couldn't access. - raise Http404('CSS (sub) not found') - if 'HTTP_IF_MODIFIED_SINCE' in request.META: - # This code is mostly stolen from django :) - matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", - request.META.get('HTTP_IF_MODIFIED_SINCE'), - re.IGNORECASE) - header_mtime = parse_http_date_safe(matches.group(1)) - # We don't do length checking, just the date - if int(latestmod) <= header_mtime: - return HttpResponseNotModified(content_type='text/css') - resp['Last-Modified'] = http_date(latestmod) - - for fn in files: - with open(fn) as f: - resp.write("/* %s */\n" % fn) - resp.write(f.read()) - resp.write("\n") - - return resp + if css not in _dynamic_cssmap: + raise Http404('CSS not found') + files = _dynamic_cssmap[css] + resp = HttpResponse(content_type='text/css') + + # We honor if-modified-since headers by looking at the most recently + # touched CSS file. + latestmod = 0 + for fn in files: + try: + stime = os.stat(fn).st_mtime + if latestmod < stime: + latestmod = stime + except OSError: + # If we somehow referred to a file that didn't exist, or + # one that we couldn't access. 
+ raise Http404('CSS (sub) not found') + if 'HTTP_IF_MODIFIED_SINCE' in request.META: + # This code is mostly stolen from django :) + matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", + request.META.get('HTTP_IF_MODIFIED_SINCE'), + re.IGNORECASE) + header_mtime = parse_http_date_safe(matches.group(1)) + # We don't do length checking, just the date + if int(latestmod) <= header_mtime: + return HttpResponseNotModified(content_type='text/css') + resp['Last-Modified'] = http_date(latestmod) + + for fn in files: + with open(fn) as f: + resp.write("/* %s */\n" % fn) + resp.write(f.read()) + resp.write("\n") + + return resp + # Redirect to the requested url, with a slash first. This is used to remove # trailing slashes on messageid links by doing a permanent redirect. This is @@ -769,11 +809,12 @@ def dynamic_css(request, css): # in the cache. @cache(hours=8) def slash_redirect(request, url): - return HttpResponsePermanentRedirect("/%s" % url) + return HttpResponsePermanentRedirect("/%s" % url) + # Redirect the requested URL to whatever happens to be in the regexp capture. # This is used for user agents that generate broken URLs that are easily # captured using regexp. @cache(hours=8) def re_redirect(request, prefix, msgid): - return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid)) + return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid)) diff --git a/django/archives/settings.py b/django/archives/settings.py index 4fe7e96..80b990e 100644 --- a/django/archives/settings.py +++ b/django/archives/settings.py @@ -11,7 +11,7 @@ DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. + 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. 'NAME': 'archives', # Or path to database file if using sqlite3. 'USER': '', # Not used with sqlite3. 'PASSWORD': '', # Not used with sqlite3. @@ -74,55 +74,33 @@ # Don't forget to use absolute paths, not relative paths. ) -# List of finder classes that know how to find static files in -# various locations. -#STATICFILES_FINDERS = ( -# 'django.contrib.staticfiles.finders.FileSystemFinder', -# 'django.contrib.staticfiles.finders.AppDirectoriesFinder', -# 'django.contrib.staticfiles.finders.DefaultStorageFinder', -#) - # Make this unique, and don't share it with anybody. 
SECRET_KEY = '7j9q&&!g26rkh!=g%1zb@20b^k^gmzy4=!mhzu2wesxb9b%16m' MIDDLEWARE_CLASSES = [ 'django.middleware.common.CommonMiddleware', -# 'django.contrib.sessions.middleware.SessionMiddleware', -# 'django.middleware.csrf.CsrfViewMiddleware', -# 'django.contrib.auth.middleware.AuthenticationMiddleware', -# 'django.contrib.messages.middleware.MessageMiddleware', ] ROOT_URLCONF = 'archives.urls' TEMPLATES = [{ - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.request', - 'django.contrib.messages.context_processors.messages', - 'archives.util.PGWebContextProcessor', - ], - 'loaders': [ - 'django.template.loaders.filesystem.Loader', - 'django.template.loaders.app_directories.Loader', - ], - }, + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.request', + 'django.contrib.messages.context_processors.messages', + 'archives.util.PGWebContextProcessor', + ], + 'loaders': [ + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', + ], + }, }] INSTALLED_APPS = [ -# 'django.contrib.auth', -# 'django.contrib.contenttypes', -# 'django.contrib.sessions', -# 'django.contrib.sites', -# 'django.contrib.messages', -# 'django.contrib.staticfiles', - # Uncomment the next line to enable the admin: - # 'django.contrib.admin', - # Uncomment the next line to enable admin documentation: - # 'django.contrib.admindocs', - 'archives.mailarchives', + 'archives.mailarchives', ] # A sample logging configuration. The only tangible logging @@ -148,10 +126,8 @@ } } - - # Required for lighttpd -FORCE_SCRIPT_NAME="" +FORCE_SCRIPT_NAME = "" # Always override! SEARCH_CLIENTS = ('127.0.0.1',) @@ -159,23 +135,23 @@ PUBLIC_ARCHIVES = False try: - from settings_local import * + from .settings_local import * except ImportError: - pass + pass # If this is a non-public site, enable middleware for handling logins etc if not PUBLIC_ARCHIVES: - MIDDLEWARE_CLASSES = [ - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - ] + MIDDLEWARE_CLASSES - MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware') - - INSTALLED_APPS = [ - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - ] + INSTALLED_APPS - - from archives.util import validate_new_user - PGAUTH_CREATEUSER_CALLBACK=validate_new_user + MIDDLEWARE_CLASSES = [ + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + ] + MIDDLEWARE_CLASSES + MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware') + + INSTALLED_APPS = [ + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + ] + INSTALLED_APPS + + from archives.util import validate_new_user + PGAUTH_CREATEUSER_CALLBACK = validate_new_user diff --git a/django/archives/urls.py b/django/archives/urls.py index a582acc..dfc7e33 100644 --- a/django/archives/urls.py +++ b/django/archives/urls.py @@ -54,7 +54,7 @@ url(r'^list/([\w-]+|\*)/latest.json$', archives.mailarchives.api.latest), url(r'^message-id.json/(.+)$', archives.mailarchives.api.thread), url(r'^listinfo/$', archives.mailarchives.api.listinfo), -# url(r'^thread/(.+)/subscribe/$', archives.mailarchives.api.thread_subscribe), + # url(r'^thread/(.+)/subscribe/$', archives.mailarchives.api.thread_subscribe), # 
Legacy forwarding from old archives site url(r'^message-id/legacy/([\w-]+)/(\d+)-(\d+)/msg(\d+).php$', archives.mailarchives.views.legacy), diff --git a/django/archives/util.py b/django/archives/util.py index 425fffe..cf39e25 100644 --- a/django/archives/util.py +++ b/django/archives/util.py @@ -2,43 +2,46 @@ from django.db import connection from django.utils.functional import SimpleLazyObject + def validate_new_user(username, email, firstname, lastname): - # Only allow user creation if they are already a subscriber - curs = connection.cursor() - curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", { - 'username': username, - }) - if curs.fetchone()[0]: - # User is subscribed to something, so allow creation - return None + # Only allow user creation if they are already a subscriber + curs = connection.cursor() + curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", { + 'username': username, + }) + if curs.fetchone()[0]: + # User is subscribed to something, so allow creation + return None + + return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.") - return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.") def _get_gitrev(): - # Return the current git revision, that is used for - # cache-busting URLs. - try: - with open('../.git/refs/heads/master') as f: - return f.readline()[:8] - except IOError: - # A "git gc" will remove the ref and replace it with a packed-refs. - try: - with open('../.git/packed-refs') as f: - for l in f.readlines(): - if l.endswith("refs/heads/master\n"): - return l[:8] - # Not found in packed-refs. Meh, just make one up. - return 'ffffffff' - except IOError: - # If packed-refs also can't be read, just give up - return 'eeeeeeee' + # Return the current git revision, that is used for + # cache-busting URLs. + try: + with open('../.git/refs/heads/master') as f: + return f.readline()[:8] + except IOError: + # A "git gc" will remove the ref and replace it with a packed-refs. + try: + with open('../.git/packed-refs') as f: + for l in f.readlines(): + if l.endswith("refs/heads/master\n"): + return l[:8] + # Not found in packed-refs. Meh, just make one up. + return 'ffffffff' + except IOError: + # If packed-refs also can't be read, just give up + return 'eeeeeeee' + # Template context processor to add information about the root link and # the current git revision. git revision is returned as a lazy object so # we don't spend effort trying to load it if we don't need it (though # all general pages will need it since it's used to render the css urls) def PGWebContextProcessor(request): - gitrev = SimpleLazyObject(_get_gitrev) - return { - 'gitrev': gitrev, - } + gitrev = SimpleLazyObject(_get_gitrev) + return { + 'gitrev': gitrev, + } diff --git a/loader/clean_date.py b/loader/clean_date.py index 4ea2951..2297be6 100755 --- a/loader/clean_date.py +++ b/loader/clean_date.py @@ -15,63 +15,66 @@ import psycopg2 + def scan_message(messageid, olddate, curs): - u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid - print("Scanning message at %s (date reported as %s)..." % (u, olddate)) - - f = urlopen(u) - p = Parser() - msg = p.parse(f) - f.close() - - # Can be either one of them, but we really don't care... 
- ds = None - for k,r in list(msg.items()): - if k != 'Received': continue - - print("Trying on %s" % r) - m = re.search(';\s*(.*)$', r) - if m: - ds = m.group(1) - break - m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r) - if m: - ds = m.group(1) - break - - if not ds: - print("Could not find date. Sorry.") - return False - d = None - try: - d = dateutil.parser.parse(ds) - except: - print("Could not parse date '%s', sorry." % ds) - return - - while True: - x = input("Parsed this as date %s. Update? " % d) - if x.upper() == 'Y': - curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { - 'd': d, - 'm': messageid, - }) - print("Updated.") - break - elif x.upper() == 'N': - break - + u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid + print("Scanning message at %s (date reported as %s)..." % (u, olddate)) + + f = urlopen(u) + p = Parser() + msg = p.parse(f) + f.close() + + # Can be either one of them, but we really don't care... + ds = None + for k, r in list(msg.items()): + if k != 'Received': + continue + + print("Trying on %s" % r) + m = re.search(';\s*(.*)$', r) + if m: + ds = m.group(1) + break + m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r) + if m: + ds = m.group(1) + break + + if not ds: + print("Could not find date. Sorry.") + return False + d = None + try: + d = dateutil.parser.parse(ds) + except: + print("Could not parse date '%s', sorry." % ds) + return + + while True: + x = input("Parsed this as date %s. Update? " % d) + if x.upper() == 'Y': + curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { + 'd': d, + 'm': messageid, + }) + print("Updated.") + break + elif x.upper() == 'N': + break + + if __name__ == "__main__": - cfg = ConfigParser() - cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) - connstr = cfg.get('db','connstr') + cfg = ConfigParser() + cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) + connstr = cfg.get('db', 'connstr') - conn = psycopg2.connect(connstr) + conn = psycopg2.connect(connstr) - curs = conn.cursor() - curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'") - for messageid, date in curs.fetchall(): - scan_message(messageid, date, curs) + curs = conn.cursor() + curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'") + for messageid, date in curs.fetchall(): + scan_message(messageid, date, curs) - conn.commit() - print("Done.") + conn.commit() + print("Done.") diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py index c2299e1..dfb8d3d 100755 --- a/loader/generate_mbox.py +++ b/loader/generate_mbox.py @@ -21,96 +21,95 @@ def generate_single_mbox(conn, listid, year, month, destination): - curs = conn.cursor() - curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { - 'listid': listid, - 'startdate': date(year, month, 1), - 'enddate': date(year, month, calendar.monthrange(year, month)[1]), - }) - with open(destination, 'w', encoding='utf8') as f: - for id, raw, in curs: - s = BytesIO(raw) - parser = email.parser.BytesParser(policy=email.policy.compat32) - msg = parser.parse(s) - try: - x = msg.as_string(unixfrom=True) - f.write(x) - except UnicodeEncodeError as e: - print("Not including {0}, unicode error".format(msg['message-id'])) - 
except Exception as e: - print("Not including {0}, exception {1}".format(msg['message-id'], e)) + curs = conn.cursor() + curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { + 'listid': listid, + 'startdate': date(year, month, 1), + 'enddate': date(year, month, calendar.monthrange(year, month)[1]), + }) + with open(destination, 'w', encoding='utf8') as f: + for id, raw, in curs: + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) + msg = parser.parse(s) + try: + x = msg.as_string(unixfrom=True) + f.write(x) + except UnicodeEncodeError as e: + print("Not including {0}, unicode error".format(msg['message-id'])) + except Exception as e: + print("Not including {0}, exception {1}".format(msg['message-id'], e)) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate mbox file(s)") - parser.add_argument('--list', type=str, help='List to generate for') - parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02') - parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True) - parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists') - parser.add_argument('--quiet', action='store_true', help='Run quiet') + parser = argparse.ArgumentParser(description="Generate mbox file(s)") + parser.add_argument('--list', type=str, help='List to generate for') + parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02') + parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True) + parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists') + parser.add_argument('--quiet', action='store_true', help='Run quiet') - args = parser.parse_args() + args = parser.parse_args() - if args.auto: - if (args.list or args.month): - print("Must not specify list and month when auto-generating!") - sys.exit(1) - if not os.path.isdir(args.destination): - print("Destination must be a directory, and exist, when auto-generating") - sys.exit(1) - else: - if not (args.list and args.month and args.destination): - print("Must specify list, month and destination when generating a single mailbox") - parser.print_help() - sys.exit(1) + if args.auto: + if (args.list or args.month): + print("Must not specify list and month when auto-generating!") + sys.exit(1) + if not os.path.isdir(args.destination): + print("Destination must be a directory, and exist, when auto-generating") + sys.exit(1) + else: + if not (args.list and args.month and args.destination): + print("Must specify list, month and destination when generating a single mailbox") + parser.print_help() + sys.exit(1) + # Arguments OK, now connect + cfg = ConfigParser() + cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini')) + try: + connstr = cfg.get('db', 'connstr') + except: + connstr = 'need_connstr' - # Arguments OK, now connect - cfg = ConfigParser() - cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini')) - try: - connstr = cfg.get('db','connstr') - except: - connstr = 'need_connstr' + conn = psycopg2.connect(connstr) + curs = conn.cursor() - conn = psycopg2.connect(connstr) - curs = conn.cursor() + if args.auto: + curs.execute("SELECT 
listid, listname FROM lists WHERE active ORDER BY listname") + all_lists = curs.fetchall() + today = date.today() + yesterday = today - timedelta(days=1) + if today.month == yesterday.month: + # Same month, so do it + monthrange = ((today.year, today.month),) + else: + monthrange = ((today.year, today.month), (yesterday.year, yesterday.month)) + for lid, lname in all_lists: + for year, month in monthrange: + fullpath = os.path.join(args.destination, lname, 'files/public/archive') + if not os.path.isdir(fullpath): + os.makedirs(fullpath) + if not args.quiet: + print("Generating {0}-{1} for {2}".format(year, month, lname)) + generate_single_mbox(conn, lid, year, month, + os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) + else: + # Parse year and month + m = re.match('^(\d{4})-(\d{2})$', args.month) + if not m: + print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) + sys.exit(1) + year = int(m.group(1)) + month = int(m.group(2)) - if args.auto: - curs.execute("SELECT listid, listname FROM lists WHERE active ORDER BY listname") - all_lists = curs.fetchall() - today = date.today() - yesterday = today - timedelta(days=1) - if today.month == yesterday.month: - # Same month, so do it - monthrange = ((today.year, today.month),) - else: - monthrange = ((today.year, today.month),(yesterday.year, yesterday.month)) - for lid, lname in all_lists: - for year, month in monthrange: - fullpath = os.path.join(args.destination, lname, 'files/public/archive') - if not os.path.isdir(fullpath): - os.makedirs(fullpath) - if not args.quiet: - print("Generating {0}-{1} for {2}".format(year, month, lname)) - generate_single_mbox(conn, lid, year, month, - os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) - else: - # Parse year and month - m = re.match('^(\d{4})-(\d{2})$', args.month) - if not m: - print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) - sys.exit(1) - year = int(m.group(1)) - month = int(m.group(2)) + curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", { + 'name': args.list, + }) + if curs.rowcount != 1: + print("List {0} not found.".format(args.list)) + sys.exit(1) - curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", { - 'name': args.list, - }) - if curs.rowcount != 1: - print("List {0} not found.".format(args.list)) - sys.exit(1) - - if not args.quiet: - print("Generating {0}-{1} for {2}".format(year, month, args.list)) - generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) + if not args.quiet: + print("Generating {0}-{1} for {2}".format(year, month, args.list)) + generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) diff --git a/loader/hide_message.py b/loader/hide_message.py index 8bb9359..4a4d10a 100755 --- a/loader/hide_message.py +++ b/loader/hide_message.py @@ -15,85 +15,85 @@ from lib.varnish import VarnishPurger reasons = [ - None, # Placeholder for 0 - "virus", - "violates policies", - "privacy", - "corrupt", + None, # Placeholder for 0 + "virus", + "violates policies", + "privacy", + "corrupt", ] if __name__ == "__main__": - optparser = OptionParser() - optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide') - - (opt, args) = optparser.parse_args() - - if (len(args)): - print("No bare arguments accepted") - optparser.print_help() - sys.exit(1) - - if not opt.msgid: - print("Message-id must be specified") - optparser.print_help() - sys.exit(1) - - cfg = ConfigParser() - cfg.read('%s/archives.ini' % 
os.path.realpath(os.path.dirname(sys.argv[0]))) - try: - connstr = cfg.get('db','connstr') - except: - connstr = 'need_connstr' - - conn = psycopg2.connect(connstr) - curs = conn.cursor() - - curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", { - 'msgid': opt.msgid, - }) - if curs.rowcount <= 0: - print("Message not found.") - sys.exit(1) - - id, threadid, previous = curs.fetchone() - - # Message found, ask for reason - reason = 0 - print("Current status: %s" % reasons[previous or 0]) - print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) - while True: - reason = input('Reason for hiding message? ') - try: - reason = int(reason) - except ValueError: - continue - - if reason == 0: - print("Un-hiding message") - reason = None - break - else: - try: - print("Hiding message for reason: %s" % reasons[reason]) - except: - continue - break - if previous == reason: - print("No change in status, not updating") - conn.close() - sys.exit(0) - - curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", { - 'new': reason, - 'id': id, - }) - if curs.rowcount != 1: - print("Failed to update! Not hiding!") - conn.rollback() - sys.exit(0) - conn.commit() - - VarnishPurger(cfg).purge([int(threadid), ]) - conn.close() - - print("Message hidden and varnish purge triggered.") + optparser = OptionParser() + optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide') + + (opt, args) = optparser.parse_args() + + if (len(args)): + print("No bare arguments accepted") + optparser.print_help() + sys.exit(1) + + if not opt.msgid: + print("Message-id must be specified") + optparser.print_help() + sys.exit(1) + + cfg = ConfigParser() + cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) + try: + connstr = cfg.get('db', 'connstr') + except: + connstr = 'need_connstr' + + conn = psycopg2.connect(connstr) + curs = conn.cursor() + + curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", { + 'msgid': opt.msgid, + }) + if curs.rowcount <= 0: + print("Message not found.") + sys.exit(1) + + id, threadid, previous = curs.fetchone() + + # Message found, ask for reason + reason = 0 + print("Current status: %s" % reasons[previous or 0]) + print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) + while True: + reason = input('Reason for hiding message? ') + try: + reason = int(reason) + except ValueError: + continue + + if reason == 0: + print("Un-hiding message") + reason = None + break + else: + try: + print("Hiding message for reason: %s" % reasons[reason]) + except: + continue + break + if previous == reason: + print("No change in status, not updating") + conn.close() + sys.exit(0) + + curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", { + 'new': reason, + 'id': id, + }) + if curs.rowcount != 1: + print("Failed to update! Not hiding!") + conn.rollback() + sys.exit(0) + conn.commit() + + VarnishPurger(cfg).purge([int(threadid), ]) + conn.close() + + print("Message hidden and varnish purge triggered.") diff --git a/loader/legacy/scan_old_archives.py b/loader/legacy/scan_old_archives.py index 5d9a242..53d339a 100755 --- a/loader/legacy/scan_old_archives.py +++ b/loader/legacy/scan_old_archives.py @@ -101,23 +101,23 @@ hp = HTMLParser() def get_messageid(fn): - with open(fn) as f: - for l in f: - if l.startswith('