From: Magnus Hagander Date: Thu, 3 Jan 2019 10:04:29 +0000 (+0100) Subject: Update loader scripts to use python3 syntax X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=bb5775efe5f938461537e0c95c7c110875e4718b;p=pgarchives.git Update loader scripts to use python3 syntax Some minor cleanups as well, but mostly just the output of the 2to3 tool and some manual changes. --- diff --git a/loader/clean_date.py b/loader/clean_date.py index faac7ab..4ea2951 100755 --- a/loader/clean_date.py +++ b/loader/clean_date.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Clean up old, broken, dates # @@ -7,17 +7,17 @@ import os import sys import re -from ConfigParser import ConfigParser +from configparser import ConfigParser from email.parser import Parser -from urllib import urlopen +from urllib.request import urlopen import dateutil.parser import psycopg2 def scan_message(messageid, olddate, curs): u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid - print "Scanning message at %s (date reported as %s)..." % (u, olddate) + print("Scanning message at %s (date reported as %s)..." % (u, olddate)) f = urlopen(u) p = Parser() @@ -26,10 +26,10 @@ def scan_message(messageid, olddate, curs): # Can be either one of them, but we really don't care... ds = None - for k,r in msg.items(): + for k,r in list(msg.items()): if k != 'Received': continue - print "Trying on %s" % r + print("Trying on %s" % r) m = re.search(';\s*(.*)$', r) if m: ds = m.group(1) @@ -40,23 +40,23 @@ def scan_message(messageid, olddate, curs): break if not ds: - print "Could not find date. Sorry." + print("Could not find date. Sorry.") return False d = None try: d = dateutil.parser.parse(ds) except: - print "Could not parse date '%s', sorry." % ds + print("Could not parse date '%s', sorry." % ds) return while True: - x = raw_input("Parsed this as date %s. Update? " % d) + x = input("Parsed this as date %s. Update? " % d) if x.upper() == 'Y': curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { 'd': d, 'm': messageid, }) - print "Updated." + print("Updated.") break elif x.upper() == 'N': break @@ -74,4 +74,4 @@ if __name__ == "__main__": scan_message(messageid, date, curs) conn.commit() - print "Done." + print("Done.") diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py index 42404c4..c2299e1 100755 --- a/loader/generate_mbox.py +++ b/loader/generate_mbox.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # generate_mbox.py - generate an mbox file from the rawtxt stored # in the datatabase. 
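The clean_date.py hunks above cover the usual 2to3 surface: print() as a function, input() instead of raw_input(), and the configparser / urllib.request module renames. A minimal, self-contained sketch of the same Received-header date recovery under Python 3, with a placeholder URL; since urlopen() now returns a bytes stream, this sketch parses it with BytesParser:

    from email.parser import BytesParser
    from urllib.request import urlopen
    import re

    import dateutil.parser

    def received_date(url):
        # urlopen() gives a binary stream in Python 3, so use BytesParser
        # rather than the text-mode email.parser.Parser
        with urlopen(url) as f:
            msg = BytesParser().parse(f)
        # Take the date from the first Received header that carries one
        for value in msg.get_all('Received', []):
            m = re.search(r';\s*(.*)$', value)
            if m:
                try:
                    return dateutil.parser.parse(m.group(1))
                except (ValueError, OverflowError):
                    continue
        return None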
@@ -11,27 +11,34 @@ import calendar import re import argparse -from ConfigParser import ConfigParser +from configparser import ConfigParser import email.parser +import email.policy import email.generator -from StringIO import StringIO +from io import BytesIO import psycopg2 def generate_single_mbox(conn, listid, year, month, destination): curs = conn.cursor() - curs.execute("SELECT rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { + curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { 'listid': listid, 'startdate': date(year, month, 1), 'enddate': date(year, month, calendar.monthrange(year, month)[1]), }) - with open(destination, 'w') as f: - for raw, in curs: - s = StringIO(raw) - parser = email.parser.Parser() + with open(destination, 'w', encoding='utf8') as f: + for id, raw, in curs: + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) msg = parser.parse(s) - f.write(msg.as_string(unixfrom=True)) + try: + x = msg.as_string(unixfrom=True) + f.write(x) + except UnicodeEncodeError as e: + print("Not including {0}, unicode error".format(msg['message-id'])) + except Exception as e: + print("Not including {0}, exception {1}".format(msg['message-id'], e)) if __name__ == "__main__": @@ -46,14 +53,14 @@ if __name__ == "__main__": if args.auto: if (args.list or args.month): - print "Must not specify list and month when auto-generating!" + print("Must not specify list and month when auto-generating!") sys.exit(1) if not os.path.isdir(args.destination): - print "Destination must be a directory, and exist, when auto-generating" + print("Destination must be a directory, and exist, when auto-generating") sys.exit(1) else: if not (args.list and args.month and args.destination): - print "Must specify list, month and destination when generating a single mailbox" + print("Must specify list, month and destination when generating a single mailbox") parser.print_help() sys.exit(1) @@ -85,14 +92,14 @@ if __name__ == "__main__": if not os.path.isdir(fullpath): os.makedirs(fullpath) if not args.quiet: - print "Generating {0}-{1} for {2}".format(year, month, lname) + print("Generating {0}-{1} for {2}".format(year, month, lname)) generate_single_mbox(conn, lid, year, month, os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) else: # Parse year and month m = re.match('^(\d{4})-(\d{2})$', args.month) if not m: - print "Month must be specified on format YYYY-MM, not {0}".format(args.month) + print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) sys.exit(1) year = int(m.group(1)) month = int(m.group(2)) @@ -101,9 +108,9 @@ if __name__ == "__main__": 'name': args.list, }) if curs.rowcount != 1: - print "List {0} not found.".format(args.list) + print("List {0} not found.".format(args.list)) sys.exit(1) if not args.quiet: - print "Generating {0}-{1} for {2}".format(year, month, args.list) + print("Generating {0}-{1} for {2}".format(year, month, args.list)) generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) diff --git a/loader/hide_message.py b/loader/hide_message.py index 51bffc6..8bb9359 100755 --- a/loader/hide_message.py +++ b/loader/hide_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # 
hide_message.py - hide a message (spam etc) in the archives, including # frontend expiry. @@ -8,7 +8,7 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 @@ -29,12 +29,12 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_help() sys.exit(1) if not opt.msgid: - print "Message-id must be specified" + print("Message-id must be specified") optparser.print_help() sys.exit(1) @@ -52,34 +52,34 @@ if __name__ == "__main__": 'msgid': opt.msgid, }) if curs.rowcount <= 0: - print "Message not found." + print("Message not found.") sys.exit(1) id, threadid, previous = curs.fetchone() # Message found, ask for reason reason = 0 - print "Current status: %s" % reasons[previous or 0] - print "\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))) + print("Current status: %s" % reasons[previous or 0]) + print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) while True: - reason = raw_input('Reason for hiding message? ') + reason = input('Reason for hiding message? ') try: reason = int(reason) except ValueError: continue if reason == 0: - print "Un-hiding message" + print("Un-hiding message") reason = None break else: try: - print "Hiding message for reason: %s" % reasons[reason] + print("Hiding message for reason: %s" % reasons[reason]) except: continue break if previous == reason: - print "No change in status, not updating" + print("No change in status, not updating") conn.close() sys.exit(0) @@ -88,7 +88,7 @@ if __name__ == "__main__": 'id': id, }) if curs.rowcount != 1: - print "Failed to update! Not hiding!" + print("Failed to update! Not hiding!") conn.rollback() sys.exit(0) conn.commit() @@ -96,4 +96,4 @@ if __name__ == "__main__": VarnishPurger(cfg).purge([int(threadid), ]) conn.close() - print "Message hidden and varnish purge triggered." + print("Message hidden and varnish purge triggered.") diff --git a/loader/lib/log.py b/loader/lib/log.py index 82e72fb..5b6379a 100644 --- a/loader/lib/log.py +++ b/loader/lib/log.py @@ -7,13 +7,13 @@ class Log(object): def status(self, msg): if self.verbose: - print msg + print(msg) def log(self, msg): - print msg + print(msg) def error(self, msg): - print msg + print(msg) def print_status(self): opstatus.print_status() @@ -27,7 +27,7 @@ class OpStatus(object): self.overwritten = 0 def print_status(self): - print "%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten) + print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)) log = Log() diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py index c4982ed..77c83b0 100644 --- a/loader/lib/mbox.py +++ b/loader/lib/mbox.py @@ -1,5 +1,5 @@ from subprocess import Popen, PIPE -import cStringIO as StringIO +from io import BytesIO # The hack of all hacks... # The python mbox parser fails to split some messages from mj2 @@ -8,6 +8,7 @@ import cStringIO as StringIO # reassemble it to one long stream with a unique separator, # and then split it apart again in python.. Isn't it cute? 
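A short sketch of the "split it apart again in python" step described in the comment above, written as a Python 3 generator; the formail pipeline that feeds the real MailboxBreakupParser is omitted, and the input is assumed to be a binary stream, which is why the separator has to be bytes (the bSEPARATOR introduced just below):

    from io import BytesIO

    SEPARATOR = b"ABCARCHBREAK123" * 50

    def split_messages(binary_stream):
        # Accumulate binary lines until the separator, then hand the
        # finished message back as a BytesIO
        buf = BytesIO()
        for line in binary_stream:
            if line.rstrip() == SEPARATOR:
                buf.seek(0)
                yield buf
                buf = BytesIO()
            else:
                buf.write(line)
        if buf.tell():
            buf.seek(0)
            yield buf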
SEPARATOR = "ABCARCHBREAK123" * 50 +bSEPARATOR = bytes(SEPARATOR, 'ascii') class MailboxBreakupParser(object): def __init__(self, fn): @@ -27,21 +28,21 @@ class MailboxBreakupParser(object): def stderr_output(self): return self.pipe.stderr.read() - def next(self): - sio = StringIO.StringIO() + def __next__(self): + sio = BytesIO() while True: try: - l = self.pipe.stdout.next() + l = next(self.pipe.stdout) except StopIteration: # End of file! self.EOF = True if sio.tell() == 0: # Nothing read yet, so return None instead of an empty - # stringio + # bytesio return None sio.seek(0) return sio - if l.rstrip() == SEPARATOR: + if l.rstrip() == bSEPARATOR: # Reached a separator. Meaning we're not at end of file, # but we're at end of message. sio.seek(0) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 15009c4..8ee25c5 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -2,23 +2,24 @@ import re import datetime import dateutil.parser -from email.parser import Parser -from email.header import decode_header +from email.parser import BytesParser +from email.header import decode_header, Header from email.errors import HeaderParseError -from HTMLParser import HTMLParser, HTMLParseError +from email.policy import compat32 +from html.parser import HTMLParser import tidylib -import StringIO +import io from lib.exception import IgnorableException from lib.log import log class ArchivesParser(object): def __init__(self): - self.parser = Parser() + self.parser = BytesParser(policy=compat32) def parse(self, stream): self.rawtxt = stream.read() - self.msg = self.parser.parse(StringIO.StringIO(self.rawtxt)) + self.msg = self.parser.parse(io.BytesIO(self.rawtxt)) def is_msgid(self, msgid): # Look for a specific messageid. This means we might parse it twice, @@ -26,7 +27,7 @@ class ArchivesParser(object): try: if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid: return True - except Exception, e: + except Exception as e: return False def analyze(self, date_override=None): @@ -49,13 +50,13 @@ class ArchivesParser(object): self.parents = [] # The first one is in-reply-to, if it exists if self.get_optional('in-reply-to'): - m = self.clean_messageid(self.get_optional('in-reply-to'), True) + m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True) if m: self.parents.append(m) # Then we add all References values, in backwards order if self.get_optional('references'): - cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.get_optional('references').split())] + cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())] # Can't do this with a simple self.parents.extend() due to broken # mailers that add the same reference more than once. And we can't # use a set() to make it unique, because order is very important @@ -130,19 +131,19 @@ class ArchivesParser(object): params = msg.get_params() if not params: # No content-type, so we assume us-ascii - return unicode(b, 'us-ascii', errors='ignore') + return str(b, 'us-ascii', errors='ignore') for k,v in params: if k.lower() == 'charset': charset = v break if charset: try: - return unicode(b, self.clean_charset(charset), errors='ignore') - except LookupError, e: + return str(b, self.clean_charset(charset), errors='ignore') + except LookupError as e: raise IgnorableException("Failed to get unicode payload: %s" % e) else: # XXX: reasonable default? 
- return unicode(b, errors='ignore') + return str(b, errors='ignore') # Return None or empty string, depending on what we got back return b @@ -154,8 +155,8 @@ class ArchivesParser(object): if b: # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will # later reject.. - if b.find(u'\udbff\n\udef8'): - b = b.replace(u'\udbff\n\udef8', '') + if b.find('\udbff\n\udef8'): + b = b.replace('\udbff\n\udef8', '') # Remove postgres specific mail footer - if it's there m = self._re_footer.match(b) @@ -249,15 +250,15 @@ class ArchivesParser(object): # If this is a header-encoded filename, start by decoding that if filename.startswith('=?'): decoded, encoding = decode_header(filename)[0] - return unicode(decoded, encoding, errors='ignore') + return str(decoded, encoding, errors='ignore') # If it's already unicode, just return it - if isinstance(filename, unicode): + if isinstance(filename, str): return filename # Anything that's not UTF8, we just get rid of. We can live with # filenames slightly mangled in this case. - return unicode(filename, 'utf-8', errors='ignore') + return str(filename, 'utf-8', errors='ignore') def _extract_filename(self, container): # Try to get the filename for an attachment in the container. @@ -324,7 +325,7 @@ class ArchivesParser(object): # by majordomo with the footer. So if that one is present, # we need to explicitly exclude it again. b = container.get_payload(decode=True) - if not self._re_footer.match(b): + if isinstance(b, str) and not self._re_footer.match(b): # We know there is no name for this one self.attachments.append((None, container.get_content_type(), b)) return @@ -423,9 +424,14 @@ class ArchivesParser(object): # enough... dp = datetime.datetime(*dp.utctimetuple()[:6]) return dp - except Exception, e: + except Exception as e: raise IgnorableException("Failed to parse date '%s': %s" % (d, e)) + def _maybe_decode(self, s, charset): + if isinstance(s, str): + return s.strip(' ') + return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ') + # Workaround for broken quoting in some MUAs (see below) _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE) def _decode_mime_header(self, hdr, email_workaround): @@ -449,28 +455,32 @@ class ArchivesParser(object): hdr = self._re_mailworkaround.sub(r'\1', hdr) try: - return " ".join([unicode(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore') for s,charset in decode_header(hdr)]) - except HeaderParseError, e: + return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)]) + except HeaderParseError as e: # Parser error is typically someone specifying an encoding, # but then not actually using that encoding. 
We'll do the best # we can, which is cut it down to ascii and ignore errors - return unicode(hdr, 'us-ascii', errors='ignore') + return str(hdr, 'us-ascii', errors='ignore').strip(' ') def decode_mime_header(self, hdr, email_workaround=False): try: + if isinstance(hdr, Header): + hdr = hdr.encode() + h = self._decode_mime_header(hdr, email_workaround) if h: return h.replace("\0", "") return '' - except LookupError, e: + except LookupError as e: raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e)) - except ValueError, ve: + except ValueError as ve: raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve)) def get_mandatory(self, fieldname): try: x = self.msg[fieldname] - if x==None: raise Exception() + if x==None: + raise Exception() return x except: raise IgnorableException("Mandatory field '%s' is missing" % fieldname) @@ -496,17 +506,15 @@ class ArchivesParser(object): 'show-info': 0, }) if errors: - print("HTML tidy failed for %s!" % self.msgid) + print(("HTML tidy failed for %s!" % self.msgid)) print(errors) return None - if type(html) == str: - html = unicode(html, 'utf8') try: cleaner = HTMLCleaner() cleaner.feed(html) return cleaner.get_text() - except HTMLParseError, e: + except Exception as e: # Failed to parse the html, thus failed to clean it. so we must # give up... return None @@ -515,7 +523,7 @@ class ArchivesParser(object): class HTMLCleaner(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.io = StringIO.StringIO() + self.io = io.StringIO() def get_text(self): return self.io.getvalue() diff --git a/loader/lib/storage.py b/loader/lib/storage.py index 92ffa45..8962b87 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -1,6 +1,6 @@ import difflib -from parser import ArchivesParser +from .parser import ArchivesParser from lib.log import log, opstatus @@ -144,9 +144,9 @@ class ArchivesParserStorage(ArchivesParser): # holding other threads together. if self.threadid: # Already have a threadid, means that we have a glue message - print "Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid) + print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)) else: - print "Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid + print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid) # In this case, just pick the first thread from the list and merge into that # one. 
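Stepping back to lib/parser.py for a moment: the _maybe_decode helper added above exists because email.header.decode_header() behaves differently under Python 3: it can return a mix of str and bytes fragments, and only the bytes ones still need an explicit charset. A self-contained sketch of that pattern (the MUA quoting workaround and the charset normalization from the real code are left out):

    from email.header import decode_header

    def decode_mime_header(hdr):
        parts = []
        for value, charset in decode_header(hdr):
            if isinstance(value, str):
                parts.append(value.strip(' '))
            else:
                parts.append(str(value, charset or 'us-ascii', errors='ignore').strip(' '))
        return " ".join(parts)

    # decode_mime_header('=?utf-8?q?caf=C3=A9?=') returns 'café'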
self.threadid = childrows[0][2] @@ -254,31 +254,27 @@ class ArchivesParserStorage(ArchivesParser): }) try: id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone() - except TypeError, e: + except TypeError as e: f.write("---- %s ----\n" % self.msgid) f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e)) f.write("\n-------------------------------\n\n") return - _from = _from.decode('utf8') - to = to.decode('utf8') - cc = cc.decode('utf8') - subject = subject.decode('utf8') - if (_from, to, cc, subject) != (self._from, self.to, self.cc, self.subject): + if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject): log.status("Message %s has header changes " % self.msgid) f.write("==== %s ====\n" % self.msgid) for fn in ['_from', 'to', 'cc', 'subject']: if getattr(self, fn) != eval(fn): - s = u"- {0}: {1}\n".format(fn, eval(fn)) - d = u"+ {0}: {1}\n".format(fn, getattr(self, fn)) + s = "- {0}: {1}\n".format(fn, eval(fn)) + d = "+ {0}: {1}\n".format(fn, getattr(self, fn)) f.write(s) f.write(d) f.write("\n\n") - if bodytxt.decode('utf8') != self.bodytxt: + if bodytxt != self.bodytxt: log.status("Message %s has body changes " % self.msgid) - tempdiff = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(), + tempdiff = list(difflib.unified_diff(bodytxt.splitlines(), self.bodytxt.splitlines(), fromfile='old', tofile='new', @@ -289,7 +285,9 @@ class ArchivesParserStorage(ArchivesParser): # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From, # which indicates the only change is in the From. ok = True - for a,b,c in map(None, *([iter(tempdiff[2:])] * 3)): + tempdiff = tempdiff[2:] + while tempdiff: + a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0)) if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')): ok=False break @@ -299,12 +297,12 @@ class ArchivesParserStorage(ArchivesParser): # Generate a nicer diff - d = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(), - self.bodytxt.splitlines(), - fromfile='old', - tofile='new', - n=0, - lineterm='')) + d = list(difflib.unified_diff(bodytxt.splitlines(), + self.bodytxt.splitlines(), + fromfile='old', + tofile='new', + n=0, + lineterm='')) if len(d) > 0: f.write("---- %s ----\n" % self.msgid) f.write("\n".join(d)) diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py index b49938b..f2a06c3 100644 --- a/loader/lib/varnish.py +++ b/loader/lib/varnish.py @@ -1,5 +1,4 @@ -import urllib -import urllib2 +import requests from lib.log import log @@ -23,13 +22,12 @@ class VarnishPurger(object): else: # Purging individual thread exprlist.append('obj.http.x-pgthread ~ :%s:' % p) - purgedict = dict(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)) + purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist))) purgedict['n'] = len(exprlist) - r = urllib2.Request(purgeurl, data=urllib.urlencode(purgedict)) - r.add_header('Content-type', 'application/x-www-form-urlencoded') - r.add_header('Host', 'www.postgresql.org') - r.get_method = lambda: 'POST' - u = urllib2.urlopen(r) - if u.getcode() != 200: + r = requests.post(purgeurl, data=purgedict, headers={ + 'Content-type': 'application/x-www-form-urlencoded', + 'Host': 'www.postgresql.org', + }) + if r.status_code != 200: log.error("Failed to send purge request!") diff --git a/loader/load_message.py b/loader/load_message.py index b466860..efb8626 100755 --- a/loader/load_message.py +++ 
b/loader/load_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # load_message.py - takes a single email or mbox formatted # file on stdin or in a file and reads it into the database. @@ -8,9 +8,9 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser -import urllib -import urllib2 +from configparser import ConfigParser +import urllib.request, urllib.parse, urllib.error +import urllib.request, urllib.error, urllib.parse import psycopg2 @@ -25,7 +25,7 @@ def log_failed_message(listid, srctype, src, msg, err): msgid = msg.msgid except: msgid = "" - log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, unicode(str(err), 'us-ascii', 'replace'))) + log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, str(str(err), 'us-ascii', 'replace'))) # We also put the data in the db. This happens in the main transaction # so if the whole script dies, it goes away... @@ -34,7 +34,7 @@ def log_failed_message(listid, srctype, src, msg, err): 'msgid': msgid, 'srctype': srctype, 'src': src, - 'err': unicode(str(err), 'us-ascii', 'replace'), + 'err': str(str(err), 'us-ascii', 'replace'), }) @@ -51,27 +51,27 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_usage() sys.exit(1) if not opt.list: - print "List must be specified" + print("List must be specified") optparser.print_usage() sys.exit(1) if opt.directory and opt.mbox: - print "Can't specify both directory and mbox!" + print("Can't specify both directory and mbox!") optparser.print_usage() sys.exit(1) if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid: - print "Can't use force_date with directory or mbox - only individual messages" + print("Can't use force_date with directory or mbox - only individual messages") optparser.print_usage() sys.exit(1) if opt.filter_msgid and not (opt.directory or opt.mbox): - print "filter_msgid makes no sense without directory or mbox!" + print("filter_msgid makes no sense without directory or mbox!") optparser.print_usage() sys.exit(1) @@ -93,8 +93,8 @@ if __name__ == "__main__": try: curs.execute("SET statement_timeout='30s'") curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)") - except Exception, e: - print("Failed to wait on advisory lock: %s" % e) + except Exception as e: + print(("Failed to wait on advisory lock: %s" % e)) sys.exit(1) # Get the listid we're working on @@ -121,36 +121,37 @@ if __name__ == "__main__": continue try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e) opstatus.failed += 1 continue ap.store(conn, listid) purges.update(ap.purges) if opt.interactive: - print "Interactive mode, committing transaction" + print("Interactive mode, committing transaction") conn.commit() - print "Proceed to next message with Enter, or input a period (.) to stop processing" - x = raw_input() + print("Proceed to next message with Enter, or input a period (.) to stop processing") + x = input() if x == '.': - print "Ok, aborting!" 
+ print("Ok, aborting!") break - print "---------------------------------" + print("---------------------------------") elif opt.mbox: if not os.path.isfile(opt.mbox): - print "File %s does not exist" % opt.mbox + print("File %s does not exist" % opt.mbox) sys.exit(1) mboxparser = MailboxBreakupParser(opt.mbox) while not mboxparser.EOF: ap = ArchivesParserStorage() - msg = mboxparser.next() - if not msg: break + msg = next(mboxparser) + if not msg: + break ap.parse(msg) if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid): continue try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "mbox", opt.mbox, ap, e) opstatus.failed += 1 continue @@ -163,10 +164,10 @@ if __name__ == "__main__": else: # Parse single message on stdin ap = ArchivesParserStorage() - ap.parse(sys.stdin) + ap.parse(sys.stdin.buffer) try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "stdin","", ap, e) conn.close() sys.exit(1) diff --git a/loader/pglister_sync.py b/loader/pglister_sync.py index 32c6820..e38cdd4 100755 --- a/loader/pglister_sync.py +++ b/loader/pglister_sync.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Synchronize list info from pglister @@ -6,7 +6,7 @@ import os import sys import argparse -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 import requests @@ -44,7 +44,7 @@ if __name__=="__main__": # For groups, just add them if they don't exist groups = {g['group']['id']:g['group']['groupname'] for g in obj} - for id,name in groups.items(): + for id,name in list(groups.items()): curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", { 'group': name, }) @@ -52,7 +52,7 @@ if __name__=="__main__": curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", { 'group': name, }) - print "Added group %s" % name + print("Added group %s" % name) # Add any missing lists, and synchronize their contents. for l in obj: @@ -66,7 +66,7 @@ if __name__=="__main__": 'groupname': l['group']['groupname'], }) listid, name = curs.fetchone() - print "Added list %s" % name + print("Added list %s" % name) else: listid, name = curs.fetchone() curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", { @@ -76,7 +76,7 @@ if __name__=="__main__": 'groupname': l['group']['groupname'], }) for n, in curs.fetchall(): - print "Updated list %s " % n + print("Updated list %s " % n) if do_subscribers: # If we synchronize subscribers, we do so on all lists for now. @@ -86,9 +86,9 @@ if __name__=="__main__": }) for what, who in curs.fetchall(): if what == 'ins': - print "Added subscriber %s to list %s" % (who, name) + print("Added subscriber %s to list %s" % (who, name)) else: - print "Removed subscriber %s from list %s" % (who, name) + print("Removed subscriber %s from list %s" % (who, name)) # We don't remove lists ever, because we probably want to keep archives around. 
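The load_message.py changes above come down to always handing the parser a binary source: the BytesIO chunks produced by MailboxBreakupParser, or sys.stdin.buffer instead of the text-mode sys.stdin. A tiny sketch of that choice, assuming raw is either None or a bytes blob:

    import sys
    from io import BytesIO

    def message_stream(raw=None):
        # The parser expects a binary file-like object; sys.stdin itself
        # is text in Python 3, so raw stdin lives on sys.stdin.buffer
        if raw is not None:
            return BytesIO(raw)
        return sys.stdin.buffer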
@@ -97,10 +97,10 @@ if __name__=="__main__": 'lists': [l['listname'] for l in obj], }) for n, in curs.fetchall(): - print "List %s exists in archives, but not in upstream! Should it be marked inactive?" % n + print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n) if args.dryrun: - print "Dry-run, rolling back" + print("Dry-run, rolling back") conn.rollback() else: conn.commit() diff --git a/loader/purge_frontend_message.py b/loader/purge_frontend_message.py index edab70c..72899e8 100755 --- a/loader/purge_frontend_message.py +++ b/loader/purge_frontend_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # purge_frontend_message.py - issue varnish purge for the message # in question, to for example force an expire of a hidden message. @@ -8,7 +8,7 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 @@ -21,12 +21,12 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_help() sys.exit(1) if not opt.msgid: - print "Message-id must be specified" + print("Message-id must be specified") optparser.print_help() sys.exit(1) diff --git a/loader/reparse_message.py b/loader/reparse_message.py index 8027058..df4501a 100755 --- a/loader/reparse_message.py +++ b/loader/reparse_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # reparse_message.py - using the rawtxt stored in the database, # redo the parsing of it and overwrite it with itself. Used when @@ -10,8 +10,8 @@ import sys import codecs from optparse import OptionParser -from ConfigParser import ConfigParser -from StringIO import StringIO +from configparser import ConfigParser +from io import BytesIO from datetime import datetime, timedelta import psycopg2 @@ -45,16 +45,16 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_usage() sys.exit(1) if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1: - print "Must specify exactly one of --msgid, --all and --sample" + print("Must specify exactly one of --msgid, --all and --sample") sys.exit(1) if not opt.update and os.path.exists('reparse.diffs'): - print "File reparse.diffs already exists. Remove or rename and try again." + print("File reparse.diffs already exists. Remove or rename and try again.") sys.exit(1) log.set(opt.verbose) @@ -97,10 +97,10 @@ if __name__ == "__main__": for id, rawtxt in ResultIter(curs): num += 1 ap = ArchivesParserStorage() - ap.parse(StringIO(rawtxt)) + ap.parse(BytesIO(rawtxt)) try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: if opt.update: raise e f.write("Exception loading %s: %s" % (id, e)) @@ -119,14 +119,14 @@ if __name__ == "__main__": sys.stdout.flush() laststatus = datetime.now() - print "" + print("") if opt.update: opstatus.print_status() if not opt.commit: while True: print("OK to commit transaction? ") - a = raw_input().lower().strip() + a = input().lower().strip() if a == 'y' or a == 'yes': print("Ok, committing.") break
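The lib/varnish.py hunk earlier in this commit swaps the urllib/urllib2 plumbing for requests. A standalone sketch of that purge call, with the purge URL and ban expressions as placeholders:

    import requests

    def purge(purgeurl, exprlist):
        # Same wire format as before: p0..pN-1 hold the expressions, n the count
        data = {'p%s' % n: expr for n, expr in enumerate(exprlist)}
        data['n'] = len(exprlist)
        r = requests.post(purgeurl, data=data, headers={
            'Content-type': 'application/x-www-form-urlencoded',
            'Host': 'www.postgresql.org',
        })
        return r.status_code == 200

requests form-encodes a dict passed as data by itself, so the explicit Content-type header mainly preserves what the old urllib2 code sent.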