From: Magnus Hagander Date: Thu, 3 Jan 2019 10:04:29 +0000 (+0100) Subject: Update loader scripts to use python3 syntax X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=bb5775efe5f938461537e0c95c7c110875e4718b;p=pgarchives.git Update loader scripts to use python3 syntax Some minor cleanups as well, but mostly just the output of the 2to3 tool and some manual changes. --- diff --git a/loader/clean_date.py b/loader/clean_date.py index faac7ab..4ea2951 100755 --- a/loader/clean_date.py +++ b/loader/clean_date.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Clean up old, broken, dates # @@ -7,17 +7,17 @@ import os import sys import re -from ConfigParser import ConfigParser +from configparser import ConfigParser from email.parser import Parser -from urllib import urlopen +from urllib.request import urlopen import dateutil.parser import psycopg2 def scan_message(messageid, olddate, curs): u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid - print "Scanning message at %s (date reported as %s)..." % (u, olddate) + print("Scanning message at %s (date reported as %s)..." % (u, olddate)) f = urlopen(u) p = Parser() @@ -26,10 +26,10 @@ def scan_message(messageid, olddate, curs): # Can be either one of them, but we really don't care... ds = None - for k,r in msg.items(): + for k,r in list(msg.items()): if k != 'Received': continue - print "Trying on %s" % r + print("Trying on %s" % r) m = re.search(';\s*(.*)$', r) if m: ds = m.group(1) @@ -40,23 +40,23 @@ def scan_message(messageid, olddate, curs): break if not ds: - print "Could not find date. Sorry." + print("Could not find date. Sorry.") return False d = None try: d = dateutil.parser.parse(ds) except: - print "Could not parse date '%s', sorry." % ds + print("Could not parse date '%s', sorry." % ds) return while True: - x = raw_input("Parsed this as date %s. Update? " % d) + x = input("Parsed this as date %s. Update? " % d) if x.upper() == 'Y': curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", { 'd': d, 'm': messageid, }) - print "Updated." + print("Updated.") break elif x.upper() == 'N': break @@ -74,4 +74,4 @@ if __name__ == "__main__": scan_message(messageid, date, curs) conn.commit() - print "Done." + print("Done.") diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py index 42404c4..c2299e1 100755 --- a/loader/generate_mbox.py +++ b/loader/generate_mbox.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # generate_mbox.py - generate an mbox file from the rawtxt stored # in the datatabase. 
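The clean_date.py hunks above cover the usual 2to3 surface: print() as a function, input() instead of raw_input(), and the configparser / urllib.request module renames. A minimal, self-contained sketch of the same Received-header date recovery under Python 3, with a placeholder URL; since urlopen() now returns a bytes stream, this sketch parses it with BytesParser:

    from email.parser import BytesParser
    from urllib.request import urlopen
    import re

    import dateutil.parser

    def received_date(url):
        # urlopen() gives a binary stream in Python 3, so use BytesParser
        # rather than the text-mode email.parser.Parser
        with urlopen(url) as f:
            msg = BytesParser().parse(f)
        # Take the date from the first Received header that carries one
        for value in msg.get_all('Received', []):
            m = re.search(r';\s*(.*)$', value)
            if m:
                try:
                    return dateutil.parser.parse(m.group(1))
                except (ValueError, OverflowError):
                    continue
        return None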
@@ -11,27 +11,34 @@ import calendar import re import argparse -from ConfigParser import ConfigParser +from configparser import ConfigParser import email.parser +import email.policy import email.generator -from StringIO import StringIO +from io import BytesIO import psycopg2 def generate_single_mbox(conn, listid, year, month, destination): curs = conn.cursor() - curs.execute("SELECT rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { + curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", { 'listid': listid, 'startdate': date(year, month, 1), 'enddate': date(year, month, calendar.monthrange(year, month)[1]), }) - with open(destination, 'w') as f: - for raw, in curs: - s = StringIO(raw) - parser = email.parser.Parser() + with open(destination, 'w', encoding='utf8') as f: + for id, raw, in curs: + s = BytesIO(raw) + parser = email.parser.BytesParser(policy=email.policy.compat32) msg = parser.parse(s) - f.write(msg.as_string(unixfrom=True)) + try: + x = msg.as_string(unixfrom=True) + f.write(x) + except UnicodeEncodeError as e: + print("Not including {0}, unicode error".format(msg['message-id'])) + except Exception as e: + print("Not including {0}, exception {1}".format(msg['message-id'], e)) if __name__ == "__main__": @@ -46,14 +53,14 @@ if __name__ == "__main__": if args.auto: if (args.list or args.month): - print "Must not specify list and month when auto-generating!" + print("Must not specify list and month when auto-generating!") sys.exit(1) if not os.path.isdir(args.destination): - print "Destination must be a directory, and exist, when auto-generating" + print("Destination must be a directory, and exist, when auto-generating") sys.exit(1) else: if not (args.list and args.month and args.destination): - print "Must specify list, month and destination when generating a single mailbox" + print("Must specify list, month and destination when generating a single mailbox") parser.print_help() sys.exit(1) @@ -85,14 +92,14 @@ if __name__ == "__main__": if not os.path.isdir(fullpath): os.makedirs(fullpath) if not args.quiet: - print "Generating {0}-{1} for {2}".format(year, month, lname) + print("Generating {0}-{1} for {2}".format(year, month, lname)) generate_single_mbox(conn, lid, year, month, os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month))) else: # Parse year and month m = re.match('^(\d{4})-(\d{2})$', args.month) if not m: - print "Month must be specified on format YYYY-MM, not {0}".format(args.month) + print("Month must be specified on format YYYY-MM, not {0}".format(args.month)) sys.exit(1) year = int(m.group(1)) month = int(m.group(2)) @@ -101,9 +108,9 @@ if __name__ == "__main__": 'name': args.list, }) if curs.rowcount != 1: - print "List {0} not found.".format(args.list) + print("List {0} not found.".format(args.list)) sys.exit(1) if not args.quiet: - print "Generating {0}-{1} for {2}".format(year, month, args.list) + print("Generating {0}-{1} for {2}".format(year, month, args.list)) generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination) diff --git a/loader/hide_message.py b/loader/hide_message.py index 51bffc6..8bb9359 100755 --- a/loader/hide_message.py +++ b/loader/hide_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # 
hide_message.py - hide a message (spam etc) in the archives, including # frontend expiry. @@ -8,7 +8,7 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 @@ -29,12 +29,12 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_help() sys.exit(1) if not opt.msgid: - print "Message-id must be specified" + print("Message-id must be specified") optparser.print_help() sys.exit(1) @@ -52,34 +52,34 @@ if __name__ == "__main__": 'msgid': opt.msgid, }) if curs.rowcount <= 0: - print "Message not found." + print("Message not found.") sys.exit(1) id, threadid, previous = curs.fetchone() # Message found, ask for reason reason = 0 - print "Current status: %s" % reasons[previous or 0] - print "\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))) + print("Current status: %s" % reasons[previous or 0]) + print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))) while True: - reason = raw_input('Reason for hiding message? ') + reason = input('Reason for hiding message? ') try: reason = int(reason) except ValueError: continue if reason == 0: - print "Un-hiding message" + print("Un-hiding message") reason = None break else: try: - print "Hiding message for reason: %s" % reasons[reason] + print("Hiding message for reason: %s" % reasons[reason]) except: continue break if previous == reason: - print "No change in status, not updating" + print("No change in status, not updating") conn.close() sys.exit(0) @@ -88,7 +88,7 @@ if __name__ == "__main__": 'id': id, }) if curs.rowcount != 1: - print "Failed to update! Not hiding!" + print("Failed to update! Not hiding!") conn.rollback() sys.exit(0) conn.commit() @@ -96,4 +96,4 @@ if __name__ == "__main__": VarnishPurger(cfg).purge([int(threadid), ]) conn.close() - print "Message hidden and varnish purge triggered." + print("Message hidden and varnish purge triggered.") diff --git a/loader/lib/log.py b/loader/lib/log.py index 82e72fb..5b6379a 100644 --- a/loader/lib/log.py +++ b/loader/lib/log.py @@ -7,13 +7,13 @@ class Log(object): def status(self, msg): if self.verbose: - print msg + print(msg) def log(self, msg): - print msg + print(msg) def error(self, msg): - print msg + print(msg) def print_status(self): opstatus.print_status() @@ -27,7 +27,7 @@ class OpStatus(object): self.overwritten = 0 def print_status(self): - print "%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten) + print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)) log = Log() diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py index c4982ed..77c83b0 100644 --- a/loader/lib/mbox.py +++ b/loader/lib/mbox.py @@ -1,5 +1,5 @@ from subprocess import Popen, PIPE -import cStringIO as StringIO +from io import BytesIO # The hack of all hacks... # The python mbox parser fails to split some messages from mj2 @@ -8,6 +8,7 @@ import cStringIO as StringIO # reassemble it to one long stream with a unique separator, # and then split it apart again in python.. Isn't it cute? 
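A short sketch of the "split it apart again in python" step described in the comment above, written as a Python 3 generator; the formail pipeline that feeds the real MailboxBreakupParser is omitted, and the input is assumed to be a binary stream, which is why the separator has to be bytes (the bSEPARATOR introduced just below):

    from io import BytesIO

    SEPARATOR = b"ABCARCHBREAK123" * 50

    def split_messages(binary_stream):
        # Accumulate binary lines until the separator, then hand the
        # finished message back as a BytesIO
        buf = BytesIO()
        for line in binary_stream:
            if line.rstrip() == SEPARATOR:
                buf.seek(0)
                yield buf
                buf = BytesIO()
            else:
                buf.write(line)
        if buf.tell():
            buf.seek(0)
            yield buf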
SEPARATOR = "ABCARCHBREAK123" * 50 +bSEPARATOR = bytes(SEPARATOR, 'ascii') class MailboxBreakupParser(object): def __init__(self, fn): @@ -27,21 +28,21 @@ class MailboxBreakupParser(object): def stderr_output(self): return self.pipe.stderr.read() - def next(self): - sio = StringIO.StringIO() + def __next__(self): + sio = BytesIO() while True: try: - l = self.pipe.stdout.next() + l = next(self.pipe.stdout) except StopIteration: # End of file! self.EOF = True if sio.tell() == 0: # Nothing read yet, so return None instead of an empty - # stringio + # bytesio return None sio.seek(0) return sio - if l.rstrip() == SEPARATOR: + if l.rstrip() == bSEPARATOR: # Reached a separator. Meaning we're not at end of file, # but we're at end of message. sio.seek(0) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 15009c4..8ee25c5 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -2,23 +2,24 @@ import re import datetime import dateutil.parser -from email.parser import Parser -from email.header import decode_header +from email.parser import BytesParser +from email.header import decode_header, Header from email.errors import HeaderParseError -from HTMLParser import HTMLParser, HTMLParseError +from email.policy import compat32 +from html.parser import HTMLParser import tidylib -import StringIO +import io from lib.exception import IgnorableException from lib.log import log class ArchivesParser(object): def __init__(self): - self.parser = Parser() + self.parser = BytesParser(policy=compat32) def parse(self, stream): self.rawtxt = stream.read() - self.msg = self.parser.parse(StringIO.StringIO(self.rawtxt)) + self.msg = self.parser.parse(io.BytesIO(self.rawtxt)) def is_msgid(self, msgid): # Look for a specific messageid. This means we might parse it twice, @@ -26,7 +27,7 @@ class ArchivesParser(object): try: if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid: return True - except Exception, e: + except Exception as e: return False def analyze(self, date_override=None): @@ -49,13 +50,13 @@ class ArchivesParser(object): self.parents = [] # The first one is in-reply-to, if it exists if self.get_optional('in-reply-to'): - m = self.clean_messageid(self.get_optional('in-reply-to'), True) + m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True) if m: self.parents.append(m) # Then we add all References values, in backwards order if self.get_optional('references'): - cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.get_optional('references').split())] + cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())] # Can't do this with a simple self.parents.extend() due to broken # mailers that add the same reference more than once. And we can't # use a set() to make it unique, because order is very important @@ -130,19 +131,19 @@ class ArchivesParser(object): params = msg.get_params() if not params: # No content-type, so we assume us-ascii - return unicode(b, 'us-ascii', errors='ignore') + return str(b, 'us-ascii', errors='ignore') for k,v in params: if k.lower() == 'charset': charset = v break if charset: try: - return unicode(b, self.clean_charset(charset), errors='ignore') - except LookupError, e: + return str(b, self.clean_charset(charset), errors='ignore') + except LookupError as e: raise IgnorableException("Failed to get unicode payload: %s" % e) else: # XXX: reasonable default? 
- return unicode(b, errors='ignore') + return str(b, errors='ignore') # Return None or empty string, depending on what we got back return b @@ -154,8 +155,8 @@ class ArchivesParser(object): if b: # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will # later reject.. - if b.find(u'\udbff\n\udef8'): - b = b.replace(u'\udbff\n\udef8', '') + if b.find('\udbff\n\udef8'): + b = b.replace('\udbff\n\udef8', '') # Remove postgres specific mail footer - if it's there m = self._re_footer.match(b) @@ -249,15 +250,15 @@ class ArchivesParser(object): # If this is a header-encoded filename, start by decoding that if filename.startswith('=?'): decoded, encoding = decode_header(filename)[0] - return unicode(decoded, encoding, errors='ignore') + return str(decoded, encoding, errors='ignore') # If it's already unicode, just return it - if isinstance(filename, unicode): + if isinstance(filename, str): return filename # Anything that's not UTF8, we just get rid of. We can live with # filenames slightly mangled in this case. - return unicode(filename, 'utf-8', errors='ignore') + return str(filename, 'utf-8', errors='ignore') def _extract_filename(self, container): # Try to get the filename for an attachment in the container. @@ -324,7 +325,7 @@ class ArchivesParser(object): # by majordomo with the footer. So if that one is present, # we need to explicitly exclude it again. b = container.get_payload(decode=True) - if not self._re_footer.match(b): + if isinstance(b, str) and not self._re_footer.match(b): # We know there is no name for this one self.attachments.append((None, container.get_content_type(), b)) return @@ -423,9 +424,14 @@ class ArchivesParser(object): # enough... dp = datetime.datetime(*dp.utctimetuple()[:6]) return dp - except Exception, e: + except Exception as e: raise IgnorableException("Failed to parse date '%s': %s" % (d, e)) + def _maybe_decode(self, s, charset): + if isinstance(s, str): + return s.strip(' ') + return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ') + # Workaround for broken quoting in some MUAs (see below) _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE) def _decode_mime_header(self, hdr, email_workaround): @@ -449,28 +455,32 @@ class ArchivesParser(object): hdr = self._re_mailworkaround.sub(r'\1', hdr) try: - return " ".join([unicode(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore') for s,charset in decode_header(hdr)]) - except HeaderParseError, e: + return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)]) + except HeaderParseError as e: # Parser error is typically someone specifying an encoding, # but then not actually using that encoding. 
We'll do the best # we can, which is cut it down to ascii and ignore errors - return unicode(hdr, 'us-ascii', errors='ignore') + return str(hdr, 'us-ascii', errors='ignore').strip(' ') def decode_mime_header(self, hdr, email_workaround=False): try: + if isinstance(hdr, Header): + hdr = hdr.encode() + h = self._decode_mime_header(hdr, email_workaround) if h: return h.replace("\0", "") return '' - except LookupError, e: + except LookupError as e: raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e)) - except ValueError, ve: + except ValueError as ve: raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve)) def get_mandatory(self, fieldname): try: x = self.msg[fieldname] - if x==None: raise Exception() + if x==None: + raise Exception() return x except: raise IgnorableException("Mandatory field '%s' is missing" % fieldname) @@ -496,17 +506,15 @@ class ArchivesParser(object): 'show-info': 0, }) if errors: - print("HTML tidy failed for %s!" % self.msgid) + print(("HTML tidy failed for %s!" % self.msgid)) print(errors) return None - if type(html) == str: - html = unicode(html, 'utf8') try: cleaner = HTMLCleaner() cleaner.feed(html) return cleaner.get_text() - except HTMLParseError, e: + except Exception as e: # Failed to parse the html, thus failed to clean it. so we must # give up... return None @@ -515,7 +523,7 @@ class ArchivesParser(object): class HTMLCleaner(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.io = StringIO.StringIO() + self.io = io.StringIO() def get_text(self): return self.io.getvalue() diff --git a/loader/lib/storage.py b/loader/lib/storage.py index 92ffa45..8962b87 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -1,6 +1,6 @@ import difflib -from parser import ArchivesParser +from .parser import ArchivesParser from lib.log import log, opstatus @@ -144,9 +144,9 @@ class ArchivesParserStorage(ArchivesParser): # holding other threads together. if self.threadid: # Already have a threadid, means that we have a glue message - print "Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid) + print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)) else: - print "Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid + print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid) # In this case, just pick the first thread from the list and merge into that # one. 
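Stepping back to lib/parser.py for a moment: the _maybe_decode helper added above exists because email.header.decode_header() behaves differently under Python 3: it can return a mix of str and bytes fragments, and only the bytes ones still need an explicit charset. A self-contained sketch of that pattern (the MUA quoting workaround and the charset normalization from the real code are left out):

    from email.header import decode_header

    def decode_mime_header(hdr):
        parts = []
        for value, charset in decode_header(hdr):
            if isinstance(value, str):
                parts.append(value.strip(' '))
            else:
                parts.append(str(value, charset or 'us-ascii', errors='ignore').strip(' '))
        return " ".join(parts)

    # decode_mime_header('=?utf-8?q?caf=C3=A9?=') returns 'café'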
self.threadid = childrows[0][2] @@ -254,31 +254,27 @@ class ArchivesParserStorage(ArchivesParser): }) try: id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone() - except TypeError, e: + except TypeError as e: f.write("---- %s ----\n" % self.msgid) f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e)) f.write("\n-------------------------------\n\n") return - _from = _from.decode('utf8') - to = to.decode('utf8') - cc = cc.decode('utf8') - subject = subject.decode('utf8') - if (_from, to, cc, subject) != (self._from, self.to, self.cc, self.subject): + if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject): log.status("Message %s has header changes " % self.msgid) f.write("==== %s ====\n" % self.msgid) for fn in ['_from', 'to', 'cc', 'subject']: if getattr(self, fn) != eval(fn): - s = u"- {0}: {1}\n".format(fn, eval(fn)) - d = u"+ {0}: {1}\n".format(fn, getattr(self, fn)) + s = "- {0}: {1}\n".format(fn, eval(fn)) + d = "+ {0}: {1}\n".format(fn, getattr(self, fn)) f.write(s) f.write(d) f.write("\n\n") - if bodytxt.decode('utf8') != self.bodytxt: + if bodytxt != self.bodytxt: log.status("Message %s has body changes " % self.msgid) - tempdiff = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(), + tempdiff = list(difflib.unified_diff(bodytxt.splitlines(), self.bodytxt.splitlines(), fromfile='old', tofile='new', @@ -289,7 +285,9 @@ class ArchivesParserStorage(ArchivesParser): # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From, # which indicates the only change is in the From. ok = True - for a,b,c in map(None, *([iter(tempdiff[2:])] * 3)): + tempdiff = tempdiff[2:] + while tempdiff: + a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0)) if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')): ok=False break @@ -299,12 +297,12 @@ class ArchivesParserStorage(ArchivesParser): # Generate a nicer diff - d = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(), - self.bodytxt.splitlines(), - fromfile='old', - tofile='new', - n=0, - lineterm='')) + d = list(difflib.unified_diff(bodytxt.splitlines(), + self.bodytxt.splitlines(), + fromfile='old', + tofile='new', + n=0, + lineterm='')) if len(d) > 0: f.write("---- %s ----\n" % self.msgid) f.write("\n".join(d)) diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py index b49938b..f2a06c3 100644 --- a/loader/lib/varnish.py +++ b/loader/lib/varnish.py @@ -1,5 +1,4 @@ -import urllib -import urllib2 +import requests from lib.log import log @@ -23,13 +22,12 @@ class VarnishPurger(object): else: # Purging individual thread exprlist.append('obj.http.x-pgthread ~ :%s:' % p) - purgedict = dict(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)) + purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist))) purgedict['n'] = len(exprlist) - r = urllib2.Request(purgeurl, data=urllib.urlencode(purgedict)) - r.add_header('Content-type', 'application/x-www-form-urlencoded') - r.add_header('Host', 'www.postgresql.org') - r.get_method = lambda: 'POST' - u = urllib2.urlopen(r) - if u.getcode() != 200: + r = requests.post(purgeurl, data=purgedict, headers={ + 'Content-type': 'application/x-www-form-urlencoded', + 'Host': 'www.postgresql.org', + }) + if r.status_code != 200: log.error("Failed to send purge request!") diff --git a/loader/load_message.py b/loader/load_message.py index b466860..efb8626 100755 --- a/loader/load_message.py +++ 
b/loader/load_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # load_message.py - takes a single email or mbox formatted # file on stdin or in a file and reads it into the database. @@ -8,9 +8,9 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser -import urllib -import urllib2 +from configparser import ConfigParser +import urllib.request, urllib.parse, urllib.error +import urllib.request, urllib.error, urllib.parse import psycopg2 @@ -25,7 +25,7 @@ def log_failed_message(listid, srctype, src, msg, err): msgid = msg.msgid except: msgid = "" - log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, unicode(str(err), 'us-ascii', 'replace'))) + log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, str(str(err), 'us-ascii', 'replace'))) # We also put the data in the db. This happens in the main transaction # so if the whole script dies, it goes away... @@ -34,7 +34,7 @@ def log_failed_message(listid, srctype, src, msg, err): 'msgid': msgid, 'srctype': srctype, 'src': src, - 'err': unicode(str(err), 'us-ascii', 'replace'), + 'err': str(str(err), 'us-ascii', 'replace'), }) @@ -51,27 +51,27 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_usage() sys.exit(1) if not opt.list: - print "List must be specified" + print("List must be specified") optparser.print_usage() sys.exit(1) if opt.directory and opt.mbox: - print "Can't specify both directory and mbox!" + print("Can't specify both directory and mbox!") optparser.print_usage() sys.exit(1) if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid: - print "Can't use force_date with directory or mbox - only individual messages" + print("Can't use force_date with directory or mbox - only individual messages") optparser.print_usage() sys.exit(1) if opt.filter_msgid and not (opt.directory or opt.mbox): - print "filter_msgid makes no sense without directory or mbox!" + print("filter_msgid makes no sense without directory or mbox!") optparser.print_usage() sys.exit(1) @@ -93,8 +93,8 @@ if __name__ == "__main__": try: curs.execute("SET statement_timeout='30s'") curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)") - except Exception, e: - print("Failed to wait on advisory lock: %s" % e) + except Exception as e: + print(("Failed to wait on advisory lock: %s" % e)) sys.exit(1) # Get the listid we're working on @@ -121,36 +121,37 @@ if __name__ == "__main__": continue try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e) opstatus.failed += 1 continue ap.store(conn, listid) purges.update(ap.purges) if opt.interactive: - print "Interactive mode, committing transaction" + print("Interactive mode, committing transaction") conn.commit() - print "Proceed to next message with Enter, or input a period (.) to stop processing" - x = raw_input() + print("Proceed to next message with Enter, or input a period (.) to stop processing") + x = input() if x == '.': - print "Ok, aborting!" 
+ print("Ok, aborting!") break - print "---------------------------------" + print("---------------------------------") elif opt.mbox: if not os.path.isfile(opt.mbox): - print "File %s does not exist" % opt.mbox + print("File %s does not exist" % opt.mbox) sys.exit(1) mboxparser = MailboxBreakupParser(opt.mbox) while not mboxparser.EOF: ap = ArchivesParserStorage() - msg = mboxparser.next() - if not msg: break + msg = next(mboxparser) + if not msg: + break ap.parse(msg) if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid): continue try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "mbox", opt.mbox, ap, e) opstatus.failed += 1 continue @@ -163,10 +164,10 @@ if __name__ == "__main__": else: # Parse single message on stdin ap = ArchivesParserStorage() - ap.parse(sys.stdin) + ap.parse(sys.stdin.buffer) try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: log_failed_message(listid, "stdin","", ap, e) conn.close() sys.exit(1) diff --git a/loader/pglister_sync.py b/loader/pglister_sync.py index 32c6820..e38cdd4 100755 --- a/loader/pglister_sync.py +++ b/loader/pglister_sync.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Synchronize list info from pglister @@ -6,7 +6,7 @@ import os import sys import argparse -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 import requests @@ -44,7 +44,7 @@ if __name__=="__main__": # For groups, just add them if they don't exist groups = {g['group']['id']:g['group']['groupname'] for g in obj} - for id,name in groups.items(): + for id,name in list(groups.items()): curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", { 'group': name, }) @@ -52,7 +52,7 @@ if __name__=="__main__": curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", { 'group': name, }) - print "Added group %s" % name + print("Added group %s" % name) # Add any missing lists, and synchronize their contents. for l in obj: @@ -66,7 +66,7 @@ if __name__=="__main__": 'groupname': l['group']['groupname'], }) listid, name = curs.fetchone() - print "Added list %s" % name + print("Added list %s" % name) else: listid, name = curs.fetchone() curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", { @@ -76,7 +76,7 @@ if __name__=="__main__": 'groupname': l['group']['groupname'], }) for n, in curs.fetchall(): - print "Updated list %s " % n + print("Updated list %s " % n) if do_subscribers: # If we synchronize subscribers, we do so on all lists for now. @@ -86,9 +86,9 @@ if __name__=="__main__": }) for what, who in curs.fetchall(): if what == 'ins': - print "Added subscriber %s to list %s" % (who, name) + print("Added subscriber %s to list %s" % (who, name)) else: - print "Removed subscriber %s from list %s" % (who, name) + print("Removed subscriber %s from list %s" % (who, name)) # We don't remove lists ever, because we probably want to keep archives around. 
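The load_message.py changes above come down to always handing the parser a binary source: the BytesIO chunks produced by MailboxBreakupParser, or sys.stdin.buffer instead of the text-mode sys.stdin. A tiny sketch of that choice, assuming raw is either None or a bytes blob:

    import sys
    from io import BytesIO

    def message_stream(raw=None):
        # The parser expects a binary file-like object; sys.stdin itself
        # is text in Python 3, so raw stdin lives on sys.stdin.buffer
        if raw is not None:
            return BytesIO(raw)
        return sys.stdin.buffer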
@@ -97,10 +97,10 @@ if __name__=="__main__": 'lists': [l['listname'] for l in obj], }) for n, in curs.fetchall(): - print "List %s exists in archives, but not in upstream! Should it be marked inactive?" % n + print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n) if args.dryrun: - print "Dry-run, rolling back" + print("Dry-run, rolling back") conn.rollback() else: conn.commit() diff --git a/loader/purge_frontend_message.py b/loader/purge_frontend_message.py index edab70c..72899e8 100755 --- a/loader/purge_frontend_message.py +++ b/loader/purge_frontend_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # purge_frontend_message.py - issue varnish purge for the message # in question, to for example force an expire of a hidden message. @@ -8,7 +8,7 @@ import os import sys from optparse import OptionParser -from ConfigParser import ConfigParser +from configparser import ConfigParser import psycopg2 @@ -21,12 +21,12 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_help() sys.exit(1) if not opt.msgid: - print "Message-id must be specified" + print("Message-id must be specified") optparser.print_help() sys.exit(1) diff --git a/loader/reparse_message.py b/loader/reparse_message.py index 8027058..df4501a 100755 --- a/loader/reparse_message.py +++ b/loader/reparse_message.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # reparse_message.py - using the rawtxt stored in the database, # redo the parsing of it and overwrite it with itself. Used when @@ -10,8 +10,8 @@ import sys import codecs from optparse import OptionParser -from ConfigParser import ConfigParser -from StringIO import StringIO +from configparser import ConfigParser +from io import BytesIO from datetime import datetime, timedelta import psycopg2 @@ -45,16 +45,16 @@ if __name__ == "__main__": (opt, args) = optparser.parse_args() if (len(args)): - print "No bare arguments accepted" + print("No bare arguments accepted") optparser.print_usage() sys.exit(1) if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1: - print "Must specify exactly one of --msgid, --all and --sample" + print("Must specify exactly one of --msgid, --all and --sample") sys.exit(1) if not opt.update and os.path.exists('reparse.diffs'): - print "File reparse.diffs already exists. Remove or rename and try again." + print("File reparse.diffs already exists. Remove or rename and try again.") sys.exit(1) log.set(opt.verbose) @@ -97,10 +97,10 @@ if __name__ == "__main__": for id, rawtxt in ResultIter(curs): num += 1 ap = ArchivesParserStorage() - ap.parse(StringIO(rawtxt)) + ap.parse(BytesIO(rawtxt)) try: ap.analyze(date_override=opt.force_date) - except IgnorableException, e: + except IgnorableException as e: if opt.update: raise e f.write("Exception loading %s: %s" % (id, e)) @@ -119,14 +119,14 @@ if __name__ == "__main__": sys.stdout.flush() laststatus = datetime.now() - print "" + print("") if opt.update: opstatus.print_status() if not opt.commit: while True: print("OK to commit transaction? ") - a = raw_input().lower().strip() + a = input().lower().strip() if a == 'y' or a == 'yes': print("Ok, committing.") break
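The lib/varnish.py hunk earlier in this commit swaps the urllib/urllib2 plumbing for requests. A standalone sketch of that purge call, with the purge URL and ban expressions as placeholders:

    import requests

    def purge(purgeurl, exprlist):
        # Same wire format as before: p0..pN-1 hold the expressions, n the count
        data = {'p%s' % n: expr for n, expr in enumerate(exprlist)}
        data['n'] = len(exprlist)
        r = requests.post(purgeurl, data=data, headers={
            'Content-type': 'application/x-www-form-urlencoded',
            'Host': 'www.postgresql.org',
        })
        return r.status_code == 200

requests form-encodes a dict passed as data by itself, so the explicit Content-type header mainly preserves what the old urllib2 code sent.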