From: Magnus Hagander Date: Mon, 1 Jun 2020 14:40:01 +0000 (+0200) Subject: Support overwriting messages in load_message.py X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=4b435034062c1156ed4b7ca617ae820a6493008e;p=pgarchives.git Support overwriting messages in load_message.py Previously, overwriting was only allowed from reparse_messages.py, in which case it would only reparse the existing message. For the use case of overwriting the raw contents and then also reparsing the result, the --overwrite switch can now be passed to load_message.py. --- diff --git a/loader/lib/storage.py b/loader/lib/storage.py index d785418..5fbdc6f 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -16,7 +16,7 @@ class ArchivesParserStorage(ArchivesParser): def purge_thread(self, threadid): self.purges.add(int(threadid)) - def store(self, conn, listid, overwrite=False): + def store(self, conn, listid, overwrite=False, overwrite_raw=False): curs = conn.cursor() # Potentially add the information that there exists a mail for @@ -51,6 +51,15 @@ class ArchivesParserStorage(ArchivesParser): if overwrite: pk = r[0][2] self.purge_thread(r[0][0]) + if overwrite_raw: + # For full overwrite, we also update the raw text of the message. This is an + # uncommon enough operation that we'll just do it as a separate command. + log.status("Full raw overwrite of %s" % self.msgid) + curs.execute("UPDATE messages SET rawtxt=%(raw)s WHERE id=%(id)s", { + 'raw': bytearray(self.rawtxt), + 'id': pk, + }) + # Overwrite an existing message. We do not attempt to # "re-thread" a message, we just update the contents. We # do remove all attachments and rewrite them. 
Of course, we diff --git a/loader/load_message.py b/loader/load_message.py index 079480b..78f51b4 100755 --- a/loader/load_message.py +++ b/loader/load_message.py @@ -46,6 +46,7 @@ if __name__ == "__main__": optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output') optparser.add_option('--force-date', dest='force_date', help='Override date (used for dates that can\'t be parsed)') optparser.add_option('--filter-msgid', dest='filter_msgid', help='Only process message with given msgid') + optparser.add_option('--overwrite', dest='overwrite', action='store_true', help='Overwrite full contents of message') (opt, args) = optparser.parse_args() @@ -124,7 +125,7 @@ if __name__ == "__main__": log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e) opstatus.failed += 1 continue - ap.store(conn, listid) + ap.store(conn, listid, opt.overwrite, opt.overwrite) purges.update(ap.purges) if opt.interactive: print("Interactive mode, committing transaction") @@ -154,7 +155,7 @@ if __name__ == "__main__": log_failed_message(listid, "mbox", opt.mbox, ap, e) opstatus.failed += 1 continue - ap.store(conn, listid) + ap.store(conn, listid, opt.overwrite, opt.overwrite) purges.update(ap.purges) if mboxparser.returncode(): log.error("Failed to parse mbox:") @@ -170,7 +171,7 @@ if __name__ == "__main__": log_failed_message(listid, "stdin", "", ap, e) conn.close() sys.exit(1) - ap.store(conn, listid) + ap.store(conn, listid, opt.overwrite, opt.overwrite) purges.update(ap.purges) if opstatus.stored: log.log("Stored message with message-id %s" % ap.msgid)