You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/30 13:21:43 UTC

[incubator-ponymail-unit-tests] branch master updated: Allow sort by ezmlm number

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git


The following commit(s) were added to refs/heads/master by this push:
     new 69972bb  Allow sort by ezmlm number
69972bb is described below

commit 69972bbee1b34f9b297f030526c3c2fbe6d4c7a6
Author: Sebb <se...@apache.org>
AuthorDate: Sun Aug 30 14:21:25 2020 +0100

    Allow sort by ezmlm number
---
 tools/collate-mboxes.py | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index 76fa9a7..839d925 100755
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -7,30 +7,50 @@ Used for multi-import tests where you wish to check that multiple sources give t
 
 Emails with duplicate sort keys are logged and dropped
 """
+
+import argparse
 import mailbox
+import re
 import sys
 
-outmbox = sys.argv[1]
-msgfiles = sys.argv[2:] # multiple input files allowed
+parser = argparse.ArgumentParser(description='Command line options.')
+parser.add_argument('--ezmlm', dest='ezmlm', action='store_true',
+                    help="Use ezmlm numbering for sorting")
+parser.add_argument('args', nargs=argparse.REMAINDER)
+args = parser.parse_args()
+
+outmbox = args.args[0]
+msgfiles = args.args[1:] # multiple input files allowed
 
 allmessages = {}
 noid = 0
-dupes = 0
+skipped = 0
 crlf = None # assume that all emails have the same EOL
 for msgfile in msgfiles:
     messages = mailbox.mbox(
         msgfile, None, create=False
     )
+    sortkey = None
     for key in messages.iterkeys():
         message = messages.get(key)
-        msgid = message.get('message-id')
-        if msgid:
-            sortkey = msgid.strip()
+        if args.ezmlm:
+            from_ = message.get_from()
+            m = re.search(r"return-(\d+)-", from_)
+            if m:
+                sortkey = m.group(1)
+            else:
+                print("Failed to find ezmlm id in %s" % from_)
+                skipped += 1
+                continue
         else:
-            print("No message id, sorting by date or subject: ", message.get_from())
-            noid += 1
-            altid = message.get('date') or message.get('subject')
-            sortkey = "~" + altid.strip() # try to ensure it sorts last
+            msgid = message.get('message-id')
+            if msgid:
+                sortkey = msgid.strip()
+            else:
+                print("No message id, sorting by date or subject: ", message.get_from())
+                noid += 1
+                altid = message.get('date') or message.get('subject')
+                sortkey = "~" + altid.strip() # try to ensure it sorts last
         # store the data
         file = messages.get_file(key, True)
         message_raw = b''
@@ -41,7 +61,7 @@ for msgfile in msgfiles:
         file.close()
         if sortkey in allmessages:
             print("Duplicate sort key: %s" % sortkey)
-            dupes += 1
+            skipped += 1
         allmessages[sortkey] = message_raw
 
 
@@ -55,4 +75,4 @@ with open(outmbox, "wb") as f:
             f.write(b'\n')
         nw += 1
 
-print("Wrote %u emails to %s with CRLF %s (%u without message-id, %u dupes skipped)" % (nw, outmbox, crlf, noid, dupes))
+print("Wrote %u emails to %s with CRLF %s (%u without message-id) WARN: %u skipped" % (nw, outmbox, crlf, noid, skipped))