You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/30 13:21:43 UTC
[incubator-ponymail-unit-tests] branch master updated: Allow sort by ezmlm number
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git
The following commit(s) were added to refs/heads/master by this push:
new 69972bb Allow sort by ezmlm number
69972bb is described below
commit 69972bbee1b34f9b297f030526c3c2fbe6d4c7a6
Author: Sebb <se...@apache.org>
AuthorDate: Sun Aug 30 14:21:25 2020 +0100
Allow sort by ezmlm number
---
tools/collate-mboxes.py | 44 ++++++++++++++++++++++++++++++++------------
1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index 76fa9a7..839d925 100755
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -7,30 +7,50 @@ Used for multi-import tests where you wish to check that multiple sources give t
Emails with duplicate sort keys are logged and dropped
"""
+
+import argparse
import mailbox
+import re
import sys
-outmbox = sys.argv[1]
-msgfiles = sys.argv[2:] # multiple input files allowed
+parser = argparse.ArgumentParser(description='Command line options.')
+parser.add_argument('--ezmlm', dest='ezmlm', action='store_true',
+ help="Use ezmlm numbering for sorting")
+parser.add_argument('args', nargs=argparse.REMAINDER)
+args = parser.parse_args()
+
+outmbox = args.args[0]
+msgfiles = args.args[1:] # multiple input files allowed
allmessages = {}
noid = 0
-dupes = 0
+skipped = 0
crlf = None # assume that all emails have the same EOL
for msgfile in msgfiles:
messages = mailbox.mbox(
msgfile, None, create=False
)
+ sortkey = None
for key in messages.iterkeys():
message = messages.get(key)
- msgid = message.get('message-id')
- if msgid:
- sortkey = msgid.strip()
+ if args.ezmlm:
+ from_ = message.get_from()
+ m = re.search(r"return-(\d+)-", from_)
+ if m:
+ sortkey = m.group(1)
+ else:
+ print("Failed to find ezmlm id in %s" % from_)
+ skipped += 1
+ continue
else:
- print("No message id, sorting by date or subject: ", message.get_from())
- noid += 1
- altid = message.get('date') or message.get('subject')
- sortkey = "~" + altid.strip() # try to ensure it sorts last
+ msgid = message.get('message-id')
+ if msgid:
+ sortkey = msgid.strip()
+ else:
+ print("No message id, sorting by date or subject: ", message.get_from())
+ noid += 1
+ altid = message.get('date') or message.get('subject')
+ sortkey = "~" + altid.strip() # try to ensure it sorts last
# store the data
file = messages.get_file(key, True)
message_raw = b''
@@ -41,7 +61,7 @@ for msgfile in msgfiles:
file.close()
if sortkey in allmessages:
print("Duplicate sort key: %s" % sortkey)
- dupes += 1
+ skipped += 1
allmessages[sortkey] = message_raw
@@ -55,4 +75,4 @@ with open(outmbox, "wb") as f:
f.write(b'\n')
nw += 1
-print("Wrote %u emails to %s with CRLF %s (%u without message-id, %u dupes skipped)" % (nw, outmbox, crlf, noid, dupes))
+print("Wrote %u emails to %s with CRLF %s (%u without message-id) WARN: %u skipped" % (nw, outmbox, crlf, noid, skipped))