You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/30 11:22:12 UTC

[incubator-ponymail-unit-tests] branch master updated: Report duplicate Message Ids

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git


The following commit(s) were added to refs/heads/master by this push:
     new b82c4b0  Report duplicate Message Ids
b82c4b0 is described below

commit b82c4b0e396bfb879e2ddf51c839d8cbcec6fef9
Author: Sebb <se...@apache.org>
AuthorDate: Sun Aug 30 12:21:53 2020 +0100

    Report duplicate Message Ids
---
 tools/collate-mboxes.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index 3bb02d5..ecbb2b0 100755
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -13,6 +13,7 @@ msgfiles = sys.argv[2:] # multiple input files allowed
 
 allmessages = {}
 noid = 0
+dupes = 0
 crlf = None # assume that all emails have the same EOL
 for msgfile in msgfiles:
     messages = mailbox.mbox(
@@ -30,6 +31,9 @@ for msgfile in msgfiles:
                 crlf = (message_raw.endswith(b'\r\n'))
             message_raw += file.read()
             file.close()
+            if msgid in allmessages:
+                print("Duplicate message id: %s" % msgid)
+                dupes += 1
             allmessages[msgid] = message_raw
         else:
             print("No message id: ", message.get_from())
@@ -46,4 +50,4 @@ with open(outmbox, "wb") as f:
             f.write(b'\n')
         nw += 1
 
-print("Wrote %u emails to %s with CRLF %s (%u skipped)" % (nw, outmbox, crlf, noid))
+print("Wrote %u emails to %s with CRLF %s (%u skipped, %u dupes)" % (nw, outmbox, crlf, noid, dupes))