You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/30 12:17:07 UTC

[incubator-ponymail-unit-tests] branch master updated: Don't drop mails without Message-Id

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git


The following commit(s) were added to refs/heads/master by this push:
     new 6450f16  Don't drop mails without Message-Id
6450f16 is described below

commit 6450f16c73eef2d61b7ceaeb0d6317c6190f7c4d
Author: Sebb <se...@apache.org>
AuthorDate: Sun Aug 30 13:16:47 2020 +0100

    Don't drop mails without Message-Id
    
    Use the Date or Subject header as the sort key instead, with a "~" prefix so that these messages sort at the end
---
 tools/collate-mboxes.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index ecbb2b0..ea8d202 100755
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -23,21 +23,24 @@ for msgfile in msgfiles:
         message = messages.get(key)
         msgid = message.get('message-id')
         if msgid:
-            msgid = msgid.strip()
-            file = messages.get_file(key, True)
-            message_raw = b''
-            if crlf is None:
-                message_raw = file.readline()
-                crlf = (message_raw.endswith(b'\r\n'))
-            message_raw += file.read()
-            file.close()
-            if msgid in allmessages:
-                print("Duplicate message id: %s" % msgid)
-                dupes += 1
-            allmessages[msgid] = message_raw
+            sortkey = msgid.strip()
         else:
-            print("No message id: ", message.get_from())
+            print("No message id, sorting by date or subject: ", message.get_from())
             noid += 1
+            altid = message.get('date') or message.get('subject')
+            sortkey = "~" + altid.strip() # try to ensure it sorts last
+        # store the data
+        file = messages.get_file(key, True)
+        message_raw = b''
+        if crlf is None:
+            message_raw = file.readline()
+            crlf = (message_raw.endswith(b'\r\n'))
+        message_raw += file.read()
+        file.close()
+        if sortkey in allmessages:
+            print("Duplicate sort key: %s" % sortkey)
+            dupes += 1
+        allmessages[sortkey] = message_raw
 
 
 nw = 0
@@ -50,4 +53,4 @@ with open(outmbox, "wb") as f:
             f.write(b'\n')
         nw += 1
 
-print("Wrote %u emails to %s with CRLF %s (%u skipped, %u dupes)" % (nw, outmbox, crlf, noid, dupes))
+print("Wrote %u emails to %s with CRLF %s (%u without message-id, %u dupes skipped)" % (nw, outmbox, crlf, noid, dupes))