You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/29 14:07:31 UTC

[incubator-ponymail-unit-tests] branch master updated: Bug: collate-output.py should not use get_bytes

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git


The following commit(s) were added to refs/heads/master by this push:
     new d67be3d  Bug: collate-output.py should not use get_bytes
d67be3d is described below

commit d67be3dd588eadd05bdbecb85d039691200c084a
Author: Sebb <se...@apache.org>
AuthorDate: Sat Aug 29 15:07:15 2020 +0100

    Bug: collate-output.py should not use get_bytes
    
    ... and should preserve From line #4
    Reports (but drops) mails with no Message-Id
---
 tools/collate-mboxes.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index ecda970..eed73ef 100644
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -3,35 +3,46 @@
 Simple tool for collating multiple mbox files into a single one, sorted by message ID.
 Used for multi-import tests where you wish to check that multiple sources give the same ID
 
-WARNING: emails without a Message-ID are currently silently dropped
-The code also assumes that mboxes have CRLF line-endings
+WARNING: emails without a Message-ID are currently dropped
 """
 import mailbox
 import sys
 
 outmbox = sys.argv[1]
-msgfiles = sys.argv[2:]
+msgfiles = sys.argv[2:] # multiple input files allowed
 
 allmessages = {}
+noid = 0
 for msgfile in msgfiles:
     messages = mailbox.mbox(
         msgfile, None, create=False
     )
     for key in messages.iterkeys():
         message = messages.get(key)
-        message_raw = messages.get_bytes(key)
         msgid = message.get('message-id')
         if msgid:
             msgid = msgid.strip()
-            allmessages[msgid] = message_raw
+            allmessages[msgid] = key
+        else:
+            print("No message id: ", message.get_from())
+            noid += 1
 
 
 nw = 0
+crlf = None # assume that all emails have the same EOL
 with open(outmbox, "wb") as f:
     for key in sorted(allmessages.keys()):
-        f.write(b"From TEST@TEST\r\n")
-        f.write(allmessages[key])
-        f.write(b"\r\n")
+        file=messages.get_file(allmessages[key], True)
+        if crlf is None:
+            from_ = file.readline()
+            f.write(from_)
+            crlf = (from_.endswith(b'\r\n'))
+        f.write(file.read())
+        if crlf:
+            f.write(b'\r\n')
+        else:
+            f.write(b'\n')
+        file.close()
         nw += 1
 
-print("Wrote %u emails to %s" % (nw, outmbox))
+print("Wrote %u emails to %s with CRLF %s (%u skipped)" % (nw, outmbox, crlf, noid))