You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2020/08/29 14:07:31 UTC
[incubator-ponymail-unit-tests] branch master updated: Bug: collate-mboxes.py should not use get_bytes
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-unit-tests.git
The following commit(s) were added to refs/heads/master by this push:
new d67be3d Bug: collate-mboxes.py should not use get_bytes
d67be3d is described below
commit d67be3dd588eadd05bdbecb85d039691200c084a
Author: Sebb <se...@apache.org>
AuthorDate: Sat Aug 29 15:07:15 2020 +0100
Bug: collate-mboxes.py should not use get_bytes
... and should preserve From line #4
Reports (but drops) mails with no Message-Id
---
tools/collate-mboxes.py | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/tools/collate-mboxes.py b/tools/collate-mboxes.py
index ecda970..eed73ef 100644
--- a/tools/collate-mboxes.py
+++ b/tools/collate-mboxes.py
@@ -3,35 +3,46 @@
Simple tool for collating multiple mbox files into a single one, sorted by message ID.
Used for multi-import tests where you wish to check that multiple sources give the same ID
-WARNING: emails without a Message-ID are currently silently dropped
-The code also assumes that mboxes have CRLF line-endings
+WARNING: emails without a Message-ID are currently dropped
"""
import mailbox
import sys
outmbox = sys.argv[1]
-msgfiles = sys.argv[2:]
+msgfiles = sys.argv[2:] # multiple input files allowed
allmessages = {}
+noid = 0
for msgfile in msgfiles:
messages = mailbox.mbox(
msgfile, None, create=False
)
for key in messages.iterkeys():
message = messages.get(key)
- message_raw = messages.get_bytes(key)
msgid = message.get('message-id')
if msgid:
msgid = msgid.strip()
- allmessages[msgid] = message_raw
+ allmessages[msgid] = key
+ else:
+ print("No message id: ", message.get_from())
+ noid += 1
nw = 0
+crlf = None # assume that all emails have the same EOL
with open(outmbox, "wb") as f:
for key in sorted(allmessages.keys()):
- f.write(b"From TEST@TEST\r\n")
- f.write(allmessages[key])
- f.write(b"\r\n")
+ file=messages.get_file(allmessages[key], True)
+ if crlf is None:
+ from_ = file.readline()
+ f.write(from_)
+ crlf = (from_.endswith(b'\r\n'))
+ f.write(file.read())
+ if crlf:
+ f.write(b'\r\n')
+ else:
+ f.write(b'\n')
+ file.close()
nw += 1
-print("Wrote %u emails to %s" % (nw, outmbox))
+print("Wrote %u emails to %s with CRLF %s (%u skipped)" % (nw, outmbox, crlf, noid))