You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/08/26 11:41:39 UTC
[incubator-ponymail-foal] branch master updated: add back mboxo
file in the right place
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
The following commit(s) were added to refs/heads/master by this push:
new 63392e7 add back mboxo file in the right place
63392e7 is described below
commit 63392e76b9c28c4ecccc1b6204786e9aecbea4b9
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Wed Aug 26 13:41:29 2020 +0200
add back mboxo file in the right place
---
tools/plugins/mboxo_patch.py | 97 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/tools/plugins/mboxo_patch.py b/tools/plugins/mboxo_patch.py
new file mode 100644
index 0000000..a577567
--- /dev/null
+++ b/tools/plugins/mboxo_patch.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Byte stream reader to process mboxo style mailbox files.
+These are not currently handled by the Python email package.
+
+It replaces any occurrence of b'\n>From ' with b'\nFrom '
+
+The class handles matching across read boundaries.
+
+To use:
+
+from mboxo_patch import MboxoFactory
+...
+messages = mailbox.mbox(filename, MboxoFactory)
+
+N.B.
+To simplify the code, the MboxoReader class raises the
+size parameter to 7 if (and only if): 0 < size < 7
+(a size of 0 is passed through unchanged and reads nothing).
+The returned byte buffer can thus be larger than requested.
+However, this is only a theoretical possibility,
+as the mailbox code uses a size of 8192 (or None).
+
+"""
+import mailbox
+
+FROM_MANGLED = b'\n>From '
+FROM_MANGLED_LEN = len(FROM_MANGLED)
+FROM_UNMANGLED = b'\nFrom '
+# The mangled marker is the 7 bytes b'\n>From ', but a chunked read may
+# stop part-way through it: anywhere after the leading b'\n' and before
+# the trailing b' '. A buffer ending in one of these incomplete leading
+# parts might continue into a full marker on the next read, so that tail
+# has to be kept back and looked at again.
+# FROMS holds every such incomplete leading part, longest first
+# (b'\n>From' down to b'\n'); it is a tuple so it can be handed
+# directly to bytes.endswith().
+FROMS = tuple(FROM_MANGLED[:-cut] for cut in range(1, FROM_MANGLED_LEN))
+
+class MboxoReader(mailbox._PartialFile):  # pylint: disable=W0212
+    r"""File-range reader that reverses mboxo ">From " quoting while reading.
+
+    Data returned by ``_read`` has every b'\n>From ' replaced by b'\nFrom '.
+    When a size-limited read ends with the start of a possible b'\n>From '
+    sequence, the bytes from the last b'\n' onwards are withheld and read
+    again by the next call, so a sequence split across two reads is still
+    matched.
+    """
+
+    def __init__(self, f, start=None, stop=None):
+        """Read the same underlying file and byte range as *f*.
+
+        Args:
+            f: source object; its ``_file``, ``_start`` and ``_stop``
+                attributes are used (presumably a mailbox._PartialFile as
+                handed to a message factory -- confirm against the caller).
+            start: unused; the range start always comes from ``f._start``.
+            stop: unused; the range end always comes from ``f._stop``.
+        """
+        self.remain = 0  # number of bytes to keep for next read
+        super().__init__(f._file, start=f._start, stop=f._stop)  # pylint: disable=W0212
+
+    # Override the read method to provide mboxo filtering
+    def _read(self, size, read_method):
+        r"""Read up to *size* bytes and return them with mboxo quoting undone.
+
+        Args:
+            size: maximum number of bytes to read. None or a negative value
+                reads everything; 0 reads nothing; 0 < size < 7 is raised
+                to 7 so a whole b'\n>From ' always fits in one buffer.
+            read_method: passed unchanged to the parent class's ``_read``.
+
+        Returns:
+            The bytes read, minus any withheld tail, with each b'\n>From '
+            replaced by b'\nFrom '. The result is empty only when the
+            underlying read returned nothing.
+        """
+        # step back over the tail that the previous call withheld
+        if self.remain != 0:
+            super().seek(whence=1, offset=-self.remain)
+        # If size is None or negative the read returns everything, and a
+        # size of 0 returns nothing; in both cases there is no need to
+        # worry about matching across reads.
+        limited_read = size is not None and size > 0
+        # ensure we get enough to match successfully when refilling
+        if limited_read and size < FROM_MANGLED_LEN:
+            size = FROM_MANGLED_LEN
+        buff = super()._read(size, read_method)
+        bufflen = len(buff)
+        keep = 0
+        # Only look for a split marker if the read was limited and produced
+        # something beyond the tail we already knew about.
+        if limited_read and bufflen > self.remain and buff.endswith(FROMS):
+            # endswith(FROMS) guarantees buff contains b'\n',
+            # so rindex cannot raise here
+            last_lf = buff.rindex(b'\n')
+            # last_lf == 0 means the whole buffer is a partial marker.
+            # Partial markers are shorter than FROM_MANGLED_LEN <= size, so
+            # the read came up short (normally the end of the data).
+            # Withholding it all would return b'', which callers treat as
+            # EOF and so would silently lose these bytes; return them.
+            if last_lf > 0:
+                keep = bufflen - last_lf
+        self.remain = keep
+        # exclude the potential split match from the return
+        return buff[:bufflen - keep].replace(FROM_MANGLED, FROM_UNMANGLED)
+
+class MboxoFactory(mailbox.mboxMessage):
+    """Message class for use as a mailbox.mbox factory on mboxo files.
+
+    Pass the class itself as the factory:
+    ``mailbox.mbox(filename, MboxoFactory)``.
+    """
+
+    def __init__(self, message=None):
+        """Build the message, reading *message* through an MboxoReader.
+
+        Args:
+            message: the file object supplied by the mailbox code; it is
+                wrapped in MboxoReader before being passed to the parent
+                class. None (the default) is forwarded unchanged, because
+                MboxoReader needs a real file object to wrap.
+        """
+        if message is not None:
+            message = MboxoReader(message)
+        super().__init__(message=message)