You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2017/09/03 21:19:09 UTC
incubator-ponymail git commit: Document the generators
Repository: incubator-ponymail
Updated Branches:
refs/heads/master 70eda3276 -> c038ec329
Document the generators
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/c038ec32
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/c038ec32
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/c038ec32
Branch: refs/heads/master
Commit: c038ec3291b9faf380ec4b825cb77dacf5fea35f
Parents: 70eda32
Author: Sebb <se...@apache.org>
Authored: Sun Sep 3 22:19:05 2017 +0100
Committer: Sebb <se...@apache.org>
Committed: Sun Sep 3 22:19:05 2017 +0100
----------------------------------------------------------------------
tools/generators.py | 44 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 38 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/c038ec32/tools/generators.py
----------------------------------------------------------------------
diff --git a/tools/generators.py b/tools/generators.py
index 5d5e1e8..aef9e77 100644
--- a/tools/generators.py
+++ b/tools/generators.py
@@ -38,6 +38,8 @@ def full(msg, body, lid, attachments):
body - the parsed text content (not used)
lid - list id
attachments - list of attachments (not used)
+
+ Returns: "<hash>@<lid>" where hash is sha224 of message bytes
"""
mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
return mid
@@ -48,13 +50,24 @@ def medium(msg, body, lid, attachments):
"""
Standard 0.9 generator - Not recommended for future installations.
(does not generate sufficiently unique ids)
-
+
+ The following message fields are concatenated to form the hash input:
+ - body: used as is if it is bytes, otherwise encoded as ascii (ignoring invalid characters); if the body is null an Exception is thrown
+ - lid
+ - Date header if it exists and parses OK; failing that
+ - archived-at header if it exists and parses OK; failing that
+ - current time.
+ The resulting date is converted to YYYY/MM/DD HH:MM:SS (using UTC)
+
Parameters:
msg - the parsed message (used to get the date)
- body - the parsed text content
+ body - the parsed text content (may be null)
lid - list id
attachments - list of attachments (not used)
+
+ Returns: "<hash>@<lid>" where hash is sha224 of the message items noted above
"""
+
# Use text body
xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
# Use List ID
@@ -67,7 +80,7 @@ def medium(msg, body, lid, attachments):
# In keeping with preserving the past, we have kept this next section(s).
# For all intents and purposes, this is not a proper way of maintaining
# a consistent ID in case of missing dates. It is recommended to use
- # another generator such as full or redundant here.
+ # another generator.
if not mdate and msg.get('archived-at'):
mdate = email.utils.parsedate_tz(msg.get('archived-at'))
elif not mdate:
@@ -86,12 +99,24 @@ def redundant(msg, body, lid, attachments):
"""
Use data that is guaranteed to be the same across redundant setups
(does not guarantee to create unique ids)
-
+
+ The following message fields are concatenated to form the hash input:
+ - body: used as is if it is bytes, otherwise encoded as ascii (ignoring invalid characters); if the body is null it is treated as an empty string
+ (currently trailing whitespace is dropped)
+ - lid
+ - Date header converted to YYYY/MM/DD HH:MM:SS (UTC)
+ or "(null)" if the date does not exist or cannot be converted
+ - sender, encoded as ascii (if the field exists)
+ - subject, encoded as ascii (if the field exists)
+ - the hashes of any attachments
+
Parameters:
msg - the parsed message (used to get the date)
body - the parsed text content
lid - list id
attachments - list of attachments (uses the hashes)
+
+ Returns: "r<hash>@<lid>" where hash is sha224 of the message items noted above
"""
# Use text body
if not body: # Make sure body is not None, which will fail.
@@ -137,12 +162,19 @@ def legacy(msg, body, lid, attachments):
"""
Original generator - DO NOT USE
(does not generate unique ids)
-
+
+ The hash input is created from
+ - body: used as is if it is bytes, otherwise encoded as ascii (ignoring invalid characters); if the body is null an Exception is thrown
+
+ The uid_mdate for the id is the Date converted to a UTC epoch value, or 0 if no date is found
+
Parameters:
msg - the parsed message (used to get the date)
- body - the parsed text content
+ body - the parsed text content (may be null)
lid - list id
attachments - list of attachments (not used)
+
+ Returns: "<hash>@<uid_mdate>@<lid>" where hash is sha224 of the message items noted above
"""
uid_mdate = 0 # Default if no date found
try: