You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2017/06/05 08:33:01 UTC
incubator-ponymail git commit: Add the missing bits from last commit
Repository: incubator-ponymail
Updated Branches:
refs/heads/master 2802e2905 -> fda07b8d7
Add the missing bits from last commit
- Adds back date munging for 'medium'
- Removes archived-at and no date as an option for 'redundant'
(only Date: header is guaranteed to be consistent here)
- Adds the subject variable that was missing.
- Some additional comments
- Adds missing import
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
Branch: refs/heads/master
Commit: fda07b8d73decb0943c817d6fee69416c2016714
Parents: 2802e29
Author: Daniel Gruno <hu...@apache.org>
Authored: Mon Jun 5 10:32:36 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Mon Jun 5 10:32:36 2017 +0200
----------------------------------------------------------------------
tools/generators.py | 33 ++++++++++++++++++++++++++++++---
1 file changed, 30 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
----------------------------------------------------------------------
diff --git a/tools/generators.py b/tools/generators.py
index 3f9c213..73a8210 100644
--- a/tools/generators.py
+++ b/tools/generators.py
@@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
import hashlib
import email.utils
+import time
# Full generator: uses the entire email (including server-dependent data)
# This is the recommended generator for single-node setups.
@@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
return mid
-# Medium: Standard generator
+# Medium: Standard 0.9 generator - Not recommended for future installations.
+# See 'full' or 'redundant' generators instead.
def medium(msg, body, lid, attachments):
# Use text body
xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
# Use List ID
xbody += bytes(lid, encoding='ascii')
# Use Date header
+ mdate = None
+ try:
+ mdate = email.utils.parsedate_tz(msg.get('date'))
+ except:
+ pass
+ # In keeping with preserving the past, we have kept this next section(s).
+ # For all intents and purposes, this is not a proper way of maintaining
+ # a consistent ID in case of missing dates. It is recommended to use
+ # another generator such as full or redundant here.
+ if not mdate and msg_metadata.get('archived-at'):
+ mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
+ elif not mdate:
+ mdate = time.gmtime() # Get a standard 9-tuple
+ mdate = mdate + (0, ) # Fake a TZ (10th element)
+ mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
xbody += bytes(mdatestring, encoding='ascii')
mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
return mid
# Redundant: Use data that is guaranteed to be the same across redundant setups
-# This is the recommended generator for redundant cluster setups
+# This is the recommended generator for redundant cluster setups.
+# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
+# as the archived-at may change from node to node (and will change if not in the raw mbox file)
def redundant(msg, body, lid, attachments):
# Use text body
xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
# Use List ID
xbody += bytes(lid, encoding='ascii')
- # Use Date header
+ # Use Date header. Don't use archived-at, as the archiver sets this if not present.
+ mdate = None
+ mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
+ try:
+ mdate = email.utils.parsedate_tz(msg.get('date'))
+ mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
+ except:
+ pass
xbody += bytes(mdatestring, encoding='ascii')
# Use sender
sender = msg.get('from', None)
if sender:
xbody += bytes(sender, encoding = 'ascii')
# Use subject
+ subject = msg.get('subject', None)
if subject:
xbody += bytes(subject, encoding = 'ascii')
# Use attachment hashes if present
Re: incubator-ponymail git commit: Add the missing bits from last commit
Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 11:14 AM, sebb wrote:
> On 5 June 2017 at 09:33, <hu...@apache.org> wrote:
>> Repository: incubator-ponymail
>> Updated Branches:
>> refs/heads/master 2802e2905 -> fda07b8d7
>>
>>
>> Add the missing bits from last commit
>>
>> - Adds back date munging for 'medium'
>> - Removes archived-at and no date as an option for 'redundant'
>> (only Date: header is guaranteed to be consistent here)
>> - Adds the subject variable that was missing.
>> - Some additional comments
>> - Adds missing import
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>>
>> Branch: refs/heads/master
>> Commit: fda07b8d73decb0943c817d6fee69416c2016714
>> Parents: 2802e29
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Mon Jun 5 10:32:36 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Mon Jun 5 10:32:36 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>> 1 file changed, 30 insertions(+), 3 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> index 3f9c213..73a8210 100644
>> --- a/tools/generators.py
>> +++ b/tools/generators.py
>> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>>
>> import hashlib
>> import email.utils
>> +import time
>>
>> # Full generator: uses the entire email (including server-dependent data)
>> # This is the recommended generator for single-node setups.
>> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>> mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> return mid
>>
>> -# Medium: Standard generator
>> +# Medium: Standard 0.9 generator - Not recommended for future installations.
>> +# See 'full' or 'redundant' generators instead.
>> def medium(msg, body, lid, attachments):
>> # Use text body
>> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> # Use List ID
>> xbody += bytes(lid, encoding='ascii')
>> # Use Date header
>> + mdate = None
>> + try:
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + except:
>> + pass
>> + # In keeping with preserving the past, we have kept this next section(s).
>> + # For all intents and purposes, this is not a proper way of maintaining
>> + # a consistent ID in case of missing dates. It is recommended to use
>> + # another generator such as full or redundant here.
>> + if not mdate and msg_metadata.get('archived-at'):
>> + mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
>> + elif not mdate:
>
> The original code has a print() command here to warn about the missing date
That is still there, in archiver.py. I did not put it in the generators,
as that would just duplicate it.
>
>> + mdate = time.gmtime() # Get a standard 9-tuple
>> + mdate = mdate + (0, ) # Fake a TZ (10th element)
>> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>> xbody += bytes(mdatestring, encoding='ascii')
>> mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> return mid
>>
>> # Redundant: Use data that is guaranteed to be the same across redundant setups
>> -# This is the recommended generator for redundant cluster setups
>> +# This is the recommended generator for redundant cluster setups.
>> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
>> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
>> def redundant(msg, body, lid, attachments):
>> # Use text body
>> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> # Use List ID
>> xbody += bytes(lid, encoding='ascii')
>> - # Use Date header
>> + # Use Date header. Don't use archived-at, as the archiver sets this if not present.
>> + mdate = None
>> + mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
>> + try:
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>> + except:
>> + pass
>> xbody += bytes(mdatestring, encoding='ascii')
>> # Use sender
>> sender = msg.get('from', None)
>> if sender:
>> xbody += bytes(sender, encoding = 'ascii')
>> # Use subject
>> + subject = msg.get('subject', None)
>> if subject:
>> xbody += bytes(subject, encoding = 'ascii')
>> # Use attachment hashes if present
>>
Re: incubator-ponymail git commit: Add the missing bits from last commit
Posted by sebb <se...@gmail.com>.
On 5 June 2017 at 09:33, <hu...@apache.org> wrote:
> Repository: incubator-ponymail
> Updated Branches:
> refs/heads/master 2802e2905 -> fda07b8d7
>
>
> Add the missing bits from last commit
>
> - Adds back date munging for 'medium'
> - Removes archived-at and no date as an option for 'redundant'
> (only Date: header is guaranteed to be consistent here)
> - Adds the subject variable that was missing.
> - Some additional comments
> - Adds missing import
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>
> Branch: refs/heads/master
> Commit: fda07b8d73decb0943c817d6fee69416c2016714
> Parents: 2802e29
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Mon Jun 5 10:32:36 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Mon Jun 5 10:32:36 2017 +0200
>
> ----------------------------------------------------------------------
> tools/generators.py | 33 ++++++++++++++++++++++++++++++---
> 1 file changed, 30 insertions(+), 3 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> index 3f9c213..73a8210 100644
> --- a/tools/generators.py
> +++ b/tools/generators.py
> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>
> import hashlib
> import email.utils
> +import time
>
> # Full generator: uses the entire email (including server-dependent data)
> # This is the recommended generator for single-node setups.
> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
> mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> return mid
>
> -# Medium: Standard generator
> +# Medium: Standard 0.9 generator - Not recommended for future installations.
> +# See 'full' or 'redundant' generators instead.
> def medium(msg, body, lid, attachments):
> # Use text body
> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> # Use List ID
> xbody += bytes(lid, encoding='ascii')
> # Use Date header
> + mdate = None
> + try:
> + mdate = email.utils.parsedate_tz(msg.get('date'))
> + except:
> + pass
> + # In keeping with preserving the past, we have kept this next section(s).
> + # For all intents and purposes, this is not a proper way of maintaining
> + # a consistent ID in case of missing dates. It is recommended to use
> + # another generator such as full or redundant here.
> + if not mdate and msg_metadata.get('archived-at'):
> + mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
> + elif not mdate:
The original code has a print() command here to warn about the missing date
> + mdate = time.gmtime() # Get a standard 9-tuple
> + mdate = mdate + (0, ) # Fake a TZ (10th element)
> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
> xbody += bytes(mdatestring, encoding='ascii')
> mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> return mid
>
> # Redundant: Use data that is guaranteed to be the same across redundant setups
> -# This is the recommended generator for redundant cluster setups
> +# This is the recommended generator for redundant cluster setups.
> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
> def redundant(msg, body, lid, attachments):
> # Use text body
> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> # Use List ID
> xbody += bytes(lid, encoding='ascii')
> - # Use Date header
> + # Use Date header. Don't use archived-at, as the archiver sets this if not present.
> + mdate = None
> + mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
> + try:
> + mdate = email.utils.parsedate_tz(msg.get('date'))
> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
> + except:
> + pass
> xbody += bytes(mdatestring, encoding='ascii')
> # Use sender
> sender = msg.get('from', None)
> if sender:
> xbody += bytes(sender, encoding = 'ascii')
> # Use subject
> + subject = msg.get('subject', None)
> if subject:
> xbody += bytes(subject, encoding = 'ascii')
> # Use attachment hashes if present
>