You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ponymail.apache.org by sebb <se...@gmail.com> on 2017/06/05 09:14:53 UTC

Re: incubator-ponymail git commit: Add the missing bits from last commit

On 5 June 2017 at 09:33,  <hu...@apache.org> wrote:
> Repository: incubator-ponymail
> Updated Branches:
>   refs/heads/master 2802e2905 -> fda07b8d7
>
>
> Add the missing bits from last commit
>
> - Adds back date munging for 'medium'
> - Removes archived-at and no date as an option for 'redundant'
> (only Date: header is guaranteed to be consistent here)
> - Adds the subject variable that was missing.
> - Some additional comments
> - Adds missing import
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>
> Branch: refs/heads/master
> Commit: fda07b8d73decb0943c817d6fee69416c2016714
> Parents: 2802e29
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Mon Jun 5 10:32:36 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Mon Jun 5 10:32:36 2017 +0200
>
> ----------------------------------------------------------------------
>  tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> index 3f9c213..73a8210 100644
> --- a/tools/generators.py
> +++ b/tools/generators.py
> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>
>  import hashlib
>  import email.utils
> +import time
>
>  # Full generator: uses the entire email (including server-dependent data)
>  # This is the recommended generator for single-node setups.
> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>      mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>      return mid
>
> -# Medium: Standard generator
> +# Medium: Standard 0.9 generator - Not recommended for future installations.
> +# See 'full' or 'redundant' generators instead.
>  def medium(msg, body, lid, attachments):
>      # Use text body
>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>      # Use List ID
>      xbody += bytes(lid, encoding='ascii')
>      # Use Date header
> +    mdate = None
> +    try:
> +        mdate = email.utils.parsedate_tz(msg.get('date'))
> +    except:
> +        pass
> +    # In keeping with preserving the past, we have kept this next section(s).
> +    # For all intents and purposes, this is not a proper way of maintaining
> +    # a consistent ID in case of missing dates. It is recommended to use
> +    # another generator such as full or redundant here.
> +    if not mdate and msg_metadata.get('archived-at'):
> +        mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
> +    elif not mdate:

The original code has a print() call here to warn about the missing date.

> +        mdate = time.gmtime() # Get a standard 9-tuple
> +        mdate = mdate + (0, ) # Fake a TZ (10th element)
> +    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>      xbody += bytes(mdatestring, encoding='ascii')
>      mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>      return mid
>
>  # Redundant: Use data that is guaranteed to be the same across redundant setups
> -# This is the recommended generator for redundant cluster setups
> +# This is the recommended generator for redundant cluster setups.
> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
>  def redundant(msg, body, lid, attachments):
>      # Use text body
>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>      # Use List ID
>      xbody += bytes(lid, encoding='ascii')
> -    # Use Date header
> +    # Use Date header. Don't use archived-at, as the archiver sets this if not present.
> +    mdate = None
> +    mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
> +    try:
> +        mdate = email.utils.parsedate_tz(msg.get('date'))
> +        mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
> +    except:
> +        pass
>      xbody += bytes(mdatestring, encoding='ascii')
>      # Use sender
>      sender = msg.get('from', None)
>      if sender:
>          xbody += bytes(sender, encoding = 'ascii')
>      # Use subject
> +    subject = msg.get('subject', None)
>      if subject:
>          xbody += bytes(subject, encoding = 'ascii')
>      # Use attachment hashes if present
>

Re: incubator-ponymail git commit: Add the missing bits from last commit

Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 11:14 AM, sebb wrote:
> On 5 June 2017 at 09:33,  <hu...@apache.org> wrote:
>> Repository: incubator-ponymail
>> Updated Branches:
>>   refs/heads/master 2802e2905 -> fda07b8d7
>>
>>
>> Add the missing bits from last commit
>>
>> - Adds back date munging for 'medium'
>> - Removes archived-at and no date as an option for 'redundant'
>> (only Date: header is guaranteed to be consistent here)
>> - Adds the subject variable that was missing.
>> - Some additional comments
>> - Adds missing import
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>>
>> Branch: refs/heads/master
>> Commit: fda07b8d73decb0943c817d6fee69416c2016714
>> Parents: 2802e29
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Mon Jun 5 10:32:36 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Mon Jun 5 10:32:36 2017 +0200
>>
>> ----------------------------------------------------------------------
>>  tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>>  1 file changed, 30 insertions(+), 3 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> index 3f9c213..73a8210 100644
>> --- a/tools/generators.py
>> +++ b/tools/generators.py
>> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>>
>>  import hashlib
>>  import email.utils
>> +import time
>>
>>  # Full generator: uses the entire email (including server-dependent data)
>>  # This is the recommended generator for single-node setups.
>> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>>      mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>      return mid
>>
>> -# Medium: Standard generator
>> +# Medium: Standard 0.9 generator - Not recommended for future installations.
>> +# See 'full' or 'redundant' generators instead.
>>  def medium(msg, body, lid, attachments):
>>      # Use text body
>>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>      # Use List ID
>>      xbody += bytes(lid, encoding='ascii')
>>      # Use Date header
>> +    mdate = None
>> +    try:
>> +        mdate = email.utils.parsedate_tz(msg.get('date'))
>> +    except:
>> +        pass
>> +    # In keeping with preserving the past, we have kept this next section(s).
>> +    # For all intents and purposes, this is not a proper way of maintaining
>> +    # a consistent ID in case of missing dates. It is recommended to use
>> +    # another generator such as full or redundant here.
>> +    if not mdate and msg_metadata.get('archived-at'):
>> +        mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
>> +    elif not mdate:
> 
> The original code has a print() call here to warn about the missing date.

That warning is still there, in archiver.py. I did not add it to the
generators, as that would just duplicate it.

> 
>> +        mdate = time.gmtime() # Get a standard 9-tuple
>> +        mdate = mdate + (0, ) # Fake a TZ (10th element)
>> +    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>>      xbody += bytes(mdatestring, encoding='ascii')
>>      mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>      return mid
>>
>>  # Redundant: Use data that is guaranteed to be the same across redundant setups
>> -# This is the recommended generator for redundant cluster setups
>> +# This is the recommended generator for redundant cluster setups.
>> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
>> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
>>  def redundant(msg, body, lid, attachments):
>>      # Use text body
>>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>      # Use List ID
>>      xbody += bytes(lid, encoding='ascii')
>> -    # Use Date header
>> +    # Use Date header. Don't use archived-at, as the archiver sets this if not present.
>> +    mdate = None
>> +    mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
>> +    try:
>> +        mdate = email.utils.parsedate_tz(msg.get('date'))
>> +        mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>> +    except:
>> +        pass
>>      xbody += bytes(mdatestring, encoding='ascii')
>>      # Use sender
>>      sender = msg.get('from', None)
>>      if sender:
>>          xbody += bytes(sender, encoding = 'ascii')
>>      # Use subject
>> +    subject = msg.get('subject', None)
>>      if subject:
>>          xbody += bytes(subject, encoding = 'ascii')
>>      # Use attachment hashes if present
>>