You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2017/06/05 08:33:01 UTC

incubator-ponymail git commit: Add the missing bits from last commit

Repository: incubator-ponymail
Updated Branches:
  refs/heads/master 2802e2905 -> fda07b8d7


Add the missing bits from last commit

- Adds back date munging for 'medium'
- Removes the archived-at and missing-date fallbacks as options for 'redundant'
  (only the Date: header is guaranteed to be consistent here)
- Adds the subject variable that was missing.
- Some additional comments
- Adds missing import


Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d

Branch: refs/heads/master
Commit: fda07b8d73decb0943c817d6fee69416c2016714
Parents: 2802e29
Author: Daniel Gruno <hu...@apache.org>
Authored: Mon Jun 5 10:32:36 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Mon Jun 5 10:32:36 2017 +0200

----------------------------------------------------------------------
 tools/generators.py | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
----------------------------------------------------------------------
diff --git a/tools/generators.py b/tools/generators.py
index 3f9c213..73a8210 100644
--- a/tools/generators.py
+++ b/tools/generators.py
@@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
 
 import hashlib
 import email.utils
+import time
 
 # Full generator: uses the entire email (including server-dependent data)
 # This is the recommended generator for single-node setups.
@@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
     mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
     return mid
 
-# Medium: Standard generator
+# Medium: Standard 0.9 generator - Not recommended for future installations.
+# See 'full' or 'redundant' generators instead.
 def medium(msg, body, lid, attachments):
     # Use text body
     xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
     # Use List ID
     xbody += bytes(lid, encoding='ascii')
     # Use Date header
+    mdate = None
+    try:
+        mdate = email.utils.parsedate_tz(msg.get('date'))
+    except:
+        pass
+    # In keeping with preserving the past, we have kept this next section(s).
+    # For all intents and purposes, this is not a proper way of maintaining
+    # a consistent ID in case of missing dates. It is recommended to use
+    # another generator such as full or redundant here.
+    if not mdate and msg_metadata.get('archived-at'):
+        mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
+    elif not mdate:
+        mdate = time.gmtime() # Get a standard 9-tuple
+        mdate = mdate + (0, ) # Fake a TZ (10th element)
+    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
     xbody += bytes(mdatestring, encoding='ascii')
     mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
     return mid
 
 # Redundant: Use data that is guaranteed to be the same across redundant setups
-# This is the recommended generator for redundant cluster setups
+# This is the recommended generator for redundant cluster setups.
+# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
+# as the archived-at may change from node to node (and will change if not in the raw mbox file)
 def redundant(msg, body, lid, attachments):
     # Use text body
     xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
     # Use List ID
     xbody += bytes(lid, encoding='ascii')
-    # Use Date header
+    # Use Date header. Don't use archived-at, as the archiver sets this if not present.
+    mdate = None
+    mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
+    try:
+        mdate = email.utils.parsedate_tz(msg.get('date'))
+        mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
+    except:
+        pass
     xbody += bytes(mdatestring, encoding='ascii')
     # Use sender
     sender = msg.get('from', None)
     if sender:
         xbody += bytes(sender, encoding = 'ascii')
     # Use subject
+    subject = msg.get('subject', None)
     if subject:
         xbody += bytes(subject, encoding = 'ascii')
     # Use attachment hashes if present


Re: incubator-ponymail git commit: Add the missing bits from last commit

Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 11:14 AM, sebb wrote:
> On 5 June 2017 at 09:33,  <hu...@apache.org> wrote:
>> Repository: incubator-ponymail
>> Updated Branches:
>>   refs/heads/master 2802e2905 -> fda07b8d7
>>
>>
>> Add the missing bits from last commit
>>
>> - Adds back date munging for 'medium'
>> - Removes archived-at and no date as an option for 'redundant'
>> (only Date: header is guaranteed to be consistent here)
>> - Adds the subject variable that was missing.
>> - Some additional comments
>> - Adds missing import
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>>
>> Branch: refs/heads/master
>> Commit: fda07b8d73decb0943c817d6fee69416c2016714
>> Parents: 2802e29
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Mon Jun 5 10:32:36 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Mon Jun 5 10:32:36 2017 +0200
>>
>> ----------------------------------------------------------------------
>>  tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>>  1 file changed, 30 insertions(+), 3 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> index 3f9c213..73a8210 100644
>> --- a/tools/generators.py
>> +++ b/tools/generators.py
>> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>>
>>  import hashlib
>>  import email.utils
>> +import time
>>
>>  # Full generator: uses the entire email (including server-dependent data)
>>  # This is the recommended generator for single-node setups.
>> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>>      mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>      return mid
>>
>> -# Medium: Standard generator
>> +# Medium: Standard 0.9 generator - Not recommended for future installations.
>> +# See 'full' or 'redundant' generators instead.
>>  def medium(msg, body, lid, attachments):
>>      # Use text body
>>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>      # Use List ID
>>      xbody += bytes(lid, encoding='ascii')
>>      # Use Date header
>> +    mdate = None
>> +    try:
>> +        mdate = email.utils.parsedate_tz(msg.get('date'))
>> +    except:
>> +        pass
>> +    # In keeping with preserving the past, we have kept this next section(s).
>> +    # For all intents and purposes, this is not a proper way of maintaining
>> +    # a consistent ID in case of missing dates. It is recommended to use
>> +    # another generator such as full or redundant here.
>> +    if not mdate and msg_metadata.get('archived-at'):
>> +        mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
>> +    elif not mdate:
> 
> The original code has a print() command here to warn about the missing date

That is still there, in archiver.py. I did not put it in the generators,
as that would just duplicate it.

> 
>> +        mdate = time.gmtime() # Get a standard 9-tuple
>> +        mdate = mdate + (0, ) # Fake a TZ (10th element)
>> +    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>>      xbody += bytes(mdatestring, encoding='ascii')
>>      mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>      return mid
>>
>>  # Redundant: Use data that is guaranteed to be the same across redundant setups
>> -# This is the recommended generator for redundant cluster setups
>> +# This is the recommended generator for redundant cluster setups.
>> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
>> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
>>  def redundant(msg, body, lid, attachments):
>>      # Use text body
>>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>      # Use List ID
>>      xbody += bytes(lid, encoding='ascii')
>> -    # Use Date header
>> +    # Use Date header. Don't use archived-at, as the archiver sets this if not present.
>> +    mdate = None
>> +    mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
>> +    try:
>> +        mdate = email.utils.parsedate_tz(msg.get('date'))
>> +        mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>> +    except:
>> +        pass
>>      xbody += bytes(mdatestring, encoding='ascii')
>>      # Use sender
>>      sender = msg.get('from', None)
>>      if sender:
>>          xbody += bytes(sender, encoding = 'ascii')
>>      # Use subject
>> +    subject = msg.get('subject', None)
>>      if subject:
>>          xbody += bytes(subject, encoding = 'ascii')
>>      # Use attachment hashes if present
>>


Re: incubator-ponymail git commit: Add the missing bits from last commit

Posted by sebb <se...@gmail.com>.
On 5 June 2017 at 09:33,  <hu...@apache.org> wrote:
> Repository: incubator-ponymail
> Updated Branches:
>   refs/heads/master 2802e2905 -> fda07b8d7
>
>
> Add the missing bits from last commit
>
> - Adds back date munging for 'medium'
> - Removes archived-at and no date as an option for 'redundant'
> (only Date: header is guaranteed to be consistent here)
> - Adds the subject variable that was missing.
> - Some additional comments
> - Adds missing import
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>
> Branch: refs/heads/master
> Commit: fda07b8d73decb0943c817d6fee69416c2016714
> Parents: 2802e29
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Mon Jun 5 10:32:36 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Mon Jun 5 10:32:36 2017 +0200
>
> ----------------------------------------------------------------------
>  tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> index 3f9c213..73a8210 100644
> --- a/tools/generators.py
> +++ b/tools/generators.py
> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony Mail's archivers.
>
>  import hashlib
>  import email.utils
> +import time
>
>  # Full generator: uses the entire email (including server-dependent data)
>  # This is the recommended generator for single-node setups.
> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>      mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>      return mid
>
> -# Medium: Standard generator
> +# Medium: Standard 0.9 generator - Not recommended for future installations.
> +# See 'full' or 'redundant' generators instead.
>  def medium(msg, body, lid, attachments):
>      # Use text body
>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>      # Use List ID
>      xbody += bytes(lid, encoding='ascii')
>      # Use Date header
> +    mdate = None
> +    try:
> +        mdate = email.utils.parsedate_tz(msg.get('date'))
> +    except:
> +        pass
> +    # In keeping with preserving the past, we have kept this next section(s).
> +    # For all intents and purposes, this is not a proper way of maintaining
> +    # a consistent ID in case of missing dates. It is recommended to use
> +    # another generator such as full or redundant here.
> +    if not mdate and msg_metadata.get('archived-at'):
> +        mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
> +    elif not mdate:

The original code has a print() call here to warn about the missing date.

> +        mdate = time.gmtime() # Get a standard 9-tuple
> +        mdate = mdate + (0, ) # Fake a TZ (10th element)
> +    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
>      xbody += bytes(mdatestring, encoding='ascii')
>      mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>      return mid
>
>  # Redundant: Use data that is guaranteed to be the same across redundant setups
> -# This is the recommended generator for redundant cluster setups
> +# This is the recommended generator for redundant cluster setups.
> +# Unlike 'medium', this only makes use of the Date: header and not the archived-at,
> +# as the archived-at may change from node to node (and will change if not in the raw mbox file)
>  def redundant(msg, body, lid, attachments):
>      # Use text body
>      xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>      # Use List ID
>      xbody += bytes(lid, encoding='ascii')
> -    # Use Date header
> +    # Use Date header. Don't use archived-at, as the archiver sets this if not present.
> +    mdate = None
> +    mdatestring = "(null)" # Default to null, ONLY changed if replicable across imports
> +    try:
> +        mdate = email.utils.parsedate_tz(msg.get('date'))
> +        mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate)))
> +    except:
> +        pass
>      xbody += bytes(mdatestring, encoding='ascii')
>      # Use sender
>      sender = msg.get('from', None)
>      if sender:
>          xbody += bytes(sender, encoding = 'ascii')
>      # Use subject
> +    subject = msg.get('subject', None)
>      if subject:
>          xbody += bytes(subject, encoding = 'ascii')
>      # Use attachment hashes if present
>