You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2017/06/04 14:02:01 UTC
[1/4] incubator-ponymail git commit: be more comprehensive in
generating IDs
Repository: incubator-ponymail
Updated Branches:
refs/heads/master 767d8f8c1 -> c58b23127
be more comprehensive in generating IDs
Include whatever metadata we can in generating IDs to lessen the
risk of theoretical ID collisions.
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/8b7ede85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/8b7ede85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/8b7ede85
Branch: refs/heads/master
Commit: 8b7ede85f03bd738d42e8e87f2302e4c8ddb0ad3
Parents: 767d8f8
Author: Daniel Gruno <hu...@apache.org>
Authored: Sun Jun 4 15:21:23 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Sun Jun 4 15:21:23 2017 +0200
----------------------------------------------------------------------
CHANGELOG.md | 1 +
tools/archiver.py | 3 +++
2 files changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/8b7ede85/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 275b0df..be595de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
## CHANGES in 0.10:
+- more comprehensive ID generation mechanisms
- private messages are now included in downloads if the user has access to them (#169, #108)
- mbox export now generates valid From_ line (#190)
- mbox export now escapes 'From ' lines in body text (#188)
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/8b7ede85/tools/archiver.py
----------------------------------------------------------------------
diff --git a/tools/archiver.py b/tools/archiver.py
index 141a431..41933f7 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -324,6 +324,9 @@ class Archiver(object):
xbody += bytes(lid, encoding='ascii')
xbody += bytes(mdatestring, encoding='ascii')
mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
+ if attachments:
+ for a in attachments:
+ xbody += bytes(a['hash'], encoding = 'ascii')
else:
# Or revert to the old way?
mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
[3/4] incubator-ponymail git commit: make setup ask for generator
mechanism
Posted by hu...@apache.org.
make setup ask for generator mechanism
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/23966d82
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/23966d82
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/23966d82
Branch: refs/heads/master
Commit: 23966d825801f0465fd00c881e1edddb5e529826
Parents: e2d8103
Author: Daniel Gruno <hu...@apache.org>
Authored: Sun Jun 4 15:58:33 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Sun Jun 4 15:58:33 2017 +0200
----------------------------------------------------------------------
tools/setup.py | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/23966d82/tools/setup.py
----------------------------------------------------------------------
diff --git a/tools/setup.py b/tools/setup.py
index e65069a..d5ca140 100755
--- a/tools/setup.py
+++ b/tools/setup.py
@@ -94,6 +94,7 @@ dbname = ""
mlserver = ""
mldom = ""
wc = ""
+genname = ""
wce = False
shards = 0
replicas = -1
@@ -154,6 +155,20 @@ while wc == "":
if wc.lower() == "y":
wce = True
+while genname == "":
+ gens = ['legacy', 'medium', 'redundant', 'full']
+ print ("Please select a document ID generator:")
+ print("1 LEGACY: The original document generator for v/0.1-0.8 (no longer recommended)")
+ print("2 MEDIUM: The medium comprehensive generator for v/0.9 (no longer recommended)")
+ print("3 REDUNDANT: Near-full message digest, discard MTA trail (recommended for clustered setups)")
+ print("4 FULL: Full message digest with MTA trail (recommended for single-node setups).")
+ try:
+ gno = int(input("Please select a generator [1-4]: "))
+ if gno <= len(gens) and gens[gno-1]:
+ genname = gens[gno-1]
+ except ValueError:
+ pass
+
while shards < 1:
try:
shards = int(input("How many shards for the ElasticSearch index? "))
@@ -475,14 +490,14 @@ ssl: false
#backup: database name
[archiver]
-#generator: medium|full|other
+generator: %s
[debug]
#cropout: string to crop from list-id
###############################################################
""" % (hostname, dbname, port,
- 'wait: active shard count' if ES_MAJOR == 5 else 'write: consistency level (default quorum)'))
+ 'wait: active shard count' if ES_MAJOR == 5 else 'write: consistency level (default quorum)', genname))
f.close()
config_path = "../site/api/lib"
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 12:22 AM, sebb wrote:
> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>> split generators into a file of its own
>>
>> Also fix up generators:
>> - medium goes back to the way it was
>> - a new 'redundant' generator for cluster setups
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>
>> Branch: refs/heads/master
>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>> Parents: 8b7ede8
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Sun Jun 4 15:45:18 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/archiver.py | 17 ++++-------
>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 80 insertions(+), 11 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/archiver.py b/tools/archiver.py
>> index 41933f7..0966b13 100755
>> --- a/tools/archiver.py
>> +++ b/tools/archiver.py
>> @@ -58,6 +58,7 @@ import io
>> import logging
>> import traceback
>> import sys
>> +import generators
>>
>> # Fetch config
>> path = os.path.dirname(os.path.realpath(__file__))
>> @@ -316,20 +317,14 @@ class Archiver(object):
>> if body is not None or attachments:
>> pmid = mid
>> try:
>> - # Use full message as bytes for mid?
>> if archiver_generator == "full":
>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + mid = generators.full(msg, body, lid, attachments)
>> elif archiver_generator == "medium":
>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> - xbody += bytes(lid, encoding='ascii')
>> - xbody += bytes(mdatestring, encoding='ascii')
>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> - if attachments:
>> - for a in attachments:
>> - xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = generators.medium(msg, body, lid, attachments)
>> + elif archiver_generator == "redundant":
>> + mid = generators.redundant(msg, body, lid, attachments)
>> else:
>> - # Or revert to the old way?
>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + mid = generators.legacy(msg, body, lid, attachments)
>> except Exception as err:
>> if logger:
>> logger.warn("Could not generate MID: %s" % err)
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> new file mode 100644
>> index 0000000..af566fc
>> --- /dev/null
>> +++ b/tools/generators.py
>> @@ -0,0 +1,74 @@
>> +#!/usr/bin/env/python3
>> +# -*- coding: utf-8 -*-
>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>> +# contributor license agreements. See the NOTICE file distributed with
>> +# this work for additional information regarding copyright ownership.
>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>> +# (the "License"); you may not use this file except in compliance with
>> +# the License. You may obtain a copy of the License at
>> +#
>> +# http://www.apache.org/licenses/LICENSE-2.0
>> +#
>> +# Unless required by applicable law or agreed to in writing, software
>> +# distributed under the License is distributed on an "AS IS" BASIS,
>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> +# See the License for the specific language governing permissions and
>> +# limitations under the License.
>> +
>> +"""
>> +This file contains the various ID generators for Pony Mail's archivers.
>> +"""
>> +
>> +import hashlib
>> +import email.utils
>> +
>> +# Full generator: uses the entire email (including sever-depenent data)
>> +# This is the recommended generator for single-node setups.
>> +def full(msg, body, lid, attachments):
>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + return mid
>> +
>> +# Medium: Standard generator
>> +def medium(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>> +# This is the recommended generator for redundant cluster setups
>> +def redundant(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + # Use sender
>> + sender = msg.get('from', None)
>> + if sender:
>> + xbody += bytes(sender, encoding = 'ascii')
>> + # Use subject
>> + if subject:
>> + xbody += bytes(subject, encoding = 'ascii')
>> + # Use attachment hashes if present
>> + if attachments:
>> + for a in attachments:
>> + xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +
>> +# Old school way of making IDs
>> +def legacy(msg, body, lid, attachments):
>
> -1
>
> AFAICT this is not exactly the same as the original code.
>
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>
> What happens if either of the previous two lines throws an error?
Good catch! Fixed in 2802e2905.
>
>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + return mid
>> +
>> +
>> +
>>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
> split generators into a file of its own
>
> Also fix up generators:
> - medium goes back to the way it was
> - a new 'redundant' generator for cluster setups
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>
> Branch: refs/heads/master
> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
> Parents: 8b7ede8
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Sun Jun 4 15:45:18 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Sun Jun 4 15:45:18 2017 +0200
>
> ----------------------------------------------------------------------
> tools/archiver.py | 17 ++++-------
> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 80 insertions(+), 11 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
> ----------------------------------------------------------------------
> diff --git a/tools/archiver.py b/tools/archiver.py
> index 41933f7..0966b13 100755
> --- a/tools/archiver.py
> +++ b/tools/archiver.py
> @@ -58,6 +58,7 @@ import io
> import logging
> import traceback
> import sys
> +import generators
>
> # Fetch config
> path = os.path.dirname(os.path.realpath(__file__))
> @@ -316,20 +317,14 @@ class Archiver(object):
> if body is not None or attachments:
> pmid = mid
> try:
> - # Use full message as bytes for mid?
> if archiver_generator == "full":
> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + mid = generators.full(msg, body, lid, attachments)
> elif archiver_generator == "medium":
> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> - xbody += bytes(lid, encoding='ascii')
> - xbody += bytes(mdatestring, encoding='ascii')
> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> - if attachments:
> - for a in attachments:
> - xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = generators.medium(msg, body, lid, attachments)
> + elif archiver_generator == "redundant":
> + mid = generators.redundant(msg, body, lid, attachments)
> else:
> - # Or revert to the old way?
> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + mid = generators.legacy(msg, body, lid, attachments)
> except Exception as err:
> if logger:
> logger.warn("Could not generate MID: %s" % err)
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> new file mode 100644
> index 0000000..af566fc
> --- /dev/null
> +++ b/tools/generators.py
> @@ -0,0 +1,74 @@
> +#!/usr/bin/env/python3
> +# -*- coding: utf-8 -*-
> +# Licensed to the Apache Software Foundation (ASF) under one or more
> +# contributor license agreements. See the NOTICE file distributed with
> +# this work for additional information regarding copyright ownership.
> +# The ASF licenses this file to You under the Apache License, Version 2.0
> +# (the "License"); you may not use this file except in compliance with
> +# the License. You may obtain a copy of the License at
> +#
> +# http://www.apache.org/licenses/LICENSE-2.0
> +#
> +# Unless required by applicable law or agreed to in writing, software
> +# distributed under the License is distributed on an "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +# See the License for the specific language governing permissions and
> +# limitations under the License.
> +
> +"""
> +This file contains the various ID generators for Pony Mail's archivers.
> +"""
> +
> +import hashlib
> +import email.utils
> +
> +# Full generator: uses the entire email (including sever-depenent data)
> +# This is the recommended generator for single-node setups.
> +def full(msg, body, lid, attachments):
> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + return mid
> +
> +# Medium: Standard generator
> +def medium(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +# Redundant: Use data that is guaranteed to be the same across redundant setups
> +# This is the recommended generator for redundant cluster setups
> +def redundant(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
> + # Use sender
> + sender = msg.get('from', None)
> + if sender:
> + xbody += bytes(sender, encoding = 'ascii')
> + # Use subject
> + if subject:
> + xbody += bytes(subject, encoding = 'ascii')
> + # Use attachment hashes if present
> + if attachments:
> + for a in attachments:
> + xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +
> +# Old school way of making IDs
> +def legacy(msg, body, lid, attachments):
-1
AFAICT this is not exactly the same as the original code.
> + mdate = email.utils.parsedate_tz(msg.get('date'))
> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
What happens if either of the previous two lines throws an error?
> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + return mid
> +
> +
> +
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 4 June 2017 at 23:16, Daniel Gruno <hu...@apache.org> wrote:
> On 06/05/2017 12:07 AM, sebb wrote:
>> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>>> split generators into a file of its own
>>>
>>> Also fix up generators:
>>> - medium goes back to the way it was
>>
>> -1
>>
>> This is a very confusing change.
>>
>> The change to the medium generator should be reverted as a separate
>> commit, and the other changes added separately
>
> How would I go about dealing with that? I understand your objection to
> the commit style here, and I agree it should have been two separate
> commits, but I'm not sure I know how to rework that now.
Dunno, I'm not a Git expert.
>>
>>> - a new 'redundant' generator for cluster setups
>>>
>>>
>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>
>>> Branch: refs/heads/master
>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>> Parents: 8b7ede8
>>> Author: Daniel Gruno <hu...@apache.org>
>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>> Committer: Daniel Gruno <hu...@apache.org>
>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>
>>> ----------------------------------------------------------------------
>>> tools/archiver.py | 17 ++++-------
>>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 80 insertions(+), 11 deletions(-)
>>> ----------------------------------------------------------------------
>>>
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>> index 41933f7..0966b13 100755
>>> --- a/tools/archiver.py
>>> +++ b/tools/archiver.py
>>> @@ -58,6 +58,7 @@ import io
>>> import logging
>>> import traceback
>>> import sys
>>> +import generators
>>>
>>> # Fetch config
>>> path = os.path.dirname(os.path.realpath(__file__))
>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>> if body is not None or attachments:
>>> pmid = mid
>>> try:
>>> - # Use full message as bytes for mid?
>>> if archiver_generator == "full":
>>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> + mid = generators.full(msg, body, lid, attachments)
>>> elif archiver_generator == "medium":
>>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> - xbody += bytes(lid, encoding='ascii')
>>> - xbody += bytes(mdatestring, encoding='ascii')
>>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> - if attachments:
>>> - for a in attachments:
>>> - xbody += bytes(a['hash'], encoding = 'ascii')
>>> + mid = generators.medium(msg, body, lid, attachments)
>>> + elif archiver_generator == "redundant":
>>> + mid = generators.redundant(msg, body, lid, attachments)
>>> else:
>>> - # Or revert to the old way?
>>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> + mid = generators.legacy(msg, body, lid, attachments)
>>> except Exception as err:
>>> if logger:
>>> logger.warn("Could not generate MID: %s" % err)
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/generators.py b/tools/generators.py
>>> new file mode 100644
>>> index 0000000..af566fc
>>> --- /dev/null
>>> +++ b/tools/generators.py
>>> @@ -0,0 +1,74 @@
>>> +#!/usr/bin/env/python3
>>> +# -*- coding: utf-8 -*-
>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>> +# contributor license agreements. See the NOTICE file distributed with
>>> +# this work for additional information regarding copyright ownership.
>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>> +# (the "License"); you may not use this file except in compliance with
>>> +# the License. You may obtain a copy of the License at
>>> +#
>>> +# http://www.apache.org/licenses/LICENSE-2.0
>>> +#
>>> +# Unless required by applicable law or agreed to in writing, software
>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>> +# See the License for the specific language governing permissions and
>>> +# limitations under the License.
>>> +
>>> +"""
>>> +This file contains the various ID generators for Pony Mail's archivers.
>>> +"""
>>> +
>>> +import hashlib
>>> +import email.utils
>>> +
>>> +# Full generator: uses the entire email (including sever-depenent data)
>>> +# This is the recommended generator for single-node setups.
>>> +def full(msg, body, lid, attachments):
>>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> + return mid
>>> +
>>> +# Medium: Standard generator
>>> +def medium(msg, body, lid, attachments):
>>> + # Use text body
>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> + # Use List ID
>>> + xbody += bytes(lid, encoding='ascii')
>>> + # Use Date header
>>> + xbody += bytes(mdatestring, encoding='ascii')
>>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> + return mid
>>> +
>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>> +# This is the recommended generator for redundant cluster setups
>>> +def redundant(msg, body, lid, attachments):
>>> + # Use text body
>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> + # Use List ID
>>> + xbody += bytes(lid, encoding='ascii')
>>> + # Use Date header
>>> + xbody += bytes(mdatestring, encoding='ascii')
>>> + # Use sender
>>> + sender = msg.get('from', None)
>>> + if sender:
>>> + xbody += bytes(sender, encoding = 'ascii')
>>> + # Use subject
>>> + if subject:
>>> + xbody += bytes(subject, encoding = 'ascii')
>>> + # Use attachment hashes if present
>>> + if attachments:
>>> + for a in attachments:
>>> + xbody += bytes(a['hash'], encoding = 'ascii')
>>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> + return mid
>>> +
>>> +
>>> +# Old school way of making IDs
>>> +def legacy(msg, body, lid, attachments):
>>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> + return mid
>>> +
>>> +
>>> +
>>>
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 12:07 AM, sebb wrote:
> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>> split generators into a file of its own
>>
>> Also fix up generators:
>> - medium goes back to the way it was
>
> -1
>
> This is a very confusing change.
>
> The change to the medium generator should be reverted as a separate
> commit, and the other changes added separately
How would I go about dealing with that? I understand your objection to
the commit style here, and I agree it should have been two separate
commits, but I'm not sure I know how to rework that now.
>
>> - a new 'redundant' generator for cluster setups
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>
>> Branch: refs/heads/master
>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>> Parents: 8b7ede8
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Sun Jun 4 15:45:18 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/archiver.py | 17 ++++-------
>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 80 insertions(+), 11 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/archiver.py b/tools/archiver.py
>> index 41933f7..0966b13 100755
>> --- a/tools/archiver.py
>> +++ b/tools/archiver.py
>> @@ -58,6 +58,7 @@ import io
>> import logging
>> import traceback
>> import sys
>> +import generators
>>
>> # Fetch config
>> path = os.path.dirname(os.path.realpath(__file__))
>> @@ -316,20 +317,14 @@ class Archiver(object):
>> if body is not None or attachments:
>> pmid = mid
>> try:
>> - # Use full message as bytes for mid?
>> if archiver_generator == "full":
>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + mid = generators.full(msg, body, lid, attachments)
>> elif archiver_generator == "medium":
>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> - xbody += bytes(lid, encoding='ascii')
>> - xbody += bytes(mdatestring, encoding='ascii')
>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> - if attachments:
>> - for a in attachments:
>> - xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = generators.medium(msg, body, lid, attachments)
>> + elif archiver_generator == "redundant":
>> + mid = generators.redundant(msg, body, lid, attachments)
>> else:
>> - # Or revert to the old way?
>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + mid = generators.legacy(msg, body, lid, attachments)
>> except Exception as err:
>> if logger:
>> logger.warn("Could not generate MID: %s" % err)
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> new file mode 100644
>> index 0000000..af566fc
>> --- /dev/null
>> +++ b/tools/generators.py
>> @@ -0,0 +1,74 @@
>> +#!/usr/bin/env/python3
>> +# -*- coding: utf-8 -*-
>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>> +# contributor license agreements. See the NOTICE file distributed with
>> +# this work for additional information regarding copyright ownership.
>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>> +# (the "License"); you may not use this file except in compliance with
>> +# the License. You may obtain a copy of the License at
>> +#
>> +# http://www.apache.org/licenses/LICENSE-2.0
>> +#
>> +# Unless required by applicable law or agreed to in writing, software
>> +# distributed under the License is distributed on an "AS IS" BASIS,
>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> +# See the License for the specific language governing permissions and
>> +# limitations under the License.
>> +
>> +"""
>> +This file contains the various ID generators for Pony Mail's archivers.
>> +"""
>> +
>> +import hashlib
>> +import email.utils
>> +
>> +# Full generator: uses the entire email (including sever-depenent data)
>> +# This is the recommended generator for single-node setups.
>> +def full(msg, body, lid, attachments):
>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + return mid
>> +
>> +# Medium: Standard generator
>> +def medium(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>> +# This is the recommended generator for redundant cluster setups
>> +def redundant(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + # Use sender
>> + sender = msg.get('from', None)
>> + if sender:
>> + xbody += bytes(sender, encoding = 'ascii')
>> + # Use subject
>> + if subject:
>> + xbody += bytes(subject, encoding = 'ascii')
>> + # Use attachment hashes if present
>> + if attachments:
>> + for a in attachments:
>> + xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +
>> +# Old school way of making IDs
>> +def legacy(msg, body, lid, attachments):
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + return mid
>> +
>> +
>> +
>>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
> split generators into a file of its own
>
> Also fix up generators:
> - medium goes back to the way it was
-1
This is a very confusing change.
The change to the medium generator should be reverted as a separate
commit, and the other changes added separately
> - a new 'redundant' generator for cluster setups
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>
> Branch: refs/heads/master
> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
> Parents: 8b7ede8
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Sun Jun 4 15:45:18 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Sun Jun 4 15:45:18 2017 +0200
>
> ----------------------------------------------------------------------
> tools/archiver.py | 17 ++++-------
> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 80 insertions(+), 11 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
> ----------------------------------------------------------------------
> diff --git a/tools/archiver.py b/tools/archiver.py
> index 41933f7..0966b13 100755
> --- a/tools/archiver.py
> +++ b/tools/archiver.py
> @@ -58,6 +58,7 @@ import io
> import logging
> import traceback
> import sys
> +import generators
>
> # Fetch config
> path = os.path.dirname(os.path.realpath(__file__))
> @@ -316,20 +317,14 @@ class Archiver(object):
> if body is not None or attachments:
> pmid = mid
> try:
> - # Use full message as bytes for mid?
> if archiver_generator == "full":
> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + mid = generators.full(msg, body, lid, attachments)
> elif archiver_generator == "medium":
> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> - xbody += bytes(lid, encoding='ascii')
> - xbody += bytes(mdatestring, encoding='ascii')
> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> - if attachments:
> - for a in attachments:
> - xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = generators.medium(msg, body, lid, attachments)
> + elif archiver_generator == "redundant":
> + mid = generators.redundant(msg, body, lid, attachments)
> else:
> - # Or revert to the old way?
> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + mid = generators.legacy(msg, body, lid, attachments)
> except Exception as err:
> if logger:
> logger.warn("Could not generate MID: %s" % err)
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> new file mode 100644
> index 0000000..af566fc
> --- /dev/null
> +++ b/tools/generators.py
> @@ -0,0 +1,74 @@
> +#!/usr/bin/env/python3
> +# -*- coding: utf-8 -*-
> +# Licensed to the Apache Software Foundation (ASF) under one or more
> +# contributor license agreements. See the NOTICE file distributed with
> +# this work for additional information regarding copyright ownership.
> +# The ASF licenses this file to You under the Apache License, Version 2.0
> +# (the "License"); you may not use this file except in compliance with
> +# the License. You may obtain a copy of the License at
> +#
> +# http://www.apache.org/licenses/LICENSE-2.0
> +#
> +# Unless required by applicable law or agreed to in writing, software
> +# distributed under the License is distributed on an "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +# See the License for the specific language governing permissions and
> +# limitations under the License.
> +
> +"""
> +This file contains the various ID generators for Pony Mail's archivers.
> +"""
> +
> +import hashlib
> +import email.utils
> +
> +# Full generator: uses the entire email (including server-dependent data)
> +# This is the recommended generator for single-node setups.
> +def full(msg, body, lid, attachments):
> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + return mid
> +
> +# Medium: Standard generator
> +def medium(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +# Redundant: Use data that is guaranteed to be the same across redundant setups
> +# This is the recommended generator for redundant cluster setups
> +def redundant(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
> + # Use sender
> + sender = msg.get('from', None)
> + if sender:
> + xbody += bytes(sender, encoding = 'ascii')
> + # Use subject
> + if subject:
> + xbody += bytes(subject, encoding = 'ascii')
> + # Use attachment hashes if present
> + if attachments:
> + for a in attachments:
> + xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +
> +# Old school way of making IDs
> +def legacy(msg, body, lid, attachments):
> + mdate = email.utils.parsedate_tz(msg.get('date'))
> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + return mid
> +
> +
> +
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 5 June 2017 at 10:04, Daniel Gruno <hu...@apache.org> wrote:
> On 06/05/2017 11:01 AM, sebb wrote:
>> On 5 June 2017 at 09:17, Daniel Gruno <hu...@apache.org> wrote:
>>> I missed a git add in the last commit, sorry. Will add and recommit now.
>>
>> Have you tested that the change is complete?
>>
>> I'm still getting an error.
>
> Tested it with a bunch of mbox files, some with, some without headers,
> subjects, senders etc. All seemed to work.
>
> What is the specific error you are getting, and which generator are you
> using?
I am using the medium generator.
NameError: name 'msg_metadata' is not defined
>>
>>> On 06/05/2017 01:57 AM, sebb wrote:
>>>> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>>>>> split generators into a file of its own
>>>>>
>>>>> Also fix up generators:
>>>>> - medium goes back to the way it was
>>>>> - a new 'redundant' generator for cluster setups
>>>>>
>>>>>
>>>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>>>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>>>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>>>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>>>
>>>>> Branch: refs/heads/master
>>>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>>>> Parents: 8b7ede8
>>>>> Author: Daniel Gruno <hu...@apache.org>
>>>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>>>> Committer: Daniel Gruno <hu...@apache.org>
>>>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>>>
>>>>> ----------------------------------------------------------------------
>>>>> tools/archiver.py | 17 ++++-------
>>>>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>> 2 files changed, 80 insertions(+), 11 deletions(-)
>>>>> ----------------------------------------------------------------------
>>>>>
>>>>>
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>>>> ----------------------------------------------------------------------
>>>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>>>> index 41933f7..0966b13 100755
>>>>> --- a/tools/archiver.py
>>>>> +++ b/tools/archiver.py
>>>>> @@ -58,6 +58,7 @@ import io
>>>>> import logging
>>>>> import traceback
>>>>> import sys
>>>>> +import generators
>>>>>
>>>>> # Fetch config
>>>>> path = os.path.dirname(os.path.realpath(__file__))
>>>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>>>> if body is not None or attachments:
>>>>> pmid = mid
>>>>> try:
>>>>> - # Use full message as bytes for mid?
>>>>> if archiver_generator == "full":
>>>>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>>> + mid = generators.full(msg, body, lid, attachments)
>>>>> elif archiver_generator == "medium":
>>>>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> - xbody += bytes(lid, encoding='ascii')
>>>>> - xbody += bytes(mdatestring, encoding='ascii')
>>>>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> - if attachments:
>>>>> - for a in attachments:
>>>>> - xbody += bytes(a['hash'], encoding = 'ascii')
>>>>> + mid = generators.medium(msg, body, lid, attachments)
>>>>> + elif archiver_generator == "redundant":
>>>>> + mid = generators.redundant(msg, body, lid, attachments)
>>>>> else:
>>>>> - # Or revert to the old way?
>>>>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>>> + mid = generators.legacy(msg, body, lid, attachments)
>>>>> except Exception as err:
>>>>> if logger:
>>>>> logger.warn("Could not generate MID: %s" % err)
>>>>>
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>>>> ----------------------------------------------------------------------
>>>>> diff --git a/tools/generators.py b/tools/generators.py
>>>>> new file mode 100644
>>>>> index 0000000..af566fc
>>>>> --- /dev/null
>>>>> +++ b/tools/generators.py
>>>>> @@ -0,0 +1,74 @@
>>>>> +#!/usr/bin/env/python3
>>>>> +# -*- coding: utf-8 -*-
>>>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>>>> +# contributor license agreements. See the NOTICE file distributed with
>>>>> +# this work for additional information regarding copyright ownership.
>>>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>>>> +# (the "License"); you may not use this file except in compliance with
>>>>> +# the License. You may obtain a copy of the License at
>>>>> +#
>>>>> +# http://www.apache.org/licenses/LICENSE-2.0
>>>>> +#
>>>>> +# Unless required by applicable law or agreed to in writing, software
>>>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>>>> +# See the License for the specific language governing permissions and
>>>>> +# limitations under the License.
>>>>> +
>>>>> +"""
>>>>> +This file contains the various ID generators for Pony Mail's archivers.
>>>>> +"""
>>>>> +
>>>>> +import hashlib
>>>>> +import email.utils
>>>>> +
>>>>> +# Full generator: uses the entire email (including server-dependent data)
>>>>> +# This is the recommended generator for single-node setups.
>>>>> +def full(msg, body, lid, attachments):
>>>>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>>> + return mid
>>>>> +
>>>>> +# Medium: Standard generator
>>>>> +def medium(msg, body, lid, attachments):
>>>>> + # Use text body
>>>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> + # Use List ID
>>>>> + xbody += bytes(lid, encoding='ascii')
>>>>> + # Use Date header
>>>>> + xbody += bytes(mdatestring, encoding='ascii')
>>>>
>>>> mdatestring is not defined
>>>>
>>>>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> + return mid
>>>>> +
>>>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>>>> +# This is the recommended generator for redundant cluster setups
>>>>> +def redundant(msg, body, lid, attachments):
>>>>> + # Use text body
>>>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> + # Use List ID
>>>>> + xbody += bytes(lid, encoding='ascii')
>>>>> + # Use Date header
>>>>> + xbody += bytes(mdatestring, encoding='ascii')
>>>>
>>>> mdatestring is not defined
>>>>
>>>>> + # Use sender
>>>>> + sender = msg.get('from', None)
>>>>> + if sender:
>>>>> + xbody += bytes(sender, encoding = 'ascii')
>>>>> + # Use subject
>>>>> + if subject:
>>>>> + xbody += bytes(subject, encoding = 'ascii')
>>>>> + # Use attachment hashes if present
>>>>> + if attachments:
>>>>> + for a in attachments:
>>>>> + xbody += bytes(a['hash'], encoding = 'ascii')
>>>>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> + return mid
>>>>> +
>>>>> +
>>>>> +# Old school way of making IDs
>>>>> +def legacy(msg, body, lid, attachments):
>>>>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>>>>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>>>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>>> + return mid
>>>>> +
>>>>> +
>>>>> +
>>>>
>>>> Have the generators been tested?
>>>>
>>>
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by Daniel Gruno <hu...@apache.org>.
On 06/05/2017 11:01 AM, sebb wrote:
> On 5 June 2017 at 09:17, Daniel Gruno <hu...@apache.org> wrote:
>> I missed a git add in the last commit, sorry. Will add and recommit now.
>
> Have you tested that the change is complete?
>
> I'm still getting an error.
Tested it with a bunch of mbox files, some with, some without headers,
subjects, senders etc. All seemed to work.
What is the specific error you are getting, and which generator are you
using?
>
>> On 06/05/2017 01:57 AM, sebb wrote:
>>> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>>>> split generators into a file of its own
>>>>
>>>> Also fix up generators:
>>>> - medium goes back to the way it was
>>>> - a new 'redundant' generator for cluster setups
>>>>
>>>>
>>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>>
>>>> Branch: refs/heads/master
>>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>>> Parents: 8b7ede8
>>>> Author: Daniel Gruno <hu...@apache.org>
>>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>>> Committer: Daniel Gruno <hu...@apache.org>
>>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>>
>>>> ----------------------------------------------------------------------
>>>> tools/archiver.py | 17 ++++-------
>>>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>> 2 files changed, 80 insertions(+), 11 deletions(-)
>>>> ----------------------------------------------------------------------
>>>>
>>>>
>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>>> ----------------------------------------------------------------------
>>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>>> index 41933f7..0966b13 100755
>>>> --- a/tools/archiver.py
>>>> +++ b/tools/archiver.py
>>>> @@ -58,6 +58,7 @@ import io
>>>> import logging
>>>> import traceback
>>>> import sys
>>>> +import generators
>>>>
>>>> # Fetch config
>>>> path = os.path.dirname(os.path.realpath(__file__))
>>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>>> if body is not None or attachments:
>>>> pmid = mid
>>>> try:
>>>> - # Use full message as bytes for mid?
>>>> if archiver_generator == "full":
>>>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>> + mid = generators.full(msg, body, lid, attachments)
>>>> elif archiver_generator == "medium":
>>>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>> - xbody += bytes(lid, encoding='ascii')
>>>> - xbody += bytes(mdatestring, encoding='ascii')
>>>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>> - if attachments:
>>>> - for a in attachments:
>>>> - xbody += bytes(a['hash'], encoding = 'ascii')
>>>> + mid = generators.medium(msg, body, lid, attachments)
>>>> + elif archiver_generator == "redundant":
>>>> + mid = generators.redundant(msg, body, lid, attachments)
>>>> else:
>>>> - # Or revert to the old way?
>>>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>> + mid = generators.legacy(msg, body, lid, attachments)
>>>> except Exception as err:
>>>> if logger:
>>>> logger.warn("Could not generate MID: %s" % err)
>>>>
>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>>> ----------------------------------------------------------------------
>>>> diff --git a/tools/generators.py b/tools/generators.py
>>>> new file mode 100644
>>>> index 0000000..af566fc
>>>> --- /dev/null
>>>> +++ b/tools/generators.py
>>>> @@ -0,0 +1,74 @@
>>>> +#!/usr/bin/env/python3
>>>> +# -*- coding: utf-8 -*-
>>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>>> +# contributor license agreements. See the NOTICE file distributed with
>>>> +# this work for additional information regarding copyright ownership.
>>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>>> +# (the "License"); you may not use this file except in compliance with
>>>> +# the License. You may obtain a copy of the License at
>>>> +#
>>>> +# http://www.apache.org/licenses/LICENSE-2.0
>>>> +#
>>>> +# Unless required by applicable law or agreed to in writing, software
>>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>>> +# See the License for the specific language governing permissions and
>>>> +# limitations under the License.
>>>> +
>>>> +"""
>>>> +This file contains the various ID generators for Pony Mail's archivers.
>>>> +"""
>>>> +
>>>> +import hashlib
>>>> +import email.utils
>>>> +
>>>> +# Full generator: uses the entire email (including server-dependent data)
>>>> +# This is the recommended generator for single-node setups.
>>>> +def full(msg, body, lid, attachments):
>>>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>> + return mid
>>>> +
>>>> +# Medium: Standard generator
>>>> +def medium(msg, body, lid, attachments):
>>>> + # Use text body
>>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>> + # Use List ID
>>>> + xbody += bytes(lid, encoding='ascii')
>>>> + # Use Date header
>>>> + xbody += bytes(mdatestring, encoding='ascii')
>>>
>>> mdatestring is not defined
>>>
>>>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>> + return mid
>>>> +
>>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>>> +# This is the recommended generator for redundant cluster setups
>>>> +def redundant(msg, body, lid, attachments):
>>>> + # Use text body
>>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>> + # Use List ID
>>>> + xbody += bytes(lid, encoding='ascii')
>>>> + # Use Date header
>>>> + xbody += bytes(mdatestring, encoding='ascii')
>>>
>>> mdatestring is not defined
>>>
>>>> + # Use sender
>>>> + sender = msg.get('from', None)
>>>> + if sender:
>>>> + xbody += bytes(sender, encoding = 'ascii')
>>>> + # Use subject
>>>> + if subject:
>>>> + xbody += bytes(subject, encoding = 'ascii')
>>>> + # Use attachment hashes if present
>>>> + if attachments:
>>>> + for a in attachments:
>>>> + xbody += bytes(a['hash'], encoding = 'ascii')
>>>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>> + return mid
>>>> +
>>>> +
>>>> +# Old school way of making IDs
>>>> +def legacy(msg, body, lid, attachments):
>>>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>>>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>> + return mid
>>>> +
>>>> +
>>>> +
>>>
>>> Have the generators been tested?
>>>
>>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 5 June 2017 at 09:17, Daniel Gruno <hu...@apache.org> wrote:
> I missed a git add in the last commit, sorry. Will add and recommit now.
Have you tested that the change is complete?
I'm still getting an error.
> On 06/05/2017 01:57 AM, sebb wrote:
>> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>>> split generators into a file of its own
>>>
>>> Also fix up generators:
>>> - medium goes back to the way it was
>>> - a new 'redundant' generator for cluster setups
>>>
>>>
>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>
>>> Branch: refs/heads/master
>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>> Parents: 8b7ede8
>>> Author: Daniel Gruno <hu...@apache.org>
>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>> Committer: Daniel Gruno <hu...@apache.org>
>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>
>>> ----------------------------------------------------------------------
>>> tools/archiver.py | 17 ++++-------
>>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 80 insertions(+), 11 deletions(-)
>>> ----------------------------------------------------------------------
>>>
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>> index 41933f7..0966b13 100755
>>> --- a/tools/archiver.py
>>> +++ b/tools/archiver.py
>>> @@ -58,6 +58,7 @@ import io
>>> import logging
>>> import traceback
>>> import sys
>>> +import generators
>>>
>>> # Fetch config
>>> path = os.path.dirname(os.path.realpath(__file__))
>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>> if body is not None or attachments:
>>> pmid = mid
>>> try:
>>> - # Use full message as bytes for mid?
>>> if archiver_generator == "full":
>>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> + mid = generators.full(msg, body, lid, attachments)
>>> elif archiver_generator == "medium":
>>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> - xbody += bytes(lid, encoding='ascii')
>>> - xbody += bytes(mdatestring, encoding='ascii')
>>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> - if attachments:
>>> - for a in attachments:
>>> - xbody += bytes(a['hash'], encoding = 'ascii')
>>> + mid = generators.medium(msg, body, lid, attachments)
>>> + elif archiver_generator == "redundant":
>>> + mid = generators.redundant(msg, body, lid, attachments)
>>> else:
>>> - # Or revert to the old way?
>>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> + mid = generators.legacy(msg, body, lid, attachments)
>>> except Exception as err:
>>> if logger:
>>> logger.warn("Could not generate MID: %s" % err)
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/generators.py b/tools/generators.py
>>> new file mode 100644
>>> index 0000000..af566fc
>>> --- /dev/null
>>> +++ b/tools/generators.py
>>> @@ -0,0 +1,74 @@
>>> +#!/usr/bin/env/python3
>>> +# -*- coding: utf-8 -*-
>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>> +# contributor license agreements. See the NOTICE file distributed with
>>> +# this work for additional information regarding copyright ownership.
>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>> +# (the "License"); you may not use this file except in compliance with
>>> +# the License. You may obtain a copy of the License at
>>> +#
>>> +# http://www.apache.org/licenses/LICENSE-2.0
>>> +#
>>> +# Unless required by applicable law or agreed to in writing, software
>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>> +# See the License for the specific language governing permissions and
>>> +# limitations under the License.
>>> +
>>> +"""
>>> +This file contains the various ID generators for Pony Mail's archivers.
>>> +"""
>>> +
>>> +import hashlib
>>> +import email.utils
>>> +
>>> +# Full generator: uses the entire email (including server-dependent data)
>>> +# This is the recommended generator for single-node setups.
>>> +def full(msg, body, lid, attachments):
>>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> + return mid
>>> +
>>> +# Medium: Standard generator
>>> +def medium(msg, body, lid, attachments):
>>> + # Use text body
>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> + # Use List ID
>>> + xbody += bytes(lid, encoding='ascii')
>>> + # Use Date header
>>> + xbody += bytes(mdatestring, encoding='ascii')
>>
>> mdatestring is not defined
>>
>>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> + return mid
>>> +
>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>> +# This is the recommended generator for redundant cluster setups
>>> +def redundant(msg, body, lid, attachments):
>>> + # Use text body
>>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> + # Use List ID
>>> + xbody += bytes(lid, encoding='ascii')
>>> + # Use Date header
>>> + xbody += bytes(mdatestring, encoding='ascii')
>>
>> mdatestring is not defined
>>
>>> + # Use sender
>>> + sender = msg.get('from', None)
>>> + if sender:
>>> + xbody += bytes(sender, encoding = 'ascii')
>>> + # Use subject
>>> + if subject:
>>> + xbody += bytes(subject, encoding = 'ascii')
>>> + # Use attachment hashes if present
>>> + if attachments:
>>> + for a in attachments:
>>> + xbody += bytes(a['hash'], encoding = 'ascii')
>>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> + return mid
>>> +
>>> +
>>> +# Old school way of making IDs
>>> +def legacy(msg, body, lid, attachments):
>>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> + return mid
>>> +
>>> +
>>> +
>>
>> Have the generators been tested?
>>
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by Daniel Gruno <hu...@apache.org>.
I missed a git add in the last commit, sorry. Will add and recommit now.
On 06/05/2017 01:57 AM, sebb wrote:
> On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
>> split generators into a file of its own
>>
>> Also fix up generators:
>> - medium goes back to the way it was
>> - a new 'redundant' generator for cluster setups
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>
>> Branch: refs/heads/master
>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>> Parents: 8b7ede8
>> Author: Daniel Gruno <hu...@apache.org>
>> Authored: Sun Jun 4 15:45:18 2017 +0200
>> Committer: Daniel Gruno <hu...@apache.org>
>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/archiver.py | 17 ++++-------
>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 80 insertions(+), 11 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/archiver.py b/tools/archiver.py
>> index 41933f7..0966b13 100755
>> --- a/tools/archiver.py
>> +++ b/tools/archiver.py
>> @@ -58,6 +58,7 @@ import io
>> import logging
>> import traceback
>> import sys
>> +import generators
>>
>> # Fetch config
>> path = os.path.dirname(os.path.realpath(__file__))
>> @@ -316,20 +317,14 @@ class Archiver(object):
>> if body is not None or attachments:
>> pmid = mid
>> try:
>> - # Use full message as bytes for mid?
>> if archiver_generator == "full":
>> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + mid = generators.full(msg, body, lid, attachments)
>> elif archiver_generator == "medium":
>> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> - xbody += bytes(lid, encoding='ascii')
>> - xbody += bytes(mdatestring, encoding='ascii')
>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> - if attachments:
>> - for a in attachments:
>> - xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = generators.medium(msg, body, lid, attachments)
>> + elif archiver_generator == "redundant":
>> + mid = generators.redundant(msg, body, lid, attachments)
>> else:
>> - # Or revert to the old way?
>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + mid = generators.legacy(msg, body, lid, attachments)
>> except Exception as err:
>> if logger:
>> logger.warn("Could not generate MID: %s" % err)
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> new file mode 100644
>> index 0000000..af566fc
>> --- /dev/null
>> +++ b/tools/generators.py
>> @@ -0,0 +1,74 @@
>> +#!/usr/bin/env/python3
>> +# -*- coding: utf-8 -*-
>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>> +# contributor license agreements. See the NOTICE file distributed with
>> +# this work for additional information regarding copyright ownership.
>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>> +# (the "License"); you may not use this file except in compliance with
>> +# the License. You may obtain a copy of the License at
>> +#
>> +# http://www.apache.org/licenses/LICENSE-2.0
>> +#
>> +# Unless required by applicable law or agreed to in writing, software
>> +# distributed under the License is distributed on an "AS IS" BASIS,
>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> +# See the License for the specific language governing permissions and
>> +# limitations under the License.
>> +
>> +"""
>> +This file contains the various ID generators for Pony Mail's archivers.
>> +"""
>> +
>> +import hashlib
>> +import email.utils
>> +
>> +# Full generator: uses the entire email (including server-dependent data)
>> +# This is the recommended generator for single-node setups.
>> +def full(msg, body, lid, attachments):
>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + return mid
>> +
>> +# Medium: Standard generator
>> +def medium(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>
> mdatestring is not defined
>
>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>> +# This is the recommended generator for redundant cluster setups
>> +def redundant(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>
> mdatestring is not defined
>
>> + # Use sender
>> + sender = msg.get('from', None)
>> + if sender:
>> + xbody += bytes(sender, encoding = 'ascii')
>> + # Use subject
>> + if subject:
>> + xbody += bytes(subject, encoding = 'ascii')
>> + # Use attachment hashes if present
>> + if attachments:
>> + for a in attachments:
>> + xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +
>> +# Old school way of making IDs
>> +def legacy(msg, body, lid, attachments):
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + return mid
>> +
>> +
>> +
>
> Have the generators been tested?
>
Re: [2/4] incubator-ponymail git commit: split generators into a file
of its own
Posted by sebb <se...@gmail.com>.
On 4 June 2017 at 15:02, <hu...@apache.org> wrote:
> split generators into a file of its own
>
> Also fix up generators:
> - medium goes back to the way it was
> - a new 'redundant' generator for cluster setups
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>
> Branch: refs/heads/master
> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
> Parents: 8b7ede8
> Author: Daniel Gruno <hu...@apache.org>
> Authored: Sun Jun 4 15:45:18 2017 +0200
> Committer: Daniel Gruno <hu...@apache.org>
> Committed: Sun Jun 4 15:45:18 2017 +0200
>
> ----------------------------------------------------------------------
> tools/archiver.py | 17 ++++-------
> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 80 insertions(+), 11 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
> ----------------------------------------------------------------------
> diff --git a/tools/archiver.py b/tools/archiver.py
> index 41933f7..0966b13 100755
> --- a/tools/archiver.py
> +++ b/tools/archiver.py
> @@ -58,6 +58,7 @@ import io
> import logging
> import traceback
> import sys
> +import generators
>
> # Fetch config
> path = os.path.dirname(os.path.realpath(__file__))
> @@ -316,20 +317,14 @@ class Archiver(object):
> if body is not None or attachments:
> pmid = mid
> try:
> - # Use full message as bytes for mid?
> if archiver_generator == "full":
> - mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + mid = generators.full(msg, body, lid, attachments)
> elif archiver_generator == "medium":
> - xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> - xbody += bytes(lid, encoding='ascii')
> - xbody += bytes(mdatestring, encoding='ascii')
> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> - if attachments:
> - for a in attachments:
> - xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = generators.medium(msg, body, lid, attachments)
> + elif archiver_generator == "redundant":
> + mid = generators.redundant(msg, body, lid, attachments)
> else:
> - # Or revert to the old way?
> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + mid = generators.legacy(msg, body, lid, attachments)
> except Exception as err:
> if logger:
> logger.warn("Could not generate MID: %s" % err)
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> new file mode 100644
> index 0000000..af566fc
> --- /dev/null
> +++ b/tools/generators.py
> @@ -0,0 +1,74 @@
> +#!/usr/bin/env/python3
> +# -*- coding: utf-8 -*-
> +# Licensed to the Apache Software Foundation (ASF) under one or more
> +# contributor license agreements. See the NOTICE file distributed with
> +# this work for additional information regarding copyright ownership.
> +# The ASF licenses this file to You under the Apache License, Version 2.0
> +# (the "License"); you may not use this file except in compliance with
> +# the License. You may obtain a copy of the License at
> +#
> +# http://www.apache.org/licenses/LICENSE-2.0
> +#
> +# Unless required by applicable law or agreed to in writing, software
> +# distributed under the License is distributed on an "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +# See the License for the specific language governing permissions and
> +# limitations under the License.
> +
> +"""
> +This file contains the various ID generators for Pony Mail's archivers.
> +"""
> +
> +import hashlib
> +import email.utils
> +
> +# Full generator: uses the entire email (including server-dependent data)
> +# This is the recommended generator for single-node setups.
> +def full(msg, body, lid, attachments):
> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> + return mid
> +
> +# Medium: Standard generator
> +def medium(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
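mdatestring is not defined here: it was a local variable in archiver.py and is not among this function's parameters, so this line raises NameError.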
> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +# Redundant: Use data that is guaranteed to be the same across redundant setups
> +# This is the recommended generator for redundant cluster setups
> +def redundant(msg, body, lid, attachments):
> + # Use text body
> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> + # Use List ID
> + xbody += bytes(lid, encoding='ascii')
> + # Use Date header
> + xbody += bytes(mdatestring, encoding='ascii')
mdatestring is not defined here either: it is not among this function's parameters, so this line raises NameError.
> + # Use sender
> + sender = msg.get('from', None)
> + if sender:
> + xbody += bytes(sender, encoding = 'ascii')
> + # Use subject
> + if subject:
> + xbody += bytes(subject, encoding = 'ascii')
> + # Use attachment hashes if present
> + if attachments:
> + for a in attachments:
> + xbody += bytes(a['hash'], encoding = 'ascii')
> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> + return mid
> +
> +
> +# Old school way of making IDs
> +def legacy(msg, body, lid, attachments):
> + mdate = email.utils.parsedate_tz(msg.get('date'))
> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> + return mid
> +
> +
> +
Have the generators been tested?
[2/4] incubator-ponymail git commit: split generators into a file of
its own
Posted by hu...@apache.org.
split generators into a file of its own
Also fix up generators:
- medium goes back to the way it was
- a new 'redundant' generator for cluster setups
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
Branch: refs/heads/master
Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
Parents: 8b7ede8
Author: Daniel Gruno <hu...@apache.org>
Authored: Sun Jun 4 15:45:18 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Sun Jun 4 15:45:18 2017 +0200
----------------------------------------------------------------------
tools/archiver.py | 17 ++++-------
tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
----------------------------------------------------------------------
diff --git a/tools/archiver.py b/tools/archiver.py
index 41933f7..0966b13 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -58,6 +58,7 @@ import io
import logging
import traceback
import sys
+import generators
# Fetch config
path = os.path.dirname(os.path.realpath(__file__))
@@ -316,20 +317,14 @@ class Archiver(object):
if body is not None or attachments:
pmid = mid
try:
- # Use full message as bytes for mid?
if archiver_generator == "full":
- mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
+ mid = generators.full(msg, body, lid, attachments)
elif archiver_generator == "medium":
- xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
- xbody += bytes(lid, encoding='ascii')
- xbody += bytes(mdatestring, encoding='ascii')
- mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
- if attachments:
- for a in attachments:
- xbody += bytes(a['hash'], encoding = 'ascii')
+ mid = generators.medium(msg, body, lid, attachments)
+ elif archiver_generator == "redundant":
+ mid = generators.redundant(msg, body, lid, attachments)
else:
- # Or revert to the old way?
- mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
+ mid = generators.legacy(msg, body, lid, attachments)
except Exception as err:
if logger:
logger.warn("Could not generate MID: %s" % err)
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
----------------------------------------------------------------------
diff --git a/tools/generators.py b/tools/generators.py
new file mode 100644
index 0000000..af566fc
--- /dev/null
+++ b/tools/generators.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env/python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This file contains the various ID generators for Pony Mail's archivers.
+"""
+
+import hashlib
+import email.utils
+
+# Full generator: uses the entire email (including server-dependent data)
+# This is the recommended generator for single-node setups.
+def full(msg, body, lid, attachments):
+ mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
+ return mid
+
+# Medium: Standard generator
+def medium(msg, body, lid, attachments):
+ # Use text body
+ xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
+ # Use List ID
+ xbody += bytes(lid, encoding='ascii')
+ # Use Date header
+ xbody += bytes(mdatestring, encoding='ascii')
+ mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
+ return mid
+
+# Redundant: Use data that is guaranteed to be the same across redundant setups
+# This is the recommended generator for redundant cluster setups
+def redundant(msg, body, lid, attachments):
+ # Use text body
+ xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
+ # Use List ID
+ xbody += bytes(lid, encoding='ascii')
+ # Use Date header
+ xbody += bytes(mdatestring, encoding='ascii')
+ # Use sender
+ sender = msg.get('from', None)
+ if sender:
+ xbody += bytes(sender, encoding = 'ascii')
+ # Use subject
+ if subject:
+ xbody += bytes(subject, encoding = 'ascii')
+ # Use attachment hashes if present
+ if attachments:
+ for a in attachments:
+ xbody += bytes(a['hash'], encoding = 'ascii')
+ mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
+ return mid
+
+
+# Old school way of making IDs
+def legacy(msg, body, lid, attachments):
+ mdate = email.utils.parsedate_tz(msg.get('date'))
+ uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
+ mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
+ return mid
+
+
+
[4/4] incubator-ponymail git commit: update chglog
Posted by hu...@apache.org.
update chglog
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/c58b2312
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/c58b2312
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/c58b2312
Branch: refs/heads/master
Commit: c58b23127db123916ce169b6ee12990a71c6d66e
Parents: 23966d8
Author: Daniel Gruno <hu...@apache.org>
Authored: Sun Jun 4 15:59:49 2017 +0200
Committer: Daniel Gruno <hu...@apache.org>
Committed: Sun Jun 4 15:59:49 2017 +0200
----------------------------------------------------------------------
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/c58b2312/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be595de..bbc497f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
## CHANGES in 0.10:
+- ID generators have now been split into a separate library (generators.py)
- more comprehensive ID generation mechanisms
- private messages are now included in downloads if the user has access to them (#169, #108)
- mbox export now generates valid From_ line (#190)