You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2021/10/17 20:40:23 UTC

[incubator-ponymail-foal] branch master updated (5e85a1e -> 356d1d3)

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git.


    from 5e85a1e  exclude body, use shortened body only, for stats
     new 265c712  make constant
     new 356d1d3  make constant

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tools/archiver.py | 5 ++++-
 tools/migrate.py  | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

[incubator-ponymail-foal] 01/02: make constant

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 265c712c94a6cd411a90b7a1f72d5def44ac8565
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Oct 17 22:39:00 2021 +0200

    make constant
---
 tools/archiver.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/archiver.py b/tools/archiver.py
index dc12e39..f14d7f6 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -70,6 +70,9 @@ else:
 # This is what we will default to if we are presented with emails without character sets and US-ASCII doesn't work.
 DEFAULT_CHARACTER_SET = 'utf-8'
 
+# Standard "short body" max length for email aggregations
+SHORT_BODY_MAX_LEN = 200
+
 # Fetch config from same dir as archiver.py
 config = ponymailconfig.PonymailConfig()
 
@@ -585,7 +588,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
 
             notes.append(["ARCHIVE: Email archived as %s at %u" % (document_id, time.time())])
             body_unflowed = body.unflow() if body else ""
-            body_shortened = body_unflowed[:210]  # 210 so that we can tell if > 200.
+            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10]  # +10 so that we can tell if larger than std short body.
 
             output_json = {
                 "from_raw": msg_metadata["from"],

[incubator-ponymail-foal] 02/02: make constant

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 356d1d320a245293d269935b6fbfc3eafc18f004
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Oct 17 22:40:14 2021 +0200

    make constant
---
 tools/migrate.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/migrate.py b/tools/migrate.py
index e288330..8c431c3 100644
--- a/tools/migrate.py
+++ b/tools/migrate.py
@@ -40,6 +40,9 @@ import sys
 # Increment this number whenever breaking changes happen in the migration workflow:
 MIGRATION_MAGIC_NUMBER = "2"
 
+# Standard "short body" max length for email aggregations
+SHORT_BODY_MAX_LEN = 200
+
 # Max number of parallel conversions to perform before pushing. 75-ish percent of max cores.
 cores = len(os.sched_getaffinity(0))
 MAX_PARALLEL_OPS = max(min(int((cores + 1) * 0.75), cores - 1), 1)
@@ -199,7 +202,7 @@ def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dki
     doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest()
 
     # Add in shortened body for search aggs
-    doc["_source"]["body_short"] = doc["_source"]["body"][:210]
+    doc["_source"]["body_short"] = doc["_source"]["body"][:SHORT_BODY_MAX_LEN+10]
 
     # Add in gravatar
     header_from = doc["_source"]["from"]