You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2021/10/17 22:19:49 UTC

[incubator-ponymail-foal] branch master updated: +1 will suffice

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git


The following commit(s) were added to refs/heads/master by this push:
     new d843003  +1 will suffice
d843003 is described below

commit d8430036d92e8a89c693277a7f5c5c4c262f352c
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Mon Oct 18 00:19:40 2021 +0200

    +1 will suffice
---
 tools/archiver.py | 2 +-
 tools/migrate.py  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/archiver.py b/tools/archiver.py
index f14d7f6..93a29a8 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -588,7 +588,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
 
             notes.append(["ARCHIVE: Email archived as %s at %u" % (document_id, time.time())])
             body_unflowed = body.unflow() if body else ""
-            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10]  # +10 so that we can tell if larger than std short body.
+            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+1]  # +1 so that we can tell if larger than std short body.
 
             output_json = {
                 "from_raw": msg_metadata["from"],
diff --git a/tools/migrate.py b/tools/migrate.py
index 2493465..c46b8ef 100644
--- a/tools/migrate.py
+++ b/tools/migrate.py
@@ -201,7 +201,8 @@ def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dki
     doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest()
 
     # Add in shortened body for search aggs
-    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+10]
+    # We add +1 to know whether to use ellipsis in reports.
+    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+1]
 
     # Add in gravatar
     header_from = doc["_source"]["from"]

Re: [incubator-ponymail-foal] branch master updated: +1 will suffice

Posted by sebb <se...@gmail.com>.
On Sun, 17 Oct 2021 at 23:29, Daniel Gruno <hu...@apache.org> wrote:
>
> On 18/10/2021 00.25, sebb wrote:
> > On Sun, 17 Oct 2021 at 23:19, <hu...@apache.org> wrote:
> >>
> >> This is an automated email from the ASF dual-hosted git repository.
> >>
> >> humbedooh pushed a commit to branch master
> >> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
> >>
> >>
> >> The following commit(s) were added to refs/heads/master by this push:
> >>       new d843003  +1 will suffice
> >> d843003 is described below
> >>
> >> commit d8430036d92e8a89c693277a7f5c5c4c262f352c
> >> Author: Daniel Gruno <hu...@apache.org>
> >> AuthorDate: Mon Oct 18 00:19:40 2021 +0200
> >>
> >>      +1 will suffice
> >> ---
> >>   tools/archiver.py | 2 +-
> >>   tools/migrate.py  | 3 ++-
> >>   2 files changed, 3 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/tools/archiver.py b/tools/archiver.py
> >> index f14d7f6..93a29a8 100755
> >> --- a/tools/archiver.py
> >> +++ b/tools/archiver.py
> >> @@ -588,7 +588,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
> >>
> >>               notes.append(["ARCHIVE: Email archived as %s at %u" % (document_id, time.time())])
> >>               body_unflowed = body.unflow() if body else ""
> >> -            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10]  # +10 so that we can tell if larger than std short body.
> >> +            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+1]  # +1 so that we can tell if larger than std short body.
> >>
> >>               output_json = {
> >>                   "from_raw": msg_metadata["from"],
> >> diff --git a/tools/migrate.py b/tools/migrate.py
> >> index 2493465..c46b8ef 100644
> >> --- a/tools/migrate.py
> >> +++ b/tools/migrate.py
> >> @@ -201,7 +201,8 @@ def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dki
> >>       doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest()
> >>
> >>       # Add in shortened body for search aggs
> >> -    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+10]
> >> +    # We add +1 to know whether to use ellipsis in reports.
> >> +    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+1]
> >
> > Why +1 here?
>
> Cosmetic reasons: we need to know whether to add '...' to the body when
> potentially shortening it. If we cap at 200, we won't know whether the
> original body was longer than 200, so we cap at 200+1. Alternatively, we
> could add a new field, either a bool (shortened or not) or a body_length
> that we could work with, but that seems a tad overkill. HTH

Yes, I know.
But a person reading the code later might not, as this is not documented.

> >
> >>       # Add in gravatar
> >>       header_from = doc["_source"]["from"]
>

Re: [incubator-ponymail-foal] branch master updated: +1 will suffice

Posted by Daniel Gruno <hu...@apache.org>.
On 18/10/2021 00.25, sebb wrote:
> On Sun, 17 Oct 2021 at 23:19, <hu...@apache.org> wrote:
>>
>> This is an automated email from the ASF dual-hosted git repository.
>>
>> humbedooh pushed a commit to branch master
>> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
>>
>>
>> The following commit(s) were added to refs/heads/master by this push:
>>       new d843003  +1 will suffice
>> d843003 is described below
>>
>> commit d8430036d92e8a89c693277a7f5c5c4c262f352c
>> Author: Daniel Gruno <hu...@apache.org>
>> AuthorDate: Mon Oct 18 00:19:40 2021 +0200
>>
>>      +1 will suffice
>> ---
>>   tools/archiver.py | 2 +-
>>   tools/migrate.py  | 3 ++-
>>   2 files changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/archiver.py b/tools/archiver.py
>> index f14d7f6..93a29a8 100755
>> --- a/tools/archiver.py
>> +++ b/tools/archiver.py
>> @@ -588,7 +588,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
>>
>>               notes.append(["ARCHIVE: Email archived as %s at %u" % (document_id, time.time())])
>>               body_unflowed = body.unflow() if body else ""
>> -            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10]  # +10 so that we can tell if larger than std short body.
>> +            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+1]  # +1 so that we can tell if larger than std short body.
>>
>>               output_json = {
>>                   "from_raw": msg_metadata["from"],
>> diff --git a/tools/migrate.py b/tools/migrate.py
>> index 2493465..c46b8ef 100644
>> --- a/tools/migrate.py
>> +++ b/tools/migrate.py
>> @@ -201,7 +201,8 @@ def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dki
>>       doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest()
>>
>>       # Add in shortened body for search aggs
>> -    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+10]
>> +    # We add +1 to know whether to use ellipsis in reports.
>> +    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+1]
> 
> Why +1 here?

Cosmetic reasons: we need to know whether to add '...' to the body when
potentially shortening it. If we cap at 200, we won't know whether the
original body was longer than 200, so we cap at 200+1. Alternatively, we
could add a new field, either a bool (shortened or not) or a body_length
that we could work with, but that seems a tad overkill. HTH

> 
>>       # Add in gravatar
>>       header_from = doc["_source"]["from"]


Re: [incubator-ponymail-foal] branch master updated: +1 will suffice

Posted by sebb <se...@gmail.com>.
On Sun, 17 Oct 2021 at 23:19, <hu...@apache.org> wrote:
>
> This is an automated email from the ASF dual-hosted git repository.
>
> humbedooh pushed a commit to branch master
> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
>
>
> The following commit(s) were added to refs/heads/master by this push:
>      new d843003  +1 will suffice
> d843003 is described below
>
> commit d8430036d92e8a89c693277a7f5c5c4c262f352c
> Author: Daniel Gruno <hu...@apache.org>
> AuthorDate: Mon Oct 18 00:19:40 2021 +0200
>
>     +1 will suffice
> ---
>  tools/archiver.py | 2 +-
>  tools/migrate.py  | 3 ++-
>  2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/tools/archiver.py b/tools/archiver.py
> index f14d7f6..93a29a8 100755
> --- a/tools/archiver.py
> +++ b/tools/archiver.py
> @@ -588,7 +588,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
>
>              notes.append(["ARCHIVE: Email archived as %s at %u" % (document_id, time.time())])
>              body_unflowed = body.unflow() if body else ""
> -            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10]  # +10 so that we can tell if larger than std short body.
> +            body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+1]  # +1 so that we can tell if larger than std short body.
>
>              output_json = {
>                  "from_raw": msg_metadata["from"],
> diff --git a/tools/migrate.py b/tools/migrate.py
> index 2493465..c46b8ef 100644
> --- a/tools/migrate.py
> +++ b/tools/migrate.py
> @@ -201,7 +201,8 @@ def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dki
>      doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest()
>
>      # Add in shortened body for search aggs
> -    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+10]
> +    # We add +1 to know whether to use ellipsis in reports.
> +    doc["_source"]["body_short"] = doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+1]

Why +1 here?

>      # Add in gravatar
>      header_from = doc["_source"]["from"]