You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/09/06 07:09:32 UTC

[incubator-ponymail-foal] branch master updated (2885bfb -> d653fc5)

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git.


    from 2885bfb  Move older gen generators into their own file
     new 5ed9549  Add a new field, specifying whether HTML from an HTML-only email was saved without conversion to text
     new d653fc5  Allow storing of html-only emails even if html2text is not enabled

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tools/archiver.py   | 21 +++++++++++++++++----
 tools/mappings.yaml |  2 ++
 2 files changed, 19 insertions(+), 4 deletions(-)


[incubator-ponymail-foal] 01/02: Add a new field, specifying whether HTML from an HTML-only email was saved without conversion to text

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 5ed95497d73c0531fe206b9f942c3338106255a9
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 6 09:05:04 2020 +0200

    Add a new field, specifying whether HTML from an HTML-only email was saved without conversion to text
---
 tools/mappings.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/mappings.yaml b/tools/mappings.yaml
index 6743a07..bd2b7b5 100644
--- a/tools/mappings.yaml
+++ b/tools/mappings.yaml
@@ -68,6 +68,8 @@ mbox:
       type: text
     from_raw:
       type: keyword
+    html_as_source:
+      type: boolean
     in-reply-to:
       type: keyword
     list:


[incubator-ponymail-foal] 02/02: Allow storing of html-only emails even if html2text is not enabled

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit d653fc5bd65d81e752507ca93b07429ec7185791
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 6 09:06:11 2020 +0200

    Allow storing of html-only emails even if html2text is not enabled
    
    This means storing the raw HTML source and setting a flag for the unit
    tests so they won't break in the cases where foal and pony differ.
---
 tools/archiver.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tools/archiver.py b/tools/archiver.py
index c52207b..9222a66 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -181,6 +181,7 @@ class Body:
         self.string: typing.Optional[str] = None
         self.flowed = "format=flowed" in part.get("content-type", "")
         self.bytes = part.get_payload(decode=True)
+        self.html_as_source = False
         if self.bytes is not None:
             valid_encodings = [x for x in self.charsets if x]
             if valid_encodings:
@@ -312,8 +313,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
                 ]:
                     body = Body(part)
                 elif (
-                    self.html
-                    and not first_html
+                    not first_html
                     and part.get_content_type() == "text/html"
                 ):
                     first_html = Body(part)
@@ -327,7 +327,12 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
             or (self.ignore_body and str(body).find(str(self.ignore_body)) != -1)
         ):
             body = first_html
-            body.assign(self.html2text(str(body)))
+            body.html_as_source = True
+
+            # Convert HTML to text if mod is installed and enabled, otherwise keep the source as-is
+            if self.html:
+                body.assign(self.html2text(str(body)))
+                body.html_as_source = False
         return body
 
     # N.B. this is also called by import-mbox.py
@@ -415,13 +420,20 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
         if body is not None or attachments:
             pmid = mid
             id_set = set()  # Use a set to avoid duplicates
+            # The body used for generators differs from the body put into the meta doc,
+            # for historical reasons. In the older generators where it is actively used,
+            # it would be UTF-8 bytes in cases of charset-less message bodies. It would
+            # also be nothing in case of html-only emails where html2text is not enabled.
+            generator_body = body if body and body.character_set else body and body.bytes or ""
+            if body.html_as_source:
+                generator_body = ""
             for generator in self.generator.split(" "):
                 if generator:
                     try:
                         mid = plugins.generators.generate(
                             generator,
                             msg,
-                            body if body and body.character_set else body and body.bytes or "",
+                            generator_body,
                             lid,
                             attachments,
                             raw_msg,
@@ -469,6 +481,7 @@ class Archiver(object):  # N.B. Also used by import-mbox.py
                 "references": msg_metadata["references"],
                 "in-reply-to": irt,
                 "body": body.unflow() if body else "",
+                "html_source_only": body.html_as_source,
                 "attachments": attachments,
             }