You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/09/06 07:09:34 UTC
[incubator-ponymail-foal] 02/02: Allow storing of html-only emails
even if html2text is not enabled
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
commit d653fc5bd65d81e752507ca93b07429ec7185791
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 6 09:06:11 2020 +0200
Allow storing of html-only emails even if html2text is not enabled
This means storing the raw HTML source and setting a flag for the unit
tests, so that they won't break in the cases where foal and pony differ.
---
tools/archiver.py | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/tools/archiver.py b/tools/archiver.py
index c52207b..9222a66 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -181,6 +181,7 @@ class Body:
self.string: typing.Optional[str] = None
self.flowed = "format=flowed" in part.get("content-type", "")
self.bytes = part.get_payload(decode=True)
+ self.html_as_source = False
if self.bytes is not None:
valid_encodings = [x for x in self.charsets if x]
if valid_encodings:
@@ -312,8 +313,7 @@ class Archiver(object): # N.B. Also used by import-mbox.py
]:
body = Body(part)
elif (
- self.html
- and not first_html
+ not first_html
and part.get_content_type() == "text/html"
):
first_html = Body(part)
@@ -327,7 +327,12 @@ class Archiver(object): # N.B. Also used by import-mbox.py
or (self.ignore_body and str(body).find(str(self.ignore_body)) != -1)
):
body = first_html
- body.assign(self.html2text(str(body)))
+ body.html_as_source = True
+
+ # Convert HTML to text if mod is installed and enabled, otherwise keep the source as-is
+ if self.html:
+ body.assign(self.html2text(str(body)))
+ body.html_as_source = False
return body
# N.B. this is also called by import-mbox.py
@@ -415,13 +420,20 @@ class Archiver(object): # N.B. Also used by import-mbox.py
if body is not None or attachments:
pmid = mid
id_set = set() # Use a set to avoid duplicates
+ # The body used for generators differs from the body put into the meta doc,
+ # for historical reasons. In the older generators where it is actively used,
+ # it would be UTF-8 bytes in cases of charset-less message bodies. It would
+ # also be nothing in case of html-only emails where html2text is not enabled.
+ generator_body = body if body and body.character_set else body and body.bytes or ""
+ if body.html_as_source:
+ generator_body = ""
for generator in self.generator.split(" "):
if generator:
try:
mid = plugins.generators.generate(
generator,
msg,
- body if body and body.character_set else body and body.bytes or "",
+ generator_body,
lid,
attachments,
raw_msg,
@@ -469,6 +481,7 @@ class Archiver(object): # N.B. Also used by import-mbox.py
"references": msg_metadata["references"],
"in-reply-to": irt,
"body": body.unflow() if body else "",
+ "html_source_only": body.html_as_source,
"attachments": attachments,
}