You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/09/06 07:09:32 UTC
[incubator-ponymail-foal] branch master updated (2885bfb -> d653fc5)
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git.
from 2885bfb Move older gen generators into their own file
new 5ed9549 Add a new field, specifying whether HTML from an HTML-only email was saved without conversion to text
new d653fc5 Allow storing of html-only emails even if html2text is not enabled
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
tools/archiver.py | 21 +++++++++++++++++----
tools/mappings.yaml | 2 ++
2 files changed, 19 insertions(+), 4 deletions(-)
[incubator-ponymail-foal] 01/02: Add a new field,
specifying whether HTML from an HTML-only email was saved without
conversion to text
Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
commit 5ed95497d73c0531fe206b9f942c3338106255a9
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 6 09:05:04 2020 +0200
Add a new field, specifying whether HTML from an HTML-only email was saved without conversion to text
---
tools/mappings.yaml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/mappings.yaml b/tools/mappings.yaml
index 6743a07..bd2b7b5 100644
--- a/tools/mappings.yaml
+++ b/tools/mappings.yaml
@@ -68,6 +68,8 @@ mbox:
type: text
from_raw:
type: keyword
+ html_as_source:
+ type: boolean
in-reply-to:
type: keyword
list:
[incubator-ponymail-foal] 02/02: Allow storing of html-only emails
even if html2text is not enabled
Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
commit d653fc5bd65d81e752507ca93b07429ec7185791
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 6 09:06:11 2020 +0200
Allow storing of html-only emails even if html2text is not enabled
This means storing the raw HTML source and setting a flag for the unit
tests so they won't break in the cases where foal and pony differ.
---
tools/archiver.py | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/tools/archiver.py b/tools/archiver.py
index c52207b..9222a66 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -181,6 +181,7 @@ class Body:
self.string: typing.Optional[str] = None
self.flowed = "format=flowed" in part.get("content-type", "")
self.bytes = part.get_payload(decode=True)
+ self.html_as_source = False
if self.bytes is not None:
valid_encodings = [x for x in self.charsets if x]
if valid_encodings:
@@ -312,8 +313,7 @@ class Archiver(object): # N.B. Also used by import-mbox.py
]:
body = Body(part)
elif (
- self.html
- and not first_html
+ not first_html
and part.get_content_type() == "text/html"
):
first_html = Body(part)
@@ -327,7 +327,12 @@ class Archiver(object): # N.B. Also used by import-mbox.py
or (self.ignore_body and str(body).find(str(self.ignore_body)) != -1)
):
body = first_html
- body.assign(self.html2text(str(body)))
+ body.html_as_source = True
+
+ # Convert HTML to text if the html2text module is installed and enabled, otherwise keep the source as-is
+ if self.html:
+ body.assign(self.html2text(str(body)))
+ body.html_as_source = False
return body
# N.B. this is also called by import-mbox.py
@@ -415,13 +420,20 @@ class Archiver(object): # N.B. Also used by import-mbox.py
if body is not None or attachments:
pmid = mid
id_set = set() # Use a set to avoid duplicates
+ # The body used for generators differs from the body put into the meta doc,
+ # for historical reasons. In the older generators where it is actively used,
+ # it would be UTF-8 bytes in cases of charset-less message bodies. It would
+ # also be nothing in case of html-only emails where html2text is not enabled.
+ generator_body = body if body and body.character_set else body and body.bytes or ""
+ if body.html_as_source:
+ generator_body = ""
for generator in self.generator.split(" "):
if generator:
try:
mid = plugins.generators.generate(
generator,
msg,
- body if body and body.character_set else body and body.bytes or "",
+ generator_body,
lid,
attachments,
raw_msg,
@@ -469,6 +481,7 @@ class Archiver(object): # N.B. Also used by import-mbox.py
"references": msg_metadata["references"],
"in-reply-to": irt,
"body": body.unflow() if body else "",
+ "html_source_only": body.html_as_source,
"attachments": attachments,
}