You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/08/24 10:28:35 UTC
[incubator-ponymail-foal] branch master updated: Be more strict in decoding
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
The following commit(s) were added to refs/heads/master by this push:
new 8f92ba0 Be more strict in decoding
8f92ba0 is described below
commit 8f92ba0accb413fe13fbd9792d9f6b7d276890d8
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Mon Aug 24 12:28:22 2020 +0200
Be more strict in decoding
- Default charset should be US-ASCII, following the RFC 822 standard
- Only look in immediate message part or its parent for character sets
(don't look in other parts that aren't directly related)
---
tools/archiver.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/tools/archiver.py b/tools/archiver.py
index bb57f00..1caf72b 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -167,10 +167,10 @@ def message_attachments(msg: email.message.Message) -> typing.Tuple[list, dict]:
class Body:
def __init__(self, part: email.message.Message):
self.content_type = part.get_content_type()
- self.charsets = set([part.get_charset()]) # Part's charset
- self.charsets.update(part.get_charsets()) # Parent charsets as fallback
- self.character_set = "utf-8"
- self.string = None
+ self.charsets = set([part.get_content_charset()]) # Part's charset
+ self.charsets.update([part.get_charsets()[0]]) # Parent charset as fallback if any/different
+ self.character_set = "us-ascii"
+ self.string: typing.Optional[str] = None
self.flowed = "format=flowed" in part.get("content-type", "")
contents = part.get_payload(decode=True)
if contents is not None:
@@ -182,7 +182,7 @@ class Body:
except UnicodeDecodeError:
pass
if not self.string:
- self.string = contents.decode("utf-8", errors="replace")
+ self.string = contents.decode("us-ascii", errors="replace")
def __repr__(self):
return self.string