You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/08/24 10:28:35 UTC

[incubator-ponymail-foal] branch master updated: Be more strict in decoding

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f92ba0  Be more strict in decoding
8f92ba0 is described below

commit 8f92ba0accb413fe13fbd9792d9f6b7d276890d8
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Mon Aug 24 12:28:22 2020 +0200

    Be more strict in decoding
    
    - Default charset should be US-ASCII, following the RFC 822 standard
    - Only look in the immediate message part or its parent for character sets
      (don't look in other parts that aren't directly related)
---
 tools/archiver.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/archiver.py b/tools/archiver.py
index bb57f00..1caf72b 100755
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -167,10 +167,10 @@ def message_attachments(msg: email.message.Message) -> typing.Tuple[list, dict]:
 class Body:
     def __init__(self, part: email.message.Message):
         self.content_type = part.get_content_type()
-        self.charsets = set([part.get_charset()])  # Part's charset
-        self.charsets.update(part.get_charsets())  # Parent charsets as fallback
-        self.character_set = "utf-8"
-        self.string = None
+        self.charsets = set([part.get_content_charset()])  # Part's charset
+        self.charsets.update([part.get_charsets()[0]])  # Parent charset as fallback if any/different
+        self.character_set = "us-ascii"
+        self.string: typing.Optional[str] = None
         self.flowed = "format=flowed" in part.get("content-type", "")
         contents = part.get_payload(decode=True)
         if contents is not None:
@@ -182,7 +182,7 @@ class Body:
                     except UnicodeDecodeError:
                         pass
             if not self.string:
-                self.string = contents.decode("utf-8", errors="replace")
+                self.string = contents.decode("us-ascii", errors="replace")
 
     def __repr__(self):
         return self.string