You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ponymail.apache.org by GitBox <gi...@apache.org> on 2020/08/28 15:11:45 UTC

[GitHub] [incubator-ponymail] sebbASF commented on a change in pull request #517: Add DKIM style ID generation

sebbASF commented on a change in pull request #517:
URL: https://github.com/apache/incubator-ponymail/pull/517#discussion_r479368516



##########
File path: tools/generators.py
##########
@@ -19,14 +19,137 @@
 This file contains the various ID generators for Pony Mail's archivers.
 """
 
+import base64
 import hashlib
 import email.utils
 import time
 import re
 
+# For optional nonce
+config = None
+
+# Headers from RFC 4871, the precursor to RFC 6376
+rfc4871_subset = {
+    b"from", b"sender", b"reply-to", b"subject", b"date",
+    b"message-id", b"to", b"cc", b"mime-version", b"content-type",
+    b"content-transfer-encoding", b"content-id",
+    b"content-description", b"resent-date", b"resent-from",
+    b"resent-sender", b"resent-to", b"resent-cc",
+    b"resent-message-id", b"in-reply-to", b"references", b"list-id",
+    b"list-help", b"list-unsubscribe", b"list-subscribe",
+    b"list-post", b"list-owner", b"list-archive", b"dkim-signature"
+}
+
+# Authenticity headers from RFC 8617
+rfc4871_and_rfc8617_subset = rfc4871_subset | {
+    b"arc-authentication-results", b"arc-message-signature",
+    b"arc-seal"
+}
+
+def rfc822_parse_dkim(suffix,
+        head_canon = False, body_canon = False,
+        head_subset = None, archive_list_id = None):
+    headers = []
+    keep = True
+    list_ids = set()
+
+    while suffix:
+        # Edge case: headers don't end LF (add LF)
+        line, suffix = (suffix.split(b"\n", 1) + [None])[:2]
+        if line in {b"\r", b"", None}:
+            break
+        lf = line.endswith(b"\r") and (suffix is not None)
+        end = b"\n" if lf else b"\r\n"
+        if line[0] in {0x09, 0x20}:
+            # Edge case: starts with a continuation (treat like From)
+            if headers and (keep is True):
+                headers[-1][1] += line + end
+        elif not line.startswith(b"From "):
+            # Edge case: header start contains no colon (use whole line)
+            # "A field-name MUST be contained on one line." (RFC 822 B.2)
+            k, v = (line.split(b":", 1) + [b""])[:2]
+            k_lower = k.lower()
+            if k_lower == "list-id":
+                list_ids.add(k_lower)
+            if (head_subset is None) or (k_lower in head_subset):
+                keep = True
+                headers.append([k, v + end])
+            else:
+                keep = False
+    # The remaining suffix is the body
+    body = (suffix or b"").replace(b"\r\n", b"\n")
+    body = body.replace(b"\n", b"\r\n")
+
+    # Optional X-Archive-List-ID augmentation
+    if (archive_list_id is not None) and (archive_list_id not in list_ids):
+        xali_value = b" " + bytes(archive_list_id, "ascii")
+        headers.append([b"X-Archive-List-ID", xali_value])
+    # Optional nonce from local config
+    if config is not None:
+        if (config.has_section("archiver") and
+            config.has_option("archiver", "nonce")):
+            nonce = config.get("archiver", "nonce")
+            headers.append([b"X-Archive-Nonce", nonce])
+    # Optional head canonicalisation (DKIM relaxed)
+    if head_canon is True:
+        for i in range(len(headers)):
+            k, v = headers[i]
+            crlf = v.endswith(b"\r\n")
+            if crlf is True:
+                v = v[:-2]
+            v = v.replace(b"\r\n", b"")
+            v = v.replace(b"\t", b" ")
+            v = v.strip(b" ")
+            v = b" ".join(vv for vv in v.split(b" ") if vv)
+            if crlf is True:
+                v = v + b"\r\n"
+            headers[i] = [k.lower(), v]
+    # Optional body canonicalisation (DKIM simple)
+    if body_canon is True:
+        while body.endswith(b"\r\n\r\n"):
+            body = body[:-2]

Review comment:
       It looks like most of our mbox files have only LF line endings, so I doubt this will ever be triggered.
   
   AFAICT, mails also arrive at the archiver with LF-only line endings.

##########
File path: tools/generators.py
##########
@@ -19,14 +19,137 @@
 This file contains the various ID generators for Pony Mail's archivers.
 """
 
+import base64
 import hashlib
 import email.utils
 import time
 import re
 
+# For optional nonce
+config = None
+
+# Headers from RFC 4871, the precursor to RFC 6376
+rfc4871_subset = {
+    b"from", b"sender", b"reply-to", b"subject", b"date",
+    b"message-id", b"to", b"cc", b"mime-version", b"content-type",
+    b"content-transfer-encoding", b"content-id",
+    b"content-description", b"resent-date", b"resent-from",
+    b"resent-sender", b"resent-to", b"resent-cc",
+    b"resent-message-id", b"in-reply-to", b"references", b"list-id",
+    b"list-help", b"list-unsubscribe", b"list-subscribe",
+    b"list-post", b"list-owner", b"list-archive", b"dkim-signature"
+}
+
+# Authenticity headers from RFC 8617
+rfc4871_and_rfc8617_subset = rfc4871_subset | {
+    b"arc-authentication-results", b"arc-message-signature",
+    b"arc-seal"
+}
+
+def rfc822_parse_dkim(suffix,
+        head_canon = False, body_canon = False,
+        head_subset = None, archive_list_id = None):
+    headers = []
+    keep = True
+    list_ids = set()
+
+    while suffix:
+        # Edge case: headers don't end LF (add LF)
+        line, suffix = (suffix.split(b"\n", 1) + [None])[:2]
+        if line in {b"\r", b"", None}:
+            break
+        lf = line.endswith(b"\r") and (suffix is not None)
+        end = b"\n" if lf else b"\r\n"
+        if line[0] in {0x09, 0x20}:
+            # Edge case: starts with a continuation (treat like From)
+            if headers and (keep is True):
+                headers[-1][1] += line + end
+        elif not line.startswith(b"From "):
+            # Edge case: header start contains no colon (use whole line)
+            # "A field-name MUST be contained on one line." (RFC 822 B.2)
+            k, v = (line.split(b":", 1) + [b""])[:2]
+            k_lower = k.lower()
+            if k_lower == "list-id":
+                list_ids.add(k_lower)

Review comment:
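       The above line (`list_ids.add(k_lower)`) looks wrong, as does the comparison (`k_lower == "list-id"`) -- surely k_lower is bytes, in which case comparing it with a str can never be true?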
   
   There should ideally be some unit tests to detect such issues...




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org