You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ponymail.apache.org by GitBox <gi...@apache.org> on 2020/09/02 16:04:01 UTC

[GitHub] [incubator-ponymail] sebbASF commented on a change in pull request #517: Add DKIM style ID generation

sebbASF commented on a change in pull request #517:
URL: https://github.com/apache/incubator-ponymail/pull/517#discussion_r482187709



##########
File path: tools/generators.py
##########
@@ -19,14 +19,137 @@
 This file contains the various ID generators for Pony Mail's archivers.
 """
 
+import base64
 import hashlib
 import email.utils
 import time
 import re
 
+# For optional nonce
+config = None
+
+# Headers from RFC 4871, the precursor to RFC 6376
+rfc4871_subset = {
+    b"from", b"sender", b"reply-to", b"subject", b"date",
+    b"message-id", b"to", b"cc", b"mime-version", b"content-type",
+    b"content-transfer-encoding", b"content-id",
+    b"content-description", b"resent-date", b"resent-from",
+    b"resent-sender", b"resent-to", b"resent-cc",
+    b"resent-message-id", b"in-reply-to", b"references", b"list-id",
+    b"list-help", b"list-unsubscribe", b"list-subscribe",
+    b"list-post", b"list-owner", b"list-archive", b"dkim-signature"
+}
+
+# Authenticity headers from RFC 8617
+rfc4871_and_rfc8617_subset = rfc4871_subset | {
+    b"arc-authentication-results", b"arc-message-signature",
+    b"arc-seal"
+}
+
+def rfc822_parse_dkim(suffix,
+        head_canon = False, body_canon = False,
+        head_subset = None, archive_list_id = None):
+    headers = []
+    keep = True
+    list_ids = set()
+
+    while suffix:
+        # Edge case: headers don't end LF (add LF)
+        line, suffix = (suffix.split(b"\n", 1) + [None])[:2]
+        if line in {b"\r", b"", None}:
+            break
+        lf = line.endswith(b"\r") and (suffix is not None)
+        end = b"\n" if lf else b"\r\n"
+        if line[0] in {0x09, 0x20}:
+            # Edge case: starts with a continuation (treat like From)
+            if headers and (keep is True):
+                headers[-1][1] += line + end
+        elif not line.startswith(b"From "):
+            # Edge case: header start contains no colon (use whole line)
+            # "A field-name MUST be contained on one line." (RFC 822 B.2)
+            k, v = (line.split(b":", 1) + [b""])[:2]
+            k_lower = k.lower()
+            if k_lower == "list-id":
+                list_ids.add(k_lower)
+            if (head_subset is None) or (k_lower in head_subset):
+                keep = True
+                headers.append([k, v + end])
+            else:
+                keep = False
+    # The remaining suffix is the body
+    body = (suffix or b"").replace(b"\r\n", b"\n")
+    body = body.replace(b"\n", b"\r\n")
+
+    # Optional X-Archive-List-ID augmentation
+    if (archive_list_id is not None) and (archive_list_id not in list_ids):
+        xali_value = b" " + bytes(archive_list_id, "ascii")
+        headers.append([b"X-Archive-List-ID", xali_value])
+    # Optional nonce from local config
+    if config is not None:
+        if (config.has_section("archiver") and
+            config.has_option("archiver", "nonce")):
+            nonce = config.get("archiver", "nonce")
+            headers.append([b"X-Archive-Nonce", nonce])
+    # Optional head canonicalisation (DKIM relaxed)
+    if head_canon is True:
+        for i in range(len(headers)):
+            k, v = headers[i]
+            crlf = v.endswith(b"\r\n")
+            if crlf is True:
+                v = v[:-2]
+            v = v.replace(b"\r\n", b"")
+            v = v.replace(b"\t", b" ")
+            v = v.strip(b" ")
+            v = b" ".join(vv for vv in v.split(b" ") if vv)
+            if crlf is True:
+                v = v + b"\r\n"

Review comment:
       Whilst the standard requires the trailing CRLF to be kept, I'm not sure that is the best approach for a Permalink.
   
   This is because many (perhaps most) of the mbox files use LF rather than CRLF line endings, so header values taken from those files will end up without any line terminator.
   
   I think it would be better to strip all line terminators, whether CRLF or bare LF.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org