You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2021/09/12 13:55:38 UTC
[incubator-ponymail-foal] branch master updated: switch to using textlib.py for normalize_lid
This is an automated email from the ASF dual-hosted git repository.
humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
The following commit(s) were added to refs/heads/master by this push:
new e123412 switch to using textlib.py for normalize_lid
e123412 is described below
commit e123412e3ef0cf9421703b8a1a8daf73632f05ce
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Sun Sep 12 08:55:33 2021 -0500
switch to using textlib.py for normalize_lid
---
tools/migrate.py | 26 +++-----------------------
1 file changed, 3 insertions(+), 23 deletions(-)
diff --git a/tools/migrate.py b/tools/migrate.py
index 945af57..b24711d 100644
--- a/tools/migrate.py
+++ b/tools/migrate.py
@@ -23,9 +23,9 @@ from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
from elasticsearch.helpers import async_scan
if not __package__:
- from plugins import generators
+ from plugins import generators, textlib
else:
- from .plugins import generators
+ from .plugins import generators, textlib
import argparse
import base64
@@ -44,26 +44,6 @@ MIGRATION_MAGIC_NUMBER = "2"
cores = len(os.sched_getaffinity(0))
MAX_PARALLEL_OPS = max(min(int((cores + 1) * 0.75), cores - 1), 1)
-def normalize_lid(lid: str) -> str:
- """ Ensures that a List ID is in standard form, i.e. <a.b.c.d> """
- # If of format "list name" <foo.bar.baz>
- # we crop away the description (#511)
- m = re.match(r'".*"\s+(.+)', lid)
- if m:
- lid = m.group(1)
- # Drop <> and anything before/after, if found
- m = re.search(r"<(.+)>", lid)
- if m:
- lid = m.group(1)
- # Belt-and-braces: remove possible extraneous chars
- lid = "<%s>" % lid.strip(" <>").replace("@", ".")
- # Replace invalid characters with underscores so as to not invalidate doc IDs.
- lid = re.sub(r"[^-+~_<>.a-zA-Z0-9@]", "_", lid)
- # Finally, ensure we have a loosely valid list ID value
- if not re.match(r"^<.+\..+>$", lid):
- print("Warning: Invalid list-id %s" % lid)
- return lid
-
class MultiDocProcessor:
"""MultiProcess document processor"""
@@ -193,7 +173,7 @@ def bulk_push(json, es, graceful=False):
def process_document(old_es, doc, old_dbname, dbname_source, dbname_mbox, do_dkim):
now = time.time()
- list_id = normalize_lid(doc["_source"]["list_raw"])
+ list_id = textlib.normalize_lid(doc["_source"]["list_raw"])
try:
source = old_es.get(index=old_dbname, doc_type="mbox_source", id=doc["_id"])
# If we hit a 404 on a source, we have to fake an empty document, as we don't know the source.