You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2022/07/07 13:02:55 UTC

[incubator-ponymail-foal] branch master updated: Create bulk-edit.py

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git


The following commit(s) were added to refs/heads/master by this push:
     new 789c7b6  Create bulk-edit.py
789c7b6 is described below

commit 789c7b6e06adf46e69adc69f705cad6f60118d28
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Thu Jul 7 15:02:52 2022 +0200

    Create bulk-edit.py
---
 tools/bulk-edit.py | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)

diff --git a/tools/bulk-edit.py b/tools/bulk-edit.py
new file mode 100644
index 0000000..f49d4a3
--- /dev/null
+++ b/tools/bulk-edit.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+    bulk-edit.py: mbox bulk editor for Apache Pony Mail (Foal)
+
+    Examples:
+        - Move all email from <foo.bar.example.org> to <bar.baz.example.org>:
+            python3 bulk-edit.py --search 'list_raw:"<foo.bar.example.org>"' --action move --destination "<bar.baz.example.org>"
+        - Make all emails from gnome@example.org private:
+            python3 bulk-edit.py --search 'from:"<gnome@example.org>"' --action private
+        - Delete all emails on foo@bar.example.org with 'gnomes' in the subject:
+           python3 bulk-edit.py --search 'list_raw:"<foo.bar.example.org>" AND subject:gnomes' --action delete
+
+    Be sure to always run your query with --test first, to see which documents would be affected!
+"""
+
+import elasticsearch.exceptions
+import sys
+import asyncio
+import argparse
+import time
+import re
+import warnings
+from elasticsearch.helpers import async_scan
+
+
+if not __package__:
+    from plugins import ponymailconfig
+    from plugins.elastic import Elastic
+else:
+    from .plugins import ponymailconfig
+    from .plugins.elastic import Elastic
+
+
+def gen_args(argv=None) -> argparse.Namespace:
+    """Build the command line parser and parse the arguments.
+
+    :param argv: Optional list of argument strings to parse. When None (the
+        default), argparse reads the arguments from ``sys.argv[1:]``.
+    :return: Namespace with the attributes ``search`` (str), ``action`` (str),
+        ``destination`` (str), ``test`` (bool) and ``warn`` (bool).
+    """
+    parser = argparse.ArgumentParser(description="Command line options.")
+    # No nargs=1 here: that would turn a supplied value into a one-element list
+    # while the default stays a plain string. A single str keeps both cases alike.
+    parser.add_argument(
+        "--search",
+        dest="search",
+        type=str,
+        help="""Search parameters (Lucene query string) to narrow down what to edit (for instance: 'list_raw:"<dev.maven.apache.org>"')""",
+        default="*",
+    )
+    parser.add_argument(
+        "--action",
+        dest="action",
+        type=str,
+        choices=["move", "delete", "private", "public", "list"],
+        help="The action to perform on each matching document",
+        default="list",
+    )
+    parser.add_argument(
+        "--destination",
+        dest="destination",
+        type=str,
+        help="If action is 'move', this sets the destination list-id to move the matching documents to",
+        default="",
+    )
+    parser.add_argument(
+        "--test",
+        dest="test",
+        action="store_true",
+        help="Test mode, only scan database and report, but do not make any changes to it.",
+    )
+    parser.add_argument(
+        "--warn",
+        dest="warn",
+        action="store_true",
+        help="Enable ElasticSearch Warnings (defaults to disabled to suppress xpack nonsense)",
+        default=False,
+    )
+    return parser.parse_args(argv)
+
+
+async def _update_doc(es, doc_id, changes, announcement):
+    """Print ``announcement``, apply ``changes`` as a partial update to the mbox document, then print the done marker.
+
+    :param es: The Elastic wrapper; its ``es`` client and ``db_mbox`` index name are used.
+    :param doc_id: The ``_id`` of the document to update.
+    :param changes: Dict of fields to set, sent as the ``doc`` part of the update body.
+    :param announcement: Progress text written (without newline) before the update is issued.
+    """
+    sys.stdout.write(announcement)
+    sys.stdout.flush()  # Show the progress text before awaiting the (possibly slow) update
+    await es.es.update(index=es.db_mbox, id=doc_id, body={"doc": changes})
+    sys.stdout.write(" [DONE]\n")
+    sys.stdout.flush()
+
+
+async def main():
+    """Scan the mbox index for documents matching --search and apply --action to each of them.
+
+    With --test, the move/delete/private/public actions only report what they would do.
+    Exits with status -1 (before connecting) if --action is 'move' and --destination is not
+    in the <foo.bar.baz> format. The client is closed even if the scan or an update raises.
+    """
+    start_time = time.time()
+    args = gen_args()
+    # --search may arrive as a one-element list (argparse nargs=1) or as a plain string (the default).
+    query = args.search[0] if isinstance(args.search, list) else args.search
+
+    # Validate before creating the client, so a bad destination does not leave a connection open.
+    # fullmatch() rejects trailing junk after '>', and the pattern avoids the nested quantifier of
+    # "([...]+\.?)+" which can backtrack exponentially on non-matching input.
+    if args.action == "move" and not re.fullmatch(r"<[-a-z0-9_]+(?:\.[-a-z0-9_]+)*\.?>", args.destination):
+        sys.stderr.write("ERROR: Destination list (--destination) MUST be using the <foo.bar.baz> format!\n")
+        sys.exit(-1)
+
+    if not args.warn:
+        warnings.filterwarnings("ignore", category=elasticsearch.exceptions.ElasticsearchWarning)
+    # NOTE(review): config is not used below; kept in case constructing it validates the configuration - confirm.
+    config = ponymailconfig.PonymailConfig()
+    es = Elastic(is_async=True)
+    docs_changed = 0
+
+    try:
+        async for doc in async_scan(client=es.es, q=query, index=es.db_mbox):
+            source = doc["_source"]
+            if args.action == "list":
+                docs_changed += 1
+                subject = source["subject"].replace("\n", "")
+                print(f"""found: {doc['_id']} {source['list_raw']}: {subject}""")
+            elif args.action == "move":
+                if args.test:
+                    print(
+                        f"""[TEST] Would have moved {source["mid"]} from {source["list_raw"]} to {args.destination}"""
+                    )
+                else:
+                    await _update_doc(
+                        es,
+                        doc["_id"],
+                        {"list": args.destination, "list_raw": args.destination},
+                        f"""[MOVE] Moving {source["mid"]} from {source["list_raw"]} to {args.destination}...""",
+                    )
+                docs_changed += 1
+            elif args.action in ("private", "public"):
+                make_private = args.action == "private"
+                # Only touch (and count) documents whose visibility actually differs from the target.
+                if bool(source["private"]) != make_private:
+                    if args.test:
+                        print(f"""[TEST] Would have made {source["mid"]} from {source["list_raw"]} {args.action}""")
+                    else:
+                        label = "HIDE" if make_private else "SHOW"
+                        await _update_doc(
+                            es,
+                            doc["_id"],
+                            {"private": make_private},
+                            f"""[{label}] Turning {source["mid"]} from {source["list_raw"]} {args.action}...""",
+                        )
+                    docs_changed += 1
+            elif args.action == "delete":
+                if args.test:
+                    print(
+                        f"""[TEST] Would have deleted {source["mid"]} (and source {source["dbid"]}) from {source["list_raw"]}"""
+                    )
+                else:
+                    sys.stdout.write(
+                        f"""[DELETE] Removing {source["mid"]} (and source {source["dbid"]}) from {source["list_raw"]}..."""
+                    )
+                    sys.stdout.flush()
+                    # Remove the mbox document first, then the source document referenced by its dbid.
+                    await es.es.delete(
+                        index=es.db_mbox,
+                        id=doc["_id"],
+                    )
+                    await es.es.delete(
+                        index=es.db_source,
+                        id=source["dbid"],
+                    )
+                    sys.stdout.write(" [DONE]\n")
+                    sys.stdout.flush()
+                docs_changed += 1
+        stop_time = time.time()
+        time_taken = int(stop_time - start_time)
+        print(f"Handled {docs_changed} document(s) in {time_taken} second(s).")
+    finally:
+        # Always release the client connection, including when the scan or an update fails.
+        await es.es.close()
+
+if __name__ == "__main__":
+    # asyncio.run() creates a fresh event loop, runs main() to completion and closes the loop
+    # afterwards; calling asyncio.get_event_loop() without a running loop is deprecated.
+    asyncio.run(main())