You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2019/04/27 00:43:16 UTC

[incubator-ponymail] branch master updated: Bug: Errors with Elasticsearch 5.x in missing.py

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail.git


The following commit(s) were added to refs/heads/master by this push:
     new e7bf105  Bug: Errors with Elasticsearch 5.x in missing.py
e7bf105 is described below

commit e7bf105b92ba1eee3d4240aa08be6e8f4844b4b3
Author: Sebb <se...@apache.org>
AuthorDate: Sat Apr 27 01:43:04 2019 +0100

    Bug: Errors with Elasticsearch 5.x in missing.py
    
    This fixes #489
---
 CHANGELOG.md     |  1 +
 tools/missing.py | 49 ++++++++++++++++++-------------------------------
 2 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70abee7..a13a551 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 - Bug: Errors with Elasticsearch 5.x in edit-lists.py (#489)
 - Bug: Errors with Elasticsearch 5.x in nullfav.py (#489)
 - Bug: Errors with Elasticsearch 5.x in copy-list.py (#489)
+- Bug: Errors with Elasticsearch 5.x in missing.py (#489)
 - Enh: Support scan/scroll for all current versions of ES (#489-contd)
 - Bug: copy-list.py --target does not work (#491)
 - Bug/Enh: setup.py tries to install Python modules (#465)
diff --git a/tools/missing.py b/tools/missing.py
index 5871fd9..c09885a 100755
--- a/tools/missing.py
+++ b/tools/missing.py
@@ -59,11 +59,11 @@ def getField(src,name):
     except KeyError:
         return '(Uknown)'
 
-def update(elastic, js_arr):
+def update(es, arr):
     if args.debug:
-        print(js_arr)
+        print(arr)
     if not args.test:
-        elastic.bulk(js_arr)
+        es.bulk(arr)
 
 setField = len(args.missing) > 1
 field = args.missing[0]
@@ -74,45 +74,32 @@ if setField:
 else:
     print("List missing/null field %s" % field)
 count = 0
-scroll = '30m'
 then = time.time()
 elastic = Elastic()
 if args.source:
     sourceLID = ("%s" if args.notag else "<%s>")  % args.source.replace("@", ".").strip("<>")
-    page = elastic.scan(# defaults to mbox
-            scroll = scroll,
-            body = {
-                "_source" : ['subject','message-id'],
-                "query" : {
-                    "bool" : {
-                        "must" : {
-                            'wildcard' if args.wildcard else 'term': {
-                                'list_raw': sourceLID
-                                }
-                            },
-                        # missing is not supported in ES 5.x
-                        "must_not": {
-                            "exists" : {
-                                "field" : field
-                            }
+    query = {
+        "_source" : ['subject','message-id'],
+        "query" : {
+            "bool" : {
+                "must" : {
+                    'wildcard' if args.wildcard else 'term': {
+                        'list_raw': sourceLID
                         }
+                    },
+                # missing is not supported in ES 5.x
+                "must_not": {
+                    "exists" : {
+                        "field" : field
                     }
                 }
             }
-        )
-    print(page)
-    sid = page['_scroll_id']
-    scroll_size = page['hits']['total']
-    print("Found %d matches" % scroll_size)
-    if args.debug:
-        print(page)
+        }
+    }
     js_arr = []
-    while (scroll_size > 0):
-        page = elastic.scroll(scroll_id = sid, scroll = scroll)
+    for page in elastic.scan_and_scroll(body = query):
         if args.debug:
             print(page)
-        sid = page['_scroll_id']
-        scroll_size = len(page['hits']['hits'])
         for hit in page['hits']['hits']:
             doc = hit['_id']
             body = {}