You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2019/04/26 23:46:44 UTC
[incubator-ponymail] branch master updated: Bug: Errors with Elasticsearch 5.x in copy-list.py

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail.git


The following commit(s) were added to refs/heads/master by this push:
     new 2c87c17  Bug: Errors with Elasticsearch 5.x in copy-list.py
2c87c17 is described below

commit 2c87c17173b74b22cc48fb6070cb150879e63806
Author: Sebb <se...@apache.org>
AuthorDate: Sat Apr 27 00:46:34 2019 +0100

    Bug: Errors with Elasticsearch 5.x in copy-list.py
    
    This fixes #489
---
 CHANGELOG.md       |  1 +
 tools/copy-list.py | 32 +++++++++++---------------------
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44e4a81..70abee7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 ## Changes in 0.12:
 - Bug: Errors with Elasticsearch 5.x in edit-lists.py (#489)
 - Bug: Errors with Elasticsearch 5.x in nullfav.py (#489)
+- Bug: Errors with Elasticsearch 5.x in copy-list.py (#489)
 - Enh: Support scan/scroll for all current versions of ES (#489-contd)
 - Bug: copy-list.py --target does not work (#491)
 - Bug/Enh: setup.py tries to install Python modules (#465)
diff --git a/tools/copy-list.py b/tools/copy-list.py
index bdd3405..bf6d905 100755
--- a/tools/copy-list.py
+++ b/tools/copy-list.py
@@ -95,32 +95,22 @@ count = 0
 
 print("Updating docs...")
 then = time.time()
-page = es.search(
-    doc_type="mbox",
-    scroll = '30m',
-    search_type = 'scan',
-    size = 100,
-    body = {
-        'query': {
-            'bool': {
-                'must': [
-                    {
-                        'wildcard' if wildcard else 'term': {
-                            'list_raw': sourceLID
-                        }
+query = {
+    'query': {
+        'bool': {
+            'must': [
+                {
+                    'wildcard' if wildcard else 'term': {
+                        'list_raw': sourceLID
                     }
-                ]
-            }
+                }
+            ]
         }
     }
-    )
-sid = page['_scroll_id']
-scroll_size = page['hits']['total']
+}
 js_arr = []
-while (scroll_size > 0):
-    page = es.scroll(scroll_id = sid, scroll = '30m')
+for page in es.scan_and_scroll(body = query):
     sid = page['_scroll_id']
-    scroll_size = len(page['hits']['hits'])
     for hit in page['hits']['hits']:
         doc = hit['_id']
         body = es.get(doc_type = 'mbox', id = doc)