You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2019/04/26 23:46:44 UTC
[incubator-ponymail] branch master updated: Bug: Errors with Elasticsearch 5.x in copy-list.py
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail.git
The following commit(s) were added to refs/heads/master by this push:
new 2c87c17 Bug: Errors with Elasticsearch 5.x in copy-list.py
2c87c17 is described below
commit 2c87c17173b74b22cc48fb6070cb150879e63806
Author: Sebb <se...@apache.org>
AuthorDate: Sat Apr 27 00:46:34 2019 +0100
Bug: Errors with Elasticsearch 5.x in copy-list.py
This fixes #489
---
CHANGELOG.md | 1 +
tools/copy-list.py | 32 +++++++++++---------------------
2 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44e4a81..70abee7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
## Changes in 0.12:
- Bug: Errors with Elasticsearch 5.x in edit-lists.py (#489)
- Bug: Errors with Elasticsearch 5.x in nullfav.py (#489)
+- Bug: Errors with Elasticsearch 5.x in copy-list.py (#489)
- Enh: Support scan/scroll for all current versions of ES (#489-contd)
- Bug: copy-list.py --target does not work (#491)
- Bug/Enh: setup.py tries to install Python modules (#465)
diff --git a/tools/copy-list.py b/tools/copy-list.py
index bdd3405..bf6d905 100755
--- a/tools/copy-list.py
+++ b/tools/copy-list.py
@@ -95,32 +95,22 @@ count = 0
print("Updating docs...")
then = time.time()
-page = es.search(
- doc_type="mbox",
- scroll = '30m',
- search_type = 'scan',
- size = 100,
- body = {
- 'query': {
- 'bool': {
- 'must': [
- {
- 'wildcard' if wildcard else 'term': {
- 'list_raw': sourceLID
- }
+query = {
+ 'query': {
+ 'bool': {
+ 'must': [
+ {
+ 'wildcard' if wildcard else 'term': {
+ 'list_raw': sourceLID
}
- ]
- }
+ }
+ ]
}
}
- )
-sid = page['_scroll_id']
-scroll_size = page['hits']['total']
+}
js_arr = []
-while (scroll_size > 0):
- page = es.scroll(scroll_id = sid, scroll = '30m')
+for page in es.scan_and_scroll(body = query):
sid = page['_scroll_id']
- scroll_size = len(page['hits']['hits'])
for hit in page['hits']['hits']:
doc = hit['_id']
body = es.get(doc_type = 'mbox', id = doc)