You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2016/06/01 12:41:05 UTC

[03/11] incubator-ponymail git commit: bulk deletes

bulk deletes


Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/6bcb4912
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/6bcb4912
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/6bcb4912

Branch: refs/heads/master
Commit: 6bcb491206bd0b22218505fc96a80057b6166294
Parents: 9571469
Author: Sam Ruby <ru...@intertwingly.net>
Authored: Wed May 4 07:32:14 2016 -0400
Committer: Sam Ruby <ru...@intertwingly.net>
Committed: Wed May 4 07:32:14 2016 -0400

----------------------------------------------------------------------
 tools/sync-imap.py | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/6bcb4912/tools/sync-imap.py
----------------------------------------------------------------------
diff --git a/tools/sync-imap.py b/tools/sync-imap.py
index 43d457d..14d2711 100755
--- a/tools/sync-imap.py
+++ b/tools/sync-imap.py
@@ -27,6 +27,7 @@ See usage for instructions.
 import argparse
 import configparser
 import elasticsearch
+from elasticsearch import helpers as eshelper
 import imaplib
 import os
 import pwd
@@ -155,16 +156,37 @@ for result in results[1]:
 
 # delete items from elasticsearch that are not present in imap
 
+queue1 = []
+queue2 = []
 for mid, _id in db.items():
-    if not mid in mail:
-        es.delete(index=iname, id=_id, doc_type='mbox')
-        es.delete(index=iname, id=_id, doc_type='mbox_source')
-        print("deleted: " + mid)
+    if True: # not mid in mail:
+        queue1.append({
+            '_op_type': 'delete',
+            '_index': iname,
+            '_type': 'mbox',
+            '_id': _id
+        })
+        queue2.append({
+            '_op_type': 'delete',
+            '_index': iname,
+            '_type': 'mbox_source',
+            '_id': _id
+        })
+        print("deleting: " + mid)
+
+while len(queue1) > 0:
+    eshelper.bulk(es, queue1[0:1024])
+    del queue1[0:1024]
+
+while len(queue2) > 0:
+    eshelper.bulk(es, queue2[0:1024])
+    del queue2[0:1024]
 
 # add new items to elasticsearch from imap
 
 for mid, uid in mail.items():
     if not mid in db:
+        print("indexing %s" % mid)
         argv = [sys.executable, 'archiver.py', '--lid=%s' % es_list]
         if verbose: argv.append('--verbose')
         if html2text: argv.append('--html2text')
@@ -172,5 +194,4 @@ for mid, uid in mail.items():
         child.stdin.write(imap.uid('fetch', uid, '(RFC822)')[1][0][1])
         child.stdin.close()
         rc = child.wait()
-        print("inserted: %s, rc = %d" % (mid, rc))
-
+        if rc != 0: print("rc %d" % rc)