You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2016/06/01 12:41:10 UTC
[08/11] incubator-ponymail git commit: merge sync-imap into import-mbox
merge sync-imap into import-mbox
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/6127911a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/6127911a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/6127911a
Branch: refs/heads/master
Commit: 6127911a367a527423495c98f551d3939359eae9
Parents: 7135cd4
Author: Sam Ruby <ru...@intertwingly.net>
Authored: Fri May 6 12:46:31 2016 -0400
Committer: Sam Ruby <ru...@intertwingly.net>
Committed: Fri May 6 12:46:31 2016 -0400
----------------------------------------------------------------------
tools/import-mbox.py | 124 +++++++++++++++++++++++++++--
tools/sync-imap.py | 197 ----------------------------------------------
2 files changed, 118 insertions(+), 203 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/6127911a/tools/import-mbox.py
----------------------------------------------------------------------
diff --git a/tools/import-mbox.py b/tools/import-mbox.py
index 2220f72..0b59d1f 100644
--- a/tools/import-mbox.py
+++ b/tools/import-mbox.py
@@ -45,6 +45,10 @@ except:
print("Sorry, you need to install the elasticsearch and formatflowed modules from pip first.")
sys.exit(-1)
+# change working directory to location of this script
+
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+
y = 0
baddies = 0
block = Lock()
@@ -57,6 +61,7 @@ appender = "apache.org"
source = "./"
maildir = False
+imap = False
list_override = None
project = ""
filebased = False
@@ -174,7 +179,18 @@ class SlurpThread(Thread):
EM = 1
stime = time.time()
dFile = False
- if filebased:
+ if maildir:
+ messages = mailbox.Maildir(tmpname)
+ elif imap:
+ y -= 1 # TODO don't understand the increment above
+ imap4 = mla[2]
+ def mailgen(list):
+ for uid in list:
+ msgbytes = imap4.uid('fetch', uid, '(RFC822)')[1][0][1]
+ yield email.message_from_bytes(msgbytes)
+ messages = mailgen(mla[0])
+ xlist_override = mla[1]
+ elif filebased:
tmpname = mla[0]
filename = mla[0]
@@ -197,6 +213,8 @@ class SlurpThread(Thread):
except Exception as err:
print("This wasn't a gzip file: %s" % err )
print("Slurping %s" % filename)
+ messages = mailbox.mbox(tmpname)
+
else:
ml = mla[0]
mboxfile = mla[1]
@@ -215,15 +233,11 @@ class SlurpThread(Thread):
with open(tmpname, "w") as f:
f.write(inp)
f.close()
+ messages = mailbox.mbox(tmpname)
count = 0
LEY = EY
- if maildir:
- messages = mailbox.Maildir(tmpname)
- else:
- messages = mailbox.mbox(tmpname)
-
for message in messages:
if resendTo:
print("Delivering message %s via MTA" % message['message-id'] if 'message-id' in message else '??')
@@ -285,6 +299,8 @@ class SlurpThread(Thread):
print("Parsed %u records from %s" % (count, filename))
if dFile:
os.unlink(tmpname)
+ elif imap:
+ print("Parsed %u records from imap" % count)
else:
print("Parsed %s/%s: %u records from %s" % (ml, mboxfile, count, tmpname))
os.unlink(tmpname)
@@ -460,6 +476,102 @@ elif source[0] == "h":
if quickmode and qn >= 2:
break
+# IMAP(S) based import?
+elif source[0] == "i":
+ imap = True
+ import urllib, getpass, imaplib
+ url = urllib.parse.urlparse(source)
+
+ port = url.port or (143 if url.scheme == 'imap' else 993)
+ user = url.username or getpass.getuser()
+ password = url.password or getpass.getpass('IMAP Password: ')
+ folder = url.path.strip('/') or 'INBOX'
+ listname = list_override or "<%s/%s.%s>" % (user, folder, url.hostname)
+
+ # fetch message-id => _id pairs from elasticsearch
+
+ es = Elasticsearch()
+ result = es.search(scroll = '5m',
+ body = {
+ 'size': 1024,
+ 'fields': ['message-id'],
+ 'query': {'match': {'list': listname}}
+ }
+ )
+
+ db = {}
+ while len(result['hits']['hits']) > 0:
+ for hit in result['hits']['hits']:
+ db[hit['fields']['message-id'][0]] = hit['_id']
+ result = es.scroll(scroll='5m', scroll_id=result['_scroll_id'])
+
+ # fetch message-id => uid pairs from imap
+
+ if url.scheme == 'imaps':
+ imap4 = imaplib.IMAP4_SSL(url.hostname, port)
+ else:
+ imap4 = imaplib.IMAP4(url.hostname, port)
+ imap4.login(user, password)
+ imap4.select(folder, readonly=True)
+ results = imap4.uid('search', None, 'ALL')
+ uids = b','.join(results[1][0].split())
+ results = imap4.uid('fetch', uids, '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
+
+ mail = {}
+ uid_re = re.compile(b'^\d+ \(UID (\d+) BODY\[')
+ mid_re = re.compile(b'^Message-ID:\s*(.*?)\s*$', re.I)
+ uid = None
+ for result in results[1]:
+ for line in result:
+ if isinstance(line, bytes):
+ match = uid_re.match(line)
+ if match:
+ uid = match.group(1)
+ else:
+ match = mid_re.match(line)
+ if match:
+ try:
+ mail[match.group(1).decode('utf-8')] = uid
+ uid = None
+ except ValueError:
+ pass
+
+ # delete items from elasticsearch that are not present in imap
+
+ queue1 = []
+ queue2 = []
+ for mid, _id in db.items():
+ if not mid in mail:
+ queue1.append({
+ '_op_type': 'delete',
+ '_index': dbname,
+ '_type': 'mbox',
+ '_id': _id
+ })
+ queue2.append({
+ '_op_type': 'delete',
+ '_index': dbname,
+ '_type': 'mbox_source',
+ '_id': _id
+ })
+ print("deleting: " + mid)
+
+ while len(queue1) > 0:
+ eshelper.bulk(es, queue1[0:1024])
+ del queue1[0:1024]
+
+ while len(queue2) > 0:
+ eshelper.bulk(es, queue2[0:1024])
+ del queue2[0:1024]
+
+ # add new items to elasticsearch from imap
+
+ uids = []
+ for mid, uid in mail.items():
+ if not mid in db:
+ uids.append(uid)
+ lists.append([uids, listname, imap4])
+
threads = []
cc = int( multiprocessing.cpu_count() / 2) + 1
print("Starting up to %u threads to fetch the %u %s lists" % (cc, len(lists), project))
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/6127911a/tools/sync-imap.py
----------------------------------------------------------------------
diff --git a/tools/sync-imap.py b/tools/sync-imap.py
deleted file mode 100755
index 14d2711..0000000
--- a/tools/sync-imap.py
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/usr/bin/env python3.4
-# -*- coding: utf-8 -*-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
- #the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" Syncronize ponymail with an imap server.
-
-Fetches message-ids from both ponymail and an imap server, and adds or
-deletes whatever is necessary from ponymail to make it match.
-
-See usage for instructions.
-
-"""
-
-import argparse
-import configparser
-import elasticsearch
-from elasticsearch import helpers as eshelper
-import imaplib
-import os
-import pwd
-import subprocess
-import sys
-import re
-
-# change working directory to location of this script
-
-os.chdir(os.path.dirname(os.path.abspath(__file__)))
-
-# global defaults
-
-es_list = None
-imap_host = 'localhost'
-imap_port = 993
-imap_user = pwd.getpwuid(os.getuid()).pw_name
-imap_password = None
-imap_folder = 'INBOX'
-html2text = False
-verbose = False
-
-# fetch config overrides
-
-config = configparser.RawConfigParser()
-config.read('ponymail.cfg')
-iname = config.get("elasticsearch", "dbname")
-if config.has_option('imap', 'host'):
- imap_host = config.get('imap', 'host')
-if config.has_option('imap', 'port'):
- imap_port = config.getint('imap', 'port')
-if config.has_option('imap', 'user'):
- imap_user = config.getint('imap', 'user')
-if config.has_option('imap', 'password'):
- imap_password = config.getint('imap', 'password')
-
-# fetch command line argument overrides
-
-parser = argparse.ArgumentParser(description='Command line options.')
-parser.add_argument('--list', dest='list', type=str, nargs=1,
- help='ElasticSearch list')
-parser.add_argument('--host', dest='host', type=str, nargs=1,
- help='IMAP host')
-parser.add_argument('--port', dest='port', type=int, nargs=1,
- help='IMAP port')
-parser.add_argument('--user', dest='user', type=str, nargs=1,
- help='IMAP user')
-parser.add_argument('--password', dest='password', type=str, nargs=1,
- help='IMAP password')
-parser.add_argument('--folder', dest='folder', type=str, nargs=1,
- help='IMAP folder')
-parser.add_argument('--html2text', dest='html2text', action='store_true',
- help='Try to convert HTML to text if no text/plain message is found')
-parser.add_argument('--verbose', dest='verbose', action='store_true',
- help='Output additional log messages')
-
-args = parser.parse_args()
-
-if args.list:
- es_list = args.list[0]
-if args.host:
- imap_host = args.host[0]
-if args.port:
- imap_port = args.port[0]
-if args.user:
- imap_user = args.user[0]
-if args.password:
- imap_password = args.password[0]
-if args.folder:
- imap_folder = args.folder[0]
-if args.html2text:
- html2text = True
-if args.verbose:
- verbose = True
-
-if not es_list or not imap_password:
- parser.print_help()
- sys.exit(-1)
-
-es_list = "<%s>" % es_list.strip("<>") # We need <> around it!
-
-# fetch message-id => _id pairs from elasticsearch
-
-es = elasticsearch.Elasticsearch()
-result = es.search(scroll = '5m',
- body = {
- 'size': 1024,
- 'fields': ['message-id'],
- 'query': {'match': {'list': es_list}}
- }
-)
-
-db = {}
-while len(result['hits']['hits']) > 0:
- for hit in result['hits']['hits']:
- db[hit['fields']['message-id'][0]] = hit['_id']
- result = es.scroll(scroll='5m', scroll_id=result['_scroll_id'])
-
-# fetch message-id => uid pairs from imap
-
-imap = imaplib.IMAP4_SSL(imap_host, imap_port)
-imap.login(imap_user, imap_password)
-imap.select(imap_folder, readonly=True)
-results = imap.uid('search', None, 'ALL')
-uids = b','.join(results[1][0].split())
-results = imap.uid('fetch', uids, '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
-
-mail = {}
-uid_re = re.compile(b'^\d+ \(UID (\d+) BODY\[')
-mid_re = re.compile(b'^Message-ID:\s*(.*?)\s*$', re.I)
-uid = None
-for result in results[1]:
- for line in result:
- if isinstance(line, bytes):
- match = uid_re.match(line)
- if match:
- uid = match.group(1)
- else:
- match = mid_re.match(line)
- if match:
- try:
- mail[match.group(1).decode('utf-8')] = uid
- uid = None
- except ValueError:
- pass
-
-# delete items from elasticsearch that are not present in imap
-
-queue1 = []
-queue2 = []
-for mid, _id in db.items():
- if True: # not mid in mail:
- queue1.append({
- '_op_type': 'delete',
- '_index': iname,
- '_type': 'mbox',
- '_id': _id
- })
- queue2.append({
- '_op_type': 'delete',
- '_index': iname,
- '_type': 'mbox_source',
- '_id': _id
- })
- print("deleting: " + mid)
-
-while len(queue1) > 0:
- eshelper.bulk(es, queue1[0:1024])
- del queue1[0:1024]
-
-while len(queue2) > 0:
- eshelper.bulk(es, queue2[0:1024])
- del queue2[0:1024]
-
-# add new items to elasticsearch from imap
-
-for mid, uid in mail.items():
- if not mid in db:
- print("indexing %s" % mid)
- argv = [sys.executable, 'archiver.py', '--lid=%s' % es_list]
- if verbose: argv.append('--verbose')
- if html2text: argv.append('--html2text')
- child = subprocess.Popen(argv, stdin=subprocess.PIPE)
- child.stdin.write(imap.uid('fetch', uid, '(RFC822)')[1][0][1])
- child.stdin.close()
- rc = child.wait()
- if rc != 0: print("rc %d" % rc)