You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2020/09/11 08:39:46 UTC

[incubator-ponymail-foal] branch master updated (a532819 -> 945ad98)

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git.


    from a532819  epoch should be in descending order
     new b08d25a  convert thread construction to a class, add addl subject hash map for speedups
     new 9979a9d  use new constructor class for thread building
     new 945ad98  children is a required field for threads.py

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 server/endpoints/stats.py |   3 +-
 server/plugins/mbox.py    | 107 ++++++++++++++++++++++++----------------------
 2 files changed, 58 insertions(+), 52 deletions(-)


[incubator-ponymail-foal] 02/03: use new constructor class for thread building

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 9979a9d992a549671d563b20d7ab53d78c35e16d
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Fri Sep 11 10:39:15 2020 +0200

    use new constructor class for thread building
---
 server/endpoints/stats.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/server/endpoints/stats.py b/server/endpoints/stats.py
index 8ac7a02..ed328fd 100644
--- a/server/endpoints/stats.py
+++ b/server/endpoints/stats.py
@@ -53,7 +53,8 @@ async def process(
         wordcloud = await plugins.mbox.wordcloud(session, query_defuzzed)
     first_year, last_year = await plugins.mbox.get_years(session, query_defuzzed_nodate)
 
-    tstruct, authors = await server.runners.run(plugins.mbox.construct_threads, results)
+    threads = plugins.mbox.ThreadConstructor(results)
+    tstruct, authors = await server.runners.run(threads.construct)
     xlist = indata.get("list", "*")
     xdomain = indata.get("domain", "*")
 


[incubator-ponymail-foal] 01/03: convert thread construction to a class, add addl subject hash map for speedups

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit b08d25a40ee6533bdaa772da00b4d0c6950f2f18
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Fri Sep 11 10:38:49 2020 +0200

    convert thread construction to a class, add addl subject hash map for speedups
---
 server/plugins/mbox.py | 104 +++++++++++++++++++++++++------------------------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/server/plugins/mbox.py b/server/plugins/mbox.py
index 6dcd562..329bb92 100644
--- a/server/plugins/mbox.py
+++ b/server/plugins/mbox.py
@@ -439,57 +439,61 @@ async def get_years(session, query_defuzzed):
     return oldest, youngest
 
 
-def find_root_subject(threads, hashdict, root_email, osubject=None):
-    """Finds the discussion origin of an email, if present"""
-    irt = root_email.get("in-reply-to")
-    subject = root_email.get("subject")
-    subject = subject.replace("\n", "")  # Crop multi-line subjects
-
-    # First, the obvious - look for an in-reply-to in our existing dict with a matching subject
-    if irt and irt in hashdict:
-        if hashdict[irt].get("subject") == subject:
-            return hashdict[irt]
-
-    # If that failed, we break apart our subject
-    if osubject:
-        rsubject = osubject
-    else:
-        rsubject = PYPONY_RE_PREFIX.sub("", subject) + "_" + root_email.get("list_raw")
-    for thread in threads:
-        if thread.get("tsubject") == rsubject:
-            return thread
-
-    return None
-
-
-def construct_threads(emails: typing.List[typing.Dict]):
-    """Turns a list of emails into a nested thread structure"""
-    threads: typing.List[dict] = []
-    authors = {}
-    hashdict: typing.Dict[str, dict] = {}
-    for cur_email in sorted(emails, key=lambda x: x["epoch"]):
-        author = cur_email.get("from")
-        if author not in authors:
-            authors[author] = 0
-        authors[author] += 1
-        subject = cur_email.get("subject", "").replace("\n", "")  # Crop multi-line subjects
-        tsubject = PYPONY_RE_PREFIX.sub("", subject) + "_" + cur_email.get("list_raw", "<a.b.c.d>")
-        parent = find_root_subject(threads, hashdict, cur_email, tsubject)
-        xemail = {
-            "children": [],
-            "tid": cur_email.get("mid"),
-            "subject": subject,
-            "tsubject": tsubject,
-            "epoch": cur_email.get("epoch"),
-            "nest": 1,
-        }
-        if not parent:
-            threads.append(xemail)
+class ThreadConstructor:
+    def __init__(self, emails: typing.List[typing.Dict]):
+        self.emails = emails
+        self.threads: typing.List[dict] = []
+        self.authors = {}
+        self.hashed_by_msg_id: typing.Dict[str, dict] = {}
+        self.hashed_by_subject: typing.Dict[str, dict] = {}
+
+    def construct(self):
+        """Turns a flat array of emails into a nested structure of threads"""
+        for cur_email in sorted(self.emails, key=lambda x: x["epoch"]):
+            author = cur_email.get("from")
+            if author not in self.authors:
+                self.authors[author] = 0
+            self.authors[author] += 1
+            subject = cur_email.get("subject", "").replace("\n", "")  # Crop multi-line subjects
+            tsubject = PYPONY_RE_PREFIX.sub("", subject) + "_" + cur_email.get("list_raw", "<a.b.c.d>")
+            parent = self.find_root_subject(cur_email, tsubject)
+            xemail = {
+                "children": [],
+                "tid": cur_email.get("mid"),
+                "subject": subject,
+                "tsubject": tsubject,
+                "epoch": cur_email.get("epoch"),
+                "nest": 1,
+            }
+            if not parent:
+                self.threads.append(xemail)
+            else:
+                xemail["nest"] = parent["nest"] + 1
+                parent["children"].append(xemail)
+            self.hashed_by_msg_id[cur_email.get("message-id", "??")] = xemail
+            if tsubject not in self.hashed_by_subject:
+                self.hashed_by_subject[tsubject] = xemail
+        return self.threads, self.authors
+
+    def find_root_subject(self, root_email, osubject=None):
+        """Finds the discussion origin of an email, if present"""
+        irt = root_email.get("in-reply-to")
+        subject = root_email.get("subject")
+        subject = subject.replace("\n", "")  # Crop multi-line subjects
+
+        # First, the obvious - look for an in-reply-to in our existing dict with a matching subject
+        if irt and irt in self.hashed_by_msg_id:
+            if self.hashed_by_msg_id[irt].get("subject") == subject:
+                return self.hashed_by_msg_id[irt]
+
+        # If that failed, we break apart our subject
+        if osubject:
+            rsubject = osubject
         else:
-            xemail["nest"] = parent["nest"] + 1
-            parent["children"].append(xemail)
-        hashdict[cur_email.get("message-id", "??")] = xemail
-    return threads, authors
+            rsubject = PYPONY_RE_PREFIX.sub("", subject) + "_" + root_email.get("list_raw")
+        if rsubject and rsubject in self.hashed_by_subject:
+            return self.hashed_by_subject[rsubject]
+        return None
 
 
 def gravatar(eml):


[incubator-ponymail-foal] 03/03: children is a required field for threads.py

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 945ad98c415715e23c0bf057e515adb9ebde5b84
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Fri Sep 11 10:39:31 2020 +0200

    children is a required field for threads.py
---
 server/plugins/mbox.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/server/plugins/mbox.py b/server/plugins/mbox.py
index 329bb92..170a1c7 100644
--- a/server/plugins/mbox.py
+++ b/server/plugins/mbox.py
@@ -53,7 +53,8 @@ used_ui_fields = [
     "epoch",
     "subject",
     "id",
-    "gravatar"
+    "gravatar",
+    "children",
 ]