You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2021/12/16 13:58:46 UTC

[incubator-ponymail-foal] branch master updated (b1aecf8 -> 4303192)

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git.


    from b1aecf8  Fix up types
     new 91f8ad8  stats is inefficient; it gets inaccessible mails
     new 4303192  Update version

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 server/endpoints/stats.py  |  7 +++++++
 server/plugins/messages.py | 37 +++++++++++++++++++------------------
 server/server_version.py   |  2 +-
 3 files changed, 27 insertions(+), 19 deletions(-)

[incubator-ponymail-foal] 02/02: Update version

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 43031928dc5bee857f734bbdf7ca56fa6bf110f7
Author: Sebb <se...@apache.org>
AuthorDate: Thu Dec 16 13:58:38 2021 +0000

    Update version
---
 server/server_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/server_version.py b/server/server_version.py
index 6dbcfb3..80a61f4 100644
--- a/server/server_version.py
+++ b/server/server_version.py
@@ -1,2 +1,2 @@
 # This file is generated by server/update_version.sh
-PONYMAIL_SERVER_VERSION = '1faa0ce'
+PONYMAIL_SERVER_VERSION = '91f8ad8'

[incubator-ponymail-foal] 01/02: stats is inefficient; it gets inaccessible mails

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git

commit 91f8ad856d0f77734f0072072a3fa0a4384c8e70
Author: Sebb <se...@apache.org>
AuthorDate: Thu Dec 16 13:58:07 2021 +0000

    stats is inefficient; it gets inaccessible mails
    
    This fixes #181
---
 server/endpoints/stats.py  |  7 +++++++
 server/plugins/messages.py | 37 +++++++++++++++++++------------------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/server/endpoints/stats.py b/server/endpoints/stats.py
index 75f46e3..01513ec 100644
--- a/server/endpoints/stats.py
+++ b/server/endpoints/stats.py
@@ -49,6 +49,13 @@ async def process(
     except AssertionError as ae:  # If defuzzer encounters internal errors, it will throw an AssertionError
         return aiohttp.web.Response(headers={"content-type": "text/plain",}, status=500, text=str(ae))
     
+    # get a filter for use with get_activity_span (no date)
+    # It can also be used with dated queries
+    query_filter = await plugins.messages.get_accessible_filter(session, query_defuzzed_nodate)
+    if query_filter:
+        query_defuzzed['filter'] = query_filter
+        query_defuzzed_nodate['filter'] = query_filter
+
     # since: check if there have been recent updates to the data
     if 'since' in indata:
         since = indata.get('since', None)
diff --git a/server/plugins/messages.py b/server/plugins/messages.py
index 781a8c2..46fd0f1 100644
--- a/server/plugins/messages.py
+++ b/server/plugins/messages.py
@@ -350,7 +350,6 @@ async def query_batch(
     """
     assert session.database, DATABASE_NOT_CONNECTED
     preserve_order = True if epoch_order == "asc" else False
-    query_defuzzed = await filter_accessible(session, query_defuzzed)
     es_query = {
         "query": {"bool": query_defuzzed},
         "sort": [{"epoch": {"order": epoch_order}}],
@@ -439,17 +438,16 @@ async def query(
 async def wordcloud(session: plugins.session.SessionObject, query_defuzzed: dict) -> dict:
     """
     Wordclouds via significant terms query in ES
+    The query must include a private mail filter if necessary
     """
     wc = {}
     try:
-        # Copy the query and ensure we're only looking at public content
-        wc_public_query = dict(query_defuzzed)
-        wc_public_query["filter"] = [{"term": {"private": False}}]
+
         assert session.database, DATABASE_NOT_CONNECTED
         res = await session.database.search(
             body={
                 "size": 0,
-                "query": {"bool": wc_public_query},
+                "query": {"bool": query_defuzzed},
                 "aggregations": {
                     "cloud": {"significant_terms": {"field": "subject", "size": 10}}
                 },
@@ -463,17 +461,18 @@ async def wordcloud(session: plugins.session.SessionObject, query_defuzzed: dict
         pass
     return wc
 
-async def filter_accessible(session: plugins.session.SessionObject, query_defuzzed: dict) -> dict:
+async def get_accessible_filter(session: plugins.session.SessionObject, query_defuzzed: dict) -> typing.Optional[list]:
     """
-    Update query to take account of private emails
-    Reduces the need to filter out emails later
+    Return a filter to be applied to the query to exclude inaccessible mails.
+    If no filter is needed, return None
+    e.g. 
+    query_filter = await get_accessible_filter(session, query)
+    if query_filter:
+        query['filter'] = query_filter
     """
-    query_copy = dict(query_defuzzed)
     if not session.credentials:
         # if no credentials, only need to search public mails
-        query_copy["filter"] = [{"term": {"private": False}}]
-        return query_copy
-
+        return [{"term": {"private": False}}]
     # which private lists might be involved in the search?
     fuzz_private_only = dict(query_defuzzed)
     fuzz_private_only["filter"] = [{"term": {"private": True}}]
@@ -501,18 +500,20 @@ async def filter_accessible(session: plugins.session.SessionObject, query_defuzz
     
     # If we can't access all private lists found, either only public emails or lists we can access.
     if not private_lists_accessible:  # No private lists accessible, just filter for public
-        query_copy["filter"] = [{"term": {"private": False}}]
+        return [{"term": {"private": False}}]
     elif private_lists_found != private_lists_accessible:  # Some private lists, search for public OR those..
-        query_copy["filter"] = [
+        return [
             {"bool": {"should": [{"term": {"private": False}}, {"terms": {"list_raw": private_lists_accessible}}]}}
         ]
 
-    return query_copy
+    return None
 
-async def get_activity_span(session: plugins.session.SessionObject, query_defuzzed: dict) -> typing.Tuple[datetime.datetime, datetime.datetime, dict]:
-    """ Fetches the activity span of a search as well as active months within that span """
 
-    query_defuzzed = await filter_accessible(session, query_defuzzed)
+async def get_activity_span(session: plugins.session.SessionObject, query_defuzzed: dict) -> typing.Tuple[datetime.datetime, datetime.datetime, dict]:
+    """
+    Fetches the activity span of a search as well as active months within that span
+    The query must include a private filter if necessary
+    """
 
     # Get oldest and youngest doc in single scan, as well as a monthly histogram
     assert session.database, DATABASE_NOT_CONNECTED