You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2016/12/09 23:39:17 UTC

incubator-ponymail git commit: combine pminfo.lua aggregations for efficiency

Repository: incubator-ponymail
Updated Branches:
  refs/heads/master 11545df4f -> a8dd73a4d


combine pminfo.lua aggregations for efficiency

This fixes #273

Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/a8dd73a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/a8dd73a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/a8dd73a4

Branch: refs/heads/master
Commit: a8dd73a4db35e48db6bb8f1c7bd0b0323778743b
Parents: 11545df
Author: Sebb <se...@apache.org>
Authored: Fri Dec 9 23:39:02 2016 +0000
Committer: Sebb <se...@apache.org>
Committed: Fri Dec 9 23:39:02 2016 +0000

----------------------------------------------------------------------
 CHANGELOG.md        |   1 +
 site/api/pminfo.lua | 143 ++++++++++-------------------------------------
 2 files changed, 32 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/a8dd73a4/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8192cf4..81e63b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 - mbox export now generates valid From_ line (#190)
 - mbox export now escapes 'From ' lines in body text (#188)
 - stats.lua ignores negated words when building the word cloud (#277)
+- combine pminfo.lua aggregations for efficiency (#273)
 
 ## CHANGES in 0.9b:
 

http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/a8dd73a4/site/api/pminfo.lua
----------------------------------------------------------------------
diff --git a/site/api/pminfo.lua b/site/api/pminfo.lua
index 22a5cb3..6977dd7 100644
--- a/site/api/pminfo.lua
+++ b/site/api/pminfo.lua
@@ -55,11 +55,9 @@ function handle(r)
                   list = "*." .. domain
               }
           }
-    
-    --[[ Get active lists ]]--
-    local doc = elastic.raw {
-        size = 0, -- we don't need the hits themselves
-        query = {
+
+    -- common query
+    local QUERY = {
             bool = {
                 must = {
                     {
@@ -75,20 +73,42 @@ function handle(r)
                     sterm
                 }
             }
-        },
+    }
+
+    --[[ Get active lists ]]--
+    local doc = elastic.raw {
+        size = 0, -- we don't need the hits themselves
+        query = QUERY,
         aggs = {
-            from = {
+            lists = { -- active lists (needed?)
                 terms = {
                     field = "list_raw",
                     size = MAXRESULTS
                 }
+            },
+            cards = { -- total participants
+                cardinality = {
+                    field = "from_raw"
+                }
+            },
+            weekly = { -- histogram of emails
+                date_histogram = {
+                    field = "date",
+                    interval = "1d"
+                }
+            },
+            top100 = { -- top100 senders (needed?)
+                terms = {
+                    field = "from_raw",
+                    size = 100
+                }
             }
         }
     }
     local lists = {} -- TODO unused?
     local nal = 0 -- This *is* used
 
-    for x,y in pairs (doc.aggregations.from.buckets) do
+    for x,y in pairs (doc.aggregations.lists.buckets) do
         local list, domain = y.key:match("^<?(.-)%.(.-)>?$")
         if not domain:match("%..-%..-%..-") and domain:match("^[-_a-z0-9.]+$") and list:match("^[-_a-z0-9.]+$") then
             lists[domain] = lists[domain] or {}
@@ -99,102 +119,17 @@ function handle(r)
     
     -- Debug time point 2
     
-    --[[ Get total number of participants ]]--
-    local doc = elastic.raw {
-        size = 0,
-        query = {
-            bool = {
-                must = {
-                    {
-                        range = {
-                            date = daterange
-                        }
-                    }, 
-                    {
-                        term = {
-                            private = false
-                        }
-                    },
-                    sterm
-                }
-            }
-        },
-        aggs = {
-            from = {
-                cardinality = {
-                    field = "from_raw"
-                }
-            }
-        }
-    }
-    local no_senders = doc.aggregations.from.value
+    local no_senders = doc.aggregations.cards.value
     
     table.insert(t, r:clock() - tnow)
     tnow = r:clock()
     
-    --[[ Get histogram of emails ]]
-    local doc = elastic.raw {
-        size = 0, -- we don't need the hits themselves
-        aggs = {
-            weekly = {
-                date_histogram = {
-                    field = "date",
-                    interval = "1d"
-                }
-            }
-        },
-        query = {
-            bool = {
-                must = {
-                    {
-                        range = {
-                            date = daterange
-                        }
-                    },
-                    {
-                        term = {
-                            private = false
-                        }
-                    },
-                    sterm
-                }
-            }
-        }
-    }
     local activity = {}
     
     for k, v in pairs (doc.aggregations.weekly.buckets) do
         table.insert(activity, {v.key, v.doc_count})
     end
     
-    local doc = elastic.raw {
-        size = 0, -- we don't need the hits themselves
-        aggs = {
-            from = {
-                terms = {
-                    field = "from_raw",
-                    size = 100
-                }
-            }
-        },
-        query = {
-            bool = {
-                must = {
-                    {
-                        range = {
-                            date = daterange
-                        }
-                    },
-                    {
-                        term = {
-                            private = false
-                        }
-                    },
-                    sterm
-                }
-            }
-        }
-    }
     local active_senders = {} -- TODO unused?
     
     
@@ -203,7 +138,7 @@ function handle(r)
     table.insert(t, r:clock() - tnow)
     tnow = r:clock()
     
-    for x,y in pairs (doc.aggregations.from.buckets) do
+    for x,y in pairs (doc.aggregations.top100.buckets) do
         local eml = y.key:match("<(.-)>") or y.key:match("%S+@%S+") or "unknown"
         local gravatar = r:md5(eml)
         local name = y.key:match("([^<]+)%s*<.->") or y.key:match("%S+@%S+") or eml
@@ -229,23 +164,7 @@ function handle(r)
     local emls = {}
     local sid = elastic.scan {
         _source = {'message-id','in-reply-to','subject','epoch','references'},
-        query = {
-            bool = {
-                must = {
-                    {
-                        range = {
-                            date = daterange
-                        }
-                    },
-                    {
-                        term = {
-                            private = false
-                        }
-                    },
-                    sterm
-                }
-            }
-        },
+        query = QUERY,
         sort = {
             {
                 date = {