You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by se...@apache.org on 2016/12/09 23:39:17 UTC
incubator-ponymail git commit: combine pminfo.lua aggregations for efficiency
Repository: incubator-ponymail
Updated Branches:
refs/heads/master 11545df4f -> a8dd73a4d
combine pminfo.lua aggregations for efficiency
This fixes #273
Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/a8dd73a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/a8dd73a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/a8dd73a4
Branch: refs/heads/master
Commit: a8dd73a4db35e48db6bb8f1c7bd0b0323778743b
Parents: 11545df
Author: Sebb <se...@apache.org>
Authored: Fri Dec 9 23:39:02 2016 +0000
Committer: Sebb <se...@apache.org>
Committed: Fri Dec 9 23:39:02 2016 +0000
----------------------------------------------------------------------
CHANGELOG.md | 1 +
site/api/pminfo.lua | 143 ++++++++++-------------------------------------
2 files changed, 32 insertions(+), 112 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/a8dd73a4/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8192cf4..81e63b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
- mbox export now generates valid From_ line (#190)
- mbox export now escapes 'From ' lines in body text (#188)
- stats.lua ignores negated words when building the word cloud (#277)
+- combine pminfo.lua aggregations for efficiency (#273)
## CHANGES in 0.9b:
http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/a8dd73a4/site/api/pminfo.lua
----------------------------------------------------------------------
diff --git a/site/api/pminfo.lua b/site/api/pminfo.lua
index 22a5cb3..6977dd7 100644
--- a/site/api/pminfo.lua
+++ b/site/api/pminfo.lua
@@ -55,11 +55,9 @@ function handle(r)
list = "*." .. domain
}
}
-
- --[[ Get active lists ]]--
- local doc = elastic.raw {
- size = 0, -- we don't need the hits themselves
- query = {
+
+ -- common query
+ local QUERY = {
bool = {
must = {
{
@@ -75,20 +73,42 @@ function handle(r)
sterm
}
}
- },
+ }
+
+ --[[ Get active lists ]]--
+ local doc = elastic.raw {
+ size = 0, -- we don't need the hits themselves
+ query = QUERY,
aggs = {
- from = {
+ lists = { -- active lists (needed?)
terms = {
field = "list_raw",
size = MAXRESULTS
}
+ },
+ cards = { -- total participants
+ cardinality = {
+ field = "from_raw"
+ }
+ },
+ weekly = { -- histogram of emails
+ date_histogram = {
+ field = "date",
+ interval = "1d"
+ }
+ },
+ top100 = { -- top100 senders (needed?)
+ terms = {
+ field = "from_raw",
+ size = 100
+ }
}
}
}
local lists = {} -- TODO unused?
local nal = 0 -- This *is* used
- for x,y in pairs (doc.aggregations.from.buckets) do
+ for x,y in pairs (doc.aggregations.lists.buckets) do
local list, domain = y.key:match("^<?(.-)%.(.-)>?$")
if not domain:match("%..-%..-%..-") and domain:match("^[-_a-z0-9.]+$") and list:match("^[-_a-z0-9.]+$") then
lists[domain] = lists[domain] or {}
@@ -99,102 +119,17 @@ function handle(r)
-- Debug time point 2
- --[[ Get total number of participants ]]--
- local doc = elastic.raw {
- size = 0,
- query = {
- bool = {
- must = {
- {
- range = {
- date = daterange
- }
- },
- {
- term = {
- private = false
- }
- },
- sterm
- }
- }
- },
- aggs = {
- from = {
- cardinality = {
- field = "from_raw"
- }
- }
- }
- }
- local no_senders = doc.aggregations.from.value
+ local no_senders = doc.aggregations.cards.value
table.insert(t, r:clock() - tnow)
tnow = r:clock()
- --[[ Get histogram of emails ]]
- local doc = elastic.raw {
- size = 0, -- we don't need the hits themselves
- aggs = {
- weekly = {
- date_histogram = {
- field = "date",
- interval = "1d"
- }
- }
- },
- query = {
- bool = {
- must = {
- {
- range = {
- date = daterange
- }
- },
- {
- term = {
- private = false
- }
- },
- sterm
- }
- }
- }
- }
local activity = {}
for k, v in pairs (doc.aggregations.weekly.buckets) do
table.insert(activity, {v.key, v.doc_count})
end
- local doc = elastic.raw {
- size = 0, -- we don't need the hits themselves
- aggs = {
- from = {
- terms = {
- field = "from_raw",
- size = 100
- }
- }
- },
- query = {
- bool = {
- must = {
- {
- range = {
- date = daterange
- }
- },
- {
- term = {
- private = false
- }
- },
- sterm
- }
- }
- }
- }
local active_senders = {} -- TODO unused?
@@ -203,7 +138,7 @@ function handle(r)
table.insert(t, r:clock() - tnow)
tnow = r:clock()
- for x,y in pairs (doc.aggregations.from.buckets) do
+ for x,y in pairs (doc.aggregations.top100.buckets) do
local eml = y.key:match("<(.-)>") or y.key:match("%S+@%S+") or "unknown"
local gravatar = r:md5(eml)
local name = y.key:match("([^<]+)%s*<.->") or y.key:match("%S+@%S+") or eml
@@ -229,23 +164,7 @@ function handle(r)
local emls = {}
local sid = elastic.scan {
_source = {'message-id','in-reply-to','subject','epoch','references'},
- query = {
- bool = {
- must = {
- {
- range = {
- date = daterange
- }
- },
- {
- term = {
- private = false
- }
- },
- sterm
- }
- }
- },
+ query = QUERY,
sort = {
{
date = {