You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kibble.apache.org by hu...@apache.org on 2018/01/17 18:37:39 UTC
[kibble] 03/06: rough in a comstat-like page

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kibble.git

commit c377dbee59308927ad0d9ff113c1d8f29a718005
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Wed Jan 17 19:36:33 2018 +0100

    rough in a comstat-like page
    
    This needs to be reworked a bit later on, but it works...
    albeit super slow!
---
 api/pages/bio/newtimers.py | 356 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 356 insertions(+)

diff --git a/api/pages/bio/newtimers.py b/api/pages/bio/newtimers.py
new file mode 100644
index 0000000..41537f7
--- /dev/null
+++ b/api/pages/bio/newtimers.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+########################################################################
+# OPENAPI-URI: /api/bio/newtimers
+########################################################################
+# get:
+#   responses:
+#     '200':
+#       content:
+#         application/json:
+#           schema:
+#             $ref: '#/components/schemas/Biography'
+#       description: 200 Response
+#     default:
+#       content:
+#         application/json:
+#           schema:
+#             $ref: '#/components/schemas/Error'
+#       description: unexpected error
+#   security:
+#   - cookieAuth: []
+#   summary: Lists contributors whose first contribution falls within the given time span
+# post:
+#   requestBody:
+#     content:
+#       application/json:
+#         schema:
+#           $ref: '#/components/schemas/defaultWidgetArgs'
+#   responses:
+#     '200':
+#       content:
+#         application/json:
+#           schema:
+#             $ref: '#/components/schemas/Biography'
+#       description: 200 Response
+#     default:
+#       content:
+#         application/json:
+#           schema:
+#             $ref: '#/components/schemas/Error'
+#       description: unexpected error
+#   security:
+#   - cookieAuth: []
+#   summary: Lists contributors whose first contribution falls within the given time span
+# 
+########################################################################
+
+
+
+
+
+"""
+This is the newtimers list renderer for Kibble
+"""
+
+import json
+import time
+import hashlib
+
+def find_earlier(session, query, when, who, which, where, doctype, dOrg):
+    """Find earlier document pertaining to this user. return True if found"""
+    if 'aggs' in query:
+        del query['aggs']
+        
+    rangeQuery = {'range':
+                    {
+                        which: {
+                            'from': 0,
+                            'to': time.time()
+                        }
+                    }
+                }
+    
+    query['query']['bool']['must'] = [
+        rangeQuery,
+        {
+            'term': {
+                'organisation': dOrg
+            }
+        },
+        {
+            'term': {
+                where: who
+            }
+            
+        }
+        ]
+    query['size'] = 1
+    query['sort'] = [{ which: 'asc' }]
+    
+    res = session.DB.ES.search(
+        index=session.DB.dbname,
+        doc_type=doctype,
+        body = query
+    )
+    if res['hits']['hits']:
+        doc = res['hits']['hits'][0]['_source']
+        if doc[which] >= when:
+            return [doc[which], doc]
+        else:
+            return [-1, None]
+    else:
+        return [-1, None]
+    
+
+def run(API, environ, indata, session):
+    
+    # We need to be logged in for this!
+    if not session.user:
+        raise API.exception(403, "You must be logged in to use this API endpoint! %s")
+    
+    now = time.time()
+    
+    # First, fetch the view if we have such a thing enabled
+    viewList = []
+    if indata.get('view'):
+        viewList = session.getView(indata.get('view'))
+    if indata.get('subfilter'):
+        viewList = session.subFilter(indata.get('subfilter'), view = viewList) 
+    
+    
+    dOrg = session.user['defaultOrganisation'] or "apache"
+    
+    
+    # Keep track of all contributors, and newcomers
+    contributors = []
+    newcomers = {}
+    
+    ####################################################################
+    # Start by grabbing all contributors this period via terms agg     #
+    ####################################################################
+    dateTo = indata.get('to', int(time.time()))
+    dateFrom = indata.get('from', dateTo - (86400*30*6)) # Default to a 6 month span
+    
+    
+    
+    
+    ############################
+    # CODE NEWTIMERS           #
+    ############################
+    rangeKey = 'ts'
+    rangeQuery = {'range':
+                    {
+                        rangeKey: {
+                            'from': dateFrom,
+                            'to': dateTo
+                        }
+                    }
+                }
+    
+    query = {
+                'query': {
+                    'bool': {
+                        'must': [
+                            rangeQuery,
+                            {
+                                'term': {
+                                    'organisation': dOrg
+                                }
+                            }
+                        ]
+                    }
+                }
+            }
+    
+    query['aggs'] = {
+        'by_committer': {
+            'terms': {
+                'field': 'committer_email',
+                'size': 500
+            }                
+        },
+        'by_author': {
+            'terms': {
+                'field': 'author_email',
+                'size': 500
+            }                
+        }
+    }
+    
+    # Source-specific or view-specific??
+    if indata.get('source'):
+        query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
+    elif viewList:
+        query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
+    
+    
+    res = session.DB.ES.search(
+            index=session.DB.dbname,
+            doc_type="code_commit",
+            body = query
+        )
+    
+    code_contributors = []
+    for bucket in res['aggregations']['by_committer']['buckets']:
+        email = bucket['key']
+        if email not in code_contributors:
+            code_contributors.append(email)
+    
+    for bucket in res['aggregations']['by_author']['buckets']:
+        email = bucket['key']
+        if email not in code_contributors:
+            code_contributors.append(email)
+    
+    # Now, for each contributor, find if they have done anything before
+    for email in code_contributors:
+        ea = find_earlier(session, query, dateFrom, email, 'ts', 'author_email', 'code_commit', dOrg)
+        ec = find_earlier(session, query, dateFrom, email, 'ts', 'committer_email', 'code_commit', dOrg)
+        if ea[0] != -1 and ec[0] != -1:
+            earliest = ea
+            if earliest[0] == -1 or (earliest[0] > ec[0] and ec[0] != -1):
+                earliest = ec
+            newcomers[email] = {
+                'code': earliest
+            }
+    
+    
+    
+    ############################
+    # ISSUE NEWTIMERS          #
+    ############################
+    rangeKey = 'created'
+    rangeQuery = {'range':
+                    {
+                        rangeKey: {
+                            'from': dateFrom,
+                            'to': dateTo
+                        }
+                    }
+                }
+    
+    query = {
+                'query': {
+                    'bool': {
+                        'must': [
+                            rangeQuery,
+                            {
+                                'term': {
+                                    'organisation': dOrg
+                                }
+                            }
+                        ]
+                    }
+                }
+            }
+    
+    query['aggs'] = {
+        'by_creator': {
+            'terms': {
+                'field': 'issueCreator',
+                'size': 500
+            }                
+        },
+        'by_closer': {
+            'terms': {
+                'field': 'issueCloser',
+                'size': 500
+            }                
+        }
+    }
+    
+    # Source-specific or view-specific??
+    if indata.get('source'):
+        query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
+    elif viewList:
+        query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
+    
+    
+    res = session.DB.ES.search(
+            index=session.DB.dbname,
+            doc_type="issue",
+            body = query
+        )
+    
+    issue_contributors = []
+    for bucket in res['aggregations']['by_creator']['buckets']:
+        email = bucket['key']
+        if email not in issue_contributors:
+            issue_contributors.append(email)
+    
+    for bucket in res['aggregations']['by_closer']['buckets']:
+        email = bucket['key']
+        if email not in issue_contributors:
+            issue_contributors.append(email)
+    
+    # Now, for each contributor, find if they have done anything before
+    for email in issue_contributors:
+        ecr = find_earlier(session, query, dateFrom, email, 'created', 'issueCreator', 'issue', dOrg)
+        ecl = find_earlier(session, query, dateFrom, email, 'closed', 'issueCloser', 'issue', dOrg)
+        if ecr[0] != -1 and ecl[0] != -1:
+            earliest = ecr
+            if earliest[0] == -1 or (earliest[0] > ecl[0] and ecl[0] != -1):
+                earliest = ecl
+            newcomers[email] = newcomers.get(email, {})
+            newcomers[email]['issue'] = earliest
+    
+    email_contributors = []
+    
+    ################################
+    # For each newtimer, get a bio #
+    ################################
+    
+    for email in newcomers:
+        pid = hashlib.sha1( ("%s%s" % (dOrg, email)).encode('ascii', errors='replace')).hexdigest()
+        person = {}
+        if session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id = pid):
+            person = session.DB.ES.get(index=session.DB.dbname, doc_type="person", id = pid)['_source']
+        newcomers[email]['bio'] = person
+    
+    newcomers_code = []
+    newcomers_issues = []
+    newcomers_email = []
+    
+    # Count newcomers in each category (TODO: put this elsewhere earlier)
+    for email, entry in newcomers.items():
+        if 'code' in entry:
+            newcomers_code.append(email)
+        if 'issue' in entry:
+            newcomers_issues.append(email)
+        if 'email' in entry:
+            newcomers_email.append(email)
+    
+    JSON_OUT = {
+        'okay': True,
+        'stats': {
+            'code': {
+                'newcomers': newcomers_code,
+                'seen': len(code_contributors),
+            },
+            'issues': {
+                'newcomers': newcomers_issues,
+                'seen': len(issue_contributors),
+            },
+            'email': {
+                'newcomers': newcomers_email,
+                'seen': len(email_contributors),
+            }
+        },
+        'bios': newcomers,
+        'responseTime': time.time() - now
+    }
+    yield json.dumps(JSON_OUT, indent = 2)

-- 
To stop receiving notification emails like this one, please contact
"commits@kibble.apache.org" <co...@kibble.apache.org>.