#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
########################################################################
# OPENAPI-URI: /api/bio/newtimers
########################################################################
# get:
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Biography'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#     - cookieAuth: []
#   summary: Shows some facts about a contributor
# post:
#   requestBody:
#     content:
#       application/json:
#         schema:
#           $ref: '#/components/schemas/defaultWidgetArgs'
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Biography'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#     - cookieAuth: []
#   summary: Shows some facts about a contributor
#
########################################################################
"""
This is the newtimers list renderer for Kibble
"""
import json
import time
import hashlib


def find_earlier(session, query, when, who, which, where, doctype, dOrg):
"""Find earlier document pertaining to this user. return True if found"""
if 'aggs' in query:
del query['aggs']
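    # Search the contributor's entire history (from epoch 0 up to now),
    # not just the requested reporting period.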
rangeQuery = {'range':
{
which: {
'from': 0,
'to': time.time()
}
}
}
query['query']['bool']['must'] = [
rangeQuery,
{
'term': {
'organisation': dOrg
}
},
{
'term': {
where: who
}
}
]
query['size'] = 1
query['sort'] = [{ which: 'asc' }]
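    # Ascending sort with size 1 returns only the single earliest hit.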
res = session.DB.ES.search(
index=session.DB.dbname,
doc_type=doctype,
body = query
)
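    # If the earliest hit predates `when`, or nothing matched at all,
    # the contributor is not a newcomer.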
if res['hits']['hits']:
doc = res['hits']['hits'][0]['_source']
if doc[which] >= when:
return [doc[which], doc]
else:
return [-1, None]
else:
        return [-1, None]


def run(API, environ, indata, session):
# We need to be logged in for this!
if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")
now = time.time()
# First, fetch the view if we have such a thing enabled
viewList = []
if indata.get('view'):
viewList = session.getView(indata.get('view'))
if indata.get('subfilter'):
viewList = session.subFilter(indata.get('subfilter'), view = viewList)
dOrg = session.user['defaultOrganisation'] or "apache"
# Keep track of all contributors, and newcomers
contributors = []
newcomers = {}
####################################################################
# Start by grabbing all contributors this period via terms agg #
####################################################################
dateTo = indata.get('to', int(time.time()))
dateFrom = indata.get('from', dateTo - (86400*30*6)) # Default to a 6 month span
############################
# CODE NEWTIMERS #
############################
rangeKey = 'ts'
rangeQuery = {'range':
{
rangeKey: {
'from': dateFrom,
'to': dateTo
}
}
}
query = {
'query': {
'bool': {
'must': [
rangeQuery,
{
'term': {
'organisation': dOrg
}
}
]
}
}
}
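    # Aggregate distinct committer and author email addresses seen this
    # period (up to 500 buckets each).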
query['aggs'] = {
'by_committer': {
'terms': {
'field': 'committer_email',
'size': 500
}
},
'by_author': {
'terms': {
'field': 'author_email',
'size': 500
}
}
}
# Source-specific or view-specific??
if indata.get('source'):
query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
elif viewList:
query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
res = session.DB.ES.search(
index=session.DB.dbname,
doc_type="code_commit",
body = query
)
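    # Merge committer and author buckets into one de-duplicated list of
    # code contributors seen this period.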
code_contributors = []
for bucket in res['aggregations']['by_committer']['buckets']:
email = bucket['key']
if email not in code_contributors:
code_contributors.append(email)
for bucket in res['aggregations']['by_author']['buckets']:
email = bucket['key']
if email not in code_contributors:
code_contributors.append(email)
# Now, for each contributor, find if they have done anything before
for email in code_contributors:
ea = find_earlier(session, query, dateFrom, email, 'ts', 'author_email', 'code_commit', dOrg)
ec = find_earlier(session, query, dateFrom, email, 'ts', 'committer_email', 'code_commit', dOrg)
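        # A code newcomer must first appear as both author and committer
        # within the requested period; record the earlier of the two
        # first-seen timestamps.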
if ea[0] != -1 and ec[0] != -1:
earliest = ea
if earliest[0] == -1 or (earliest[0] > ec[0] and ec[0] != -1):
earliest = ec
newcomers[email] = {
'code': earliest
}
############################
# ISSUE NEWTIMERS #
############################
rangeKey = 'created'
rangeQuery = {'range':
{
rangeKey: {
'from': dateFrom,
'to': dateTo
}
}
}
query = {
'query': {
'bool': {
'must': [
rangeQuery,
{
'term': {
'organisation': dOrg
}
}
]
}
}
}
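    # Aggregate distinct issue creators and closers seen this period
    # (up to 500 buckets each).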
query['aggs'] = {
'by_creator': {
'terms': {
'field': 'issueCreator',
'size': 500
}
},
'by_closer': {
'terms': {
'field': 'issueCloser',
'size': 500
}
}
}
# Source-specific or view-specific??
if indata.get('source'):
query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
elif viewList:
query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
res = session.DB.ES.search(
index=session.DB.dbname,
doc_type="issue",
body = query
)
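    # Merge creator and closer buckets into one de-duplicated list of
    # issue contributors seen this period.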
issue_contributors = []
for bucket in res['aggregations']['by_creator']['buckets']:
email = bucket['key']
if email not in issue_contributors:
issue_contributors.append(email)
for bucket in res['aggregations']['by_closer']['buckets']:
email = bucket['key']
if email not in issue_contributors:
issue_contributors.append(email)
# Now, for each contributor, find if they have done anything before
for email in issue_contributors:
ecr = find_earlier(session, query, dateFrom, email, 'created', 'issueCreator', 'issue', dOrg)
ecl = find_earlier(session, query, dateFrom, email, 'closed', 'issueCloser', 'issue', dOrg)
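        # An issue newcomer must first appear as both creator and closer
        # within the requested period; record the earlier of the two
        # first-seen timestamps.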
if ecr[0] != -1 and ecl[0] != -1:
earliest = ecr
if earliest[0] == -1 or (earliest[0] > ecl[0] and ecl[0] != -1):
earliest = ecl
newcomers[email] = newcomers.get(email, {})
newcomers[email]['issue'] = earliest
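    # Email newtimers are not computed by this endpoint, so the email
    # stats below always report an empty newcomer list and a count of zero.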
email_contributors = []
################################
# For each newtimer, get a bio #
################################
for email in newcomers:
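        # Person documents are keyed by sha1(organisation + email).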
pid = hashlib.sha1( ("%s%s" % (dOrg, email)).encode('ascii', errors='replace')).hexdigest()
person = {}
if session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id = pid):
person = session.DB.ES.get(index=session.DB.dbname, doc_type="person", id = pid)['_source']
person['md5'] = hashlib.md5(person['email'].encode('utf-8')).hexdigest() # gravatar needed for UI!
newcomers[email]['bio'] = person
newcomers_code = []
newcomers_issues = []
newcomers_email = []
# Count newcomers in each category (TODO: put this elsewhere earlier)
for email, entry in newcomers.items():
if 'code' in entry:
newcomers_code.append(email)
if 'issue' in entry:
newcomers_issues.append(email)
if 'email' in entry:
newcomers_email.append(email)
JSON_OUT = {
'okay': True,
'stats': {
'code': {
'newcomers': newcomers_code,
'seen': len(code_contributors),
},
'issues': {
'newcomers': newcomers_issues,
'seen': len(issue_contributors),
},
'email': {
'newcomers': newcomers_email,
'seen': len(email_contributors),
}
},
'bios': newcomers,
'responseTime': time.time() - now
}
yield json.dumps(JSON_OUT, indent = 2)