in api/pages/bio/newtimers.py [0:0]
def run(API, environ, indata, session):
    """Report first-time contributors ("newtimers") for a time period.

    Two searches are run against the session's Elasticsearch index, one on
    ``code_commit`` documents and one on ``issue`` documents, each restricted
    to the requested period and organisation (and optionally to one source
    or a view's sources). Every distinct contributor found in the terms
    aggregations is then passed to ``find_earlier`` (defined elsewhere in
    this file) to decide whether they count as a newcomer.

    Parameters:
        API: object whose ``exception(code, message)`` builds the error to
            raise for unauthenticated requests.
        environ: not used by this function.
        indata: dict-like request data. Optional keys read here: ``view``,
            ``subfilter``, ``to``, ``from`` and ``source``.
        session: current session; this function reads ``user``, ``getView``,
            ``subFilter``, ``DB.ES`` and ``DB.dbname``.

    Yields:
        A single JSON string with ``okay``, per-category ``stats`` (newcomer
        keys and number of contributors seen), ``bios`` and ``responseTime``.

    Raises:
        Whatever ``API.exception(403, ...)`` returns, when no user is logged in.
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view=viewList)

    dOrg = session.user['defaultOrganisation'] or "apache"

    # Newcomers, keyed by contributor; each value collects 'code', 'issue' and 'bio'
    newcomers = {}

    dateTo = indata.get('to', int(time.time()))
    dateFrom = indata.get('from', dateTo - (86400*30*6))  # Default to a 6 month span

    ############################
    # CODE NEWTIMERS           #
    ############################
    query = _newtimer_query(
        'ts', dateFrom, dateTo, dOrg,
        (('by_committer', 'committer_email'), ('by_author', 'author_email')),
        indata, viewList,
    )
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="code_commit",
        body=query
    )
    code_contributors = _bucket_keys(res, ('by_committer', 'by_author'))

    # Now, for each contributor, find if they have done anything before.
    # NOTE(review): find_earlier is defined elsewhere and receives this very
    # query dict on every call; confirm whether it modifies it.
    for email in code_contributors:
        ea = find_earlier(session, query, dateFrom, email, 'ts', 'author_email', 'code_commit', dOrg)
        ec = find_earlier(session, query, dateFrom, email, 'ts', 'committer_email', 'code_commit', dOrg)
        earliest = _earliest_hit(ea, ec)
        if earliest is not None:
            newcomers[email] = {
                'code': earliest
            }

    ############################
    # ISSUE NEWTIMERS          #
    ############################
    query = _newtimer_query(
        'created', dateFrom, dateTo, dOrg,
        (('by_creator', 'issueCreator'), ('by_closer', 'issueCloser')),
        indata, viewList,
    )
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="issue",
        body=query
    )
    issue_contributors = _bucket_keys(res, ('by_creator', 'by_closer'))

    # Now, for each contributor, find if they have done anything before
    for email in issue_contributors:
        ecr = find_earlier(session, query, dateFrom, email, 'created', 'issueCreator', 'issue', dOrg)
        ecl = find_earlier(session, query, dateFrom, email, 'closed', 'issueCloser', 'issue', dOrg)
        earliest = _earliest_hit(ecr, ecl)
        if earliest is not None:
            # Keep any 'code' entry already recorded for the same contributor
            newcomers.setdefault(email, {})['issue'] = earliest

    # No email lookup is performed here, so this category always reports zero
    email_contributors = []

    ################################
    # For each newtimer, get a bio #
    ################################
    for email, entry in newcomers.items():
        pid = hashlib.sha1(("%s%s" % (dOrg, email)).encode('ascii', errors='replace')).hexdigest()
        person = {}
        if session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id=pid):
            person = session.DB.ES.get(index=session.DB.dbname, doc_type="person", id=pid)['_source']
        # gravatar needed for UI! Fall back to the contributor key when there
        # is no person document (or it has no email) instead of raising KeyError.
        gravatar_source = person.get('email') or email
        person['md5'] = hashlib.md5(gravatar_source.encode('utf-8')).hexdigest()
        entry['bio'] = person

    # Count newcomers in each category (TODO: put this elsewhere earlier)
    newcomers_code = [email for email, entry in newcomers.items() if 'code' in entry]
    newcomers_issues = [email for email, entry in newcomers.items() if 'issue' in entry]
    newcomers_email = [email for email, entry in newcomers.items() if 'email' in entry]

    JSON_OUT = {
        'okay': True,
        'stats': {
            'code': {
                'newcomers': newcomers_code,
                'seen': len(code_contributors),
            },
            'issues': {
                'newcomers': newcomers_issues,
                'seen': len(issue_contributors),
            },
            'email': {
                'newcomers': newcomers_email,
                'seen': len(email_contributors),
            }
        },
        'bios': newcomers,
        'responseTime': time.time() - now
    }
    yield json.dumps(JSON_OUT, indent=2)


def _newtimer_query(range_key, date_from, date_to, org, agg_fields, indata, view_list):
    """Build the search body for one contributor category.

    ``agg_fields`` is an ordered sequence of ``(aggregation name, field)``
    pairs; each becomes a terms aggregation capped at 500 buckets.
    """
    must = [
        {'range': {range_key: {'from': date_from, 'to': date_to}}},
        {'term': {'organisation': org}},
    ]
    # Source-specific or view-specific??
    if indata.get('source'):
        must.append({'term': {'sourceID': indata.get('source')}})
    elif view_list:
        must.append({'terms': {'sourceID': view_list}})
    return {
        'query': {'bool': {'must': must}},
        'aggs': {
            name: {'terms': {'field': field, 'size': 500}}
            for name, field in agg_fields
        },
    }


def _bucket_keys(res, agg_names):
    """Return the distinct bucket keys of the named aggregations, in first-seen order."""
    seen = set()
    ordered = []
    for name in agg_names:
        for bucket in res['aggregations'][name]['buckets']:
            key = bucket['key']
            if key not in seen:
                seen.add(key)
                ordered.append(key)
    return ordered


def _earliest_hit(first, second):
    """Pick the result with the smaller leading value, or None if either leads with -1.

    Both arguments are ``find_earlier`` results, indexed here only at ``[0]``.
    On a tie ``first`` wins.
    """
    if first[0] == -1 or second[0] == -1:
        return None
    return second if first[0] > second[0] else first