in api/pages/issue/retention.py [0:0]
def _period_query(organisation, start, end, source_filter=None):
    """Build the search body for one organisation's issues whose 'closed' value lies between start and end."""
    must = [
        {'range': {'closed': {'from': start, 'to': end}}},
        {'term': {'organisation': organisation}},
    ]
    if source_filter:
        must.append(source_filter)
    return {'query': {'bool': {'must': must}}}


def _format_experience(avgyr):
    """Render a duration given in fractional years as text such as "2 years, 3 months"."""
    # Round the *total* month count: rounding only the fractional remainder
    # can produce 12 and render as "1 year, 12 months".
    years, months = divmod(max(0, int(round(avgyr * 12))), 12)
    parts = []
    if years:
        parts.append("%u year%s" % (years, "s" if years != 1 else ""))
    if months:
        parts.append("%u month%s" % (months, "s" if months != 1 else ""))
    # Never return an empty string, the caller embeds this in a sentence.
    return ", ".join(parts) or "less than a month"


def run(API, environ, indata, session):
    """Yield a JSON document describing contributor retention for issues.

    Time is walked in three-month periods starting in 1970 and ending at the
    last period boundary not later than the current month. For every period
    the issues with a 'closed' value inside the period are aggregated by
    'issueCloser' and 'issueCreator'; the people found are tracked to derive
    how many joined, were retained or dropped out.

    Parameters:
        API: object providing ``exception(code, message)``; the returned
            object is raised on errors.
        environ: unused by this endpoint.
        indata: request parameters (mapping). Keys read here:
            'view', 'subfilter', 'source' and 'span' (number of months a
            person stays "active" after their last activity, default 12).
        session: object providing ``user``, ``getView``, ``subFilter`` and
            ``DB.ES`` / ``DB.dbname`` for the search backend.

    Yields:
        A single JSON string with the keys 'text', 'timeseries', 'counts',
        'averageYears', 'okay' and 'responseTime'.

    Raises:
        API.exception(403, ...) when nobody is logged in and
        API.exception(400, ...) when 'span' is not a finite number.
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    started = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view=viewList)

    # By default a contributor counts as active if seen within the past year.
    # Coerce once so string values such as "6" work and bad input fails early
    # with a client error instead of a TypeError deep inside the loop.
    try:
        hl = float(indata.get('span', 12))
        if hl != hl or abs(hl) == float('inf'):
            raise ValueError("span must be finite")
    except (TypeError, ValueError, OverflowError):
        raise API.exception(400, "The 'span' parameter must be a number of months.")
    span_seconds = hl * 30.45 * 86400

    tnow = datetime.date.today()
    cy = tnow.year
    # Align period boundaries with the current month: the start month is
    # always one of 3, 6, 9 or 12.
    nm = tnow.month - (tnow.month % 3)
    if nm < 1:
        nm += 12
    # The walk always begins in 1970; leading periods without any data are
    # skipped inside the loop.
    ny = 1970

    # Everything below is identical for every period, so build it once.
    dOrg = session.user['defaultOrganisation'] or "apache"
    # Source-specific or view-specific??
    source_filter = None
    if indata.get('source'):
        source_filter = {'term': {'sourceID': indata.get('source')}}
    elif viewList:
        source_filter = {'terms': {'sourceID': viewList}}

    ts = []
    peopleSeen = {}    # person -> period end at which they (re)joined
    activePeople = {}  # person -> period end of their last counted activity
    allPeople = {}     # person -> period end at which they were first ever seen
    found_something = False

    while ny < cy or (ny == cy and (nm + 3) <= tnow.month):
        t = time.mktime(datetime.date(ny, nm, 1).timetuple())
        nm += 3
        if nm > 12:
            nm -= 12
            ny += 1
        if ny == cy and nm > tnow.month:
            break
        tf = time.mktime(datetime.date(ny, nm, 1).timetuple())

        query = _period_query(dOrg, t, tf, source_filter)

        # Skip leading empty periods cheaply. Once data has been found the
        # count is irrelevant, so the extra round trip is no longer made.
        if not found_something:
            res = session.DB.ES.count(
                index=session.DB.dbname,
                doc_type="issue",
                body=query
            )
            if res['count'] == 0:
                continue
            found_something = True

        # Get the people who closed / created the issues of this period
        query['aggs'] = {
            'by_o': {'terms': {'field': 'issueCloser', 'size': 50000}},
            'by_c': {'terms': {'field': 'issueCreator', 'size': 50000}},
        }
        res = session.DB.ES.search(
            index=session.DB.dbname,
            doc_type="issue",
            size=0,
            body=query
        )
        aggs = res['aggregations']

        added = 0
        for bucket in aggs['by_o']['buckets']:
            who = bucket['key']
            if who not in peopleSeen:
                peopleSeen[who] = tf
                added += 1
            activePeople[who] = tf
            allPeople.setdefault(who, tf)

        for bucket in aggs['by_c']['buckets']:
            who = bucket['key']
            if who not in peopleSeen:
                peopleSeen[who] = tf
                added += 1
            # NOTE(review): unlike closers, creators do not refresh an
            # existing activity timestamp - behaviour kept as-is; confirm
            # whether that is intended.
            activePeople.setdefault(who, tf)
            allPeople.setdefault(who, tf)

        # Drop everyone whose last counted activity is older than the span
        cutoff = t - span_seconds
        expired = [who for who, last_seen in activePeople.items() if last_seen < cutoff]
        for who in expired:
            del activePeople[who]
            peopleSeen.pop(who, None)
        lost = len(expired)
        retained = len(activePeople) - added

        ts.append({
            'date': tf,
            'People who (re)joined': added,
            'People who quit': lost,
            'People retained': retained,
            'Active people': added + retained
        })

    # Experience buckets as [label, minimum age in seconds], checked from the
    # largest threshold down so each person lands in exactly one bucket.
    groups = [
        ['More than 5 years', (5*365*86400)+1],
        ['2 - 5 years', (2*365*86400)+1],
        ['1 - 2 years', (365*86400)],
        ['Less than a year', 1]
    ]
    groups.sort(key=lambda g: g[1], reverse=True)

    counts = {}
    totExp = 0
    now = time.time()  # one clock reading so all people are measured alike
    for person in activePeople:
        first_seen = allPeople[person]
        totExp += now - first_seen
        for label, min_age in groups:
            if first_seen <= now - min_age:
                counts[label] = counts.get(label, 0) + 1
                break
    avgyr = (totExp / (86400*365)) / max(len(activePeople), 1)

    ts.sort(key=lambda x: x['date'])

    avgm = _format_experience(avgyr)
    JSON_OUT = {
        'text': "This shows Contributor retention as calculated over a %u month timespan. The average experience of currently active people is %s." % (hl, avgm),
        'timeseries': ts,
        'counts': counts,
        'averageYears': avgyr,
        'okay': True,
        'responseTime': time.time() - started,
    }
    yield json.dumps(JSON_OUT)