scripts/cronjobs/countaccounts.py (57 lines of code) (raw):

""" Extracts data showing the number of accounts created each month. Reads: https://whimsy.apache.org/public/public_ldap_people.json Updates: ../../site/json/foundation/accounts-evolution.json The JSON data has the following format: { "lastCreateTimestamp": "20190309011146Z", "people": { ... "hboutemy": { "name": "Herve Boutemy", "createTimestamp": "20071016014212Z", ... Whimsy applies corrections to the createTimestamp where known For example the above LDAP record shows: dn: uid=hboutemy,ou=people,dc=apache,dc=org createTimestamp: 20090519192255Z The output file consists of lines of the form: { "1999-02": 22, "1999-03": 0, } N.B. The script updates the entry for the current month. However on day one of a new month, it also updates the previous month This is so it sees accounts that were created after the final run on the last day of the month """ import sys # The output from json.dumps() has a trailing space in Python2, it is absent in Python3 # So ensure we always use the same python version as the crontab if sys.hexversion < 0x030000F0: raise RuntimeError("This script requires Python3") import json from datetime import datetime, timedelta from urlutils import UrlCache uc = UrlCache(interval=0) def loadJson(url): print("Reading " +url) resp = uc.get(url, name=None, encoding='utf-8', errors=None) try: content = resp.read() # json.load() does this anyway try: j = json.loads(content) except Exception as e: # The Proxy error response is around 4800 bytes print("Error parsing response:\n%s" % content[0:4800]) raise e finally: resp.close() return j js = {} with open("../../site/json/foundation/accounts-evolution.json") as f: js = json.loads(f.read()) f.close() now = datetime.now() # fetch time once if len(sys.argv) > 3: now = datetime(year=int(sys.argv[1]),month=int(sys.argv[2]), day=int(sys.argv[3])) print("Overriding current time: %s" % now) currentMonth = now.month currentYear = now.year ym = "%04u-%02u" % (currentYear, currentMonth) tym = "%04u%02u" % (currentYear, currentMonth) print("Looking for entries for %s" % tym) js[ym] = 0 # Potentially check for the previous month as well ym1 = None tym1 = None if now.day == 1: # Day one of month, redo previous month to ensure all new entries are seen yesterday = now - timedelta(days = 1) ym1 = "%04u-%02u" % (yesterday.year, yesterday.month) tym1 = "%04u%02u" % (yesterday.year, yesterday.month) print("Also looking for entries for %s" % tym1) js[ym1] = 0 ldappeople = loadJson('https://whimsy.apache.org/public/public_ldap_people.json')['people'] for p in ldappeople: stamp = ldappeople[p]['createTimestamp'] if stamp.startswith(tym): js[ym] += 1 else: if not tym1 == None: if stamp.startswith(tym1): js[ym1] += 1 with open("../../site/json/foundation/accounts-evolution.json", "w") as f: json.dump(js, f, sort_keys=True, indent=0) f.close() print("Done, found %u entries for this month" % js[ym]) if not tym1 == None: print("Also found %u entries for the previous month (%s)" % (js[ym1], tym1))