scripts/cronjobs/generaterepos.py (36 lines of code) (raw):

#!/usr/bin/python3 """ Reads: https://svn.apache.org/repos/asf/ https://gitbox.apache.org/repositories.json Updates: ../../site/json/foundation/repositories.json """ import json import requests from html.parser import HTMLParser repos = {} class SVNRepoParser(HTMLParser): handleProjectData = False def handle_starttag(self, tag, attrs): if tag == 'li': self.handleProjectData = True def handle_endtag(self, tag): self.handleProjectData = False def handle_data(self, data): if self.handleProjectData: committee = data.rstrip('/') repos[committee + '-svn'] = 'https://svn.apache.org/repos/asf/' + committee + '/' # Parse svn repos try: svnResponse = requests.get("https://svn.apache.org/repos/asf/") svnResponse.raise_for_status() parser = SVNRepoParser() parser.feed(svnResponse.content.decode("utf-8")) except requests.exceptions.RequestException as e: # This is the correct syntax print("ERROR: Unable to retrieve svn repos: %s", e) # Parse git repos try: gitResponse = requests.get("https://gitbox.apache.org/repositories.json") gitResponse.raise_for_status() gitData = json.loads(gitResponse.content.decode("utf-8")) for committee in gitData['projects']: for repo in gitData['projects'][committee]['repositories']: repos[repo] = 'https://gitbox.apache.org/repos/asf/' + repo + '.git' except requests.exceptions.RequestException as e: # This is the correct syntax print("ERROR: Unable to retrieve git repos: %s", e) print("Writing json/foundation/repositories.json...") with open("../../site/json/foundation/repositories.json", "w") as f: json.dump(repos, f, sort_keys=True, indent=0) f.close() print("All done!")