scripts/check-coordinates-with-doap.py (54 lines of code) (raw):

#!/usr/bin/env python3
"""Cross-reference the 'project-coordinates.json' with DOAP.

For every DOAP location listed in ``projects.xml`` (except the ones in
``SKIPPED_DOAP_URLS``) the DOAP document is fetched (or read from the local
``cache/doap`` directory), parsed as RDF/XML, and the ``security-contact``
and ``security-policy`` values of each ``doap:Project`` are printed.
"""

import json
import os
import pprint
import xml.etree.ElementTree as ET
from urllib.request import urlopen

from rdflib import Graph
from rdflib.namespace import DOAP, RDF, DefinedNamespace, Namespace
from rdflib.term import URIRef
from slugify import slugify


def fetch_doap(url: str) -> Graph:
    """Return the RDF graph of the DOAP document at *url*.

    The document is downloaded once and stored as
    ``cache/doap/<slugified-url>.xml``; later calls parse the cached copy.

    Args:
        url: Location of the DOAP (RDF/XML) document.

    Returns:
        An ``rdflib.Graph`` populated from the document.

    Raises:
        Exception: whatever ``Graph.parse`` raises for an unparsable
            document; the URL and cache file name are printed first.
    """
    filename = 'cache/doap/%s.xml' % (slugify(url))
    if not os.path.exists(filename):
        os.makedirs('cache/doap', exist_ok=True)
        # Download and decode *before* creating the cache file, so a failed
        # request does not leave an empty file that would be mistaken for a
        # valid cache entry on the next run.
        with urlopen(url) as response:
            content = response.read().decode('utf-8')
        # The payload was decoded as UTF-8, so store it as UTF-8 regardless
        # of the platform's default encoding.
        with open(filename, 'w', encoding='utf-8') as cache_file:
            cache_file.write(content)
    with open(filename, 'r', encoding='utf-8') as cache_file:
        graph = Graph()
        try:
            graph.parse(data=cache_file.read(), format='xml')
        except Exception:
            print(f"Error parsing {url} ({filename})")
            raise
    return graph


# manually maintained
with open('project-coordinates.json', 'r', encoding='utf-8') as p:
    project_coordinates = json.load(p)


class ASFEXT(DefinedNamespace):
    """Terms of the ``http://projects.apache.org/ns/asfext#`` namespace."""

    pmc: URIRef

    _NS = Namespace("http://projects.apache.org/ns/asfext#")


# https://github.com/RDFLib/rdflib/issues/2811
class MYDOAP(DefinedNamespace):
    """DOAP namespace declaring the hyphenated security terms.

    NOTE(review): presumably needed because the terms below are not
    available from rdflib's own ``DOAP`` namespace (see the issue linked
    above) -- confirm before removing.
    """

    # Names containing '-' cannot be declared as class attributes, so they
    # are listed through ``_extras`` and accessed as ``MYDOAP['name']``.
    _extras = [
        "security-contact",
        "security-policy",
    ]

    _NS = Namespace("http://usefulinc.com/ns/doap#")


# DOAP locations that are deliberately not fetched. Each entry is annotated
# with the upstream pull request or issue associated with it.
SKIPPED_DOAP_URLS = frozenset({
    # https://github.com/apache/kafka/pull/16472
    "https://gitbox.apache.org/repos/asf?p=kafka.git;a=blob_plain;f=doap_Kafka.rdf;hb=HEAD",
    # https://github.com/apache/olingo-site/pull/6
    "http://olingo.apache.org/doap_Olingo.rdf",
    # https://github.com/apache/orc/pull/1964
    "http://orc.apache.org/doap_orc.rdf",
    # https://github.com/apache/kibble-website/pull/15
    "https://kibble.apache.org/doap.rdf",
    # https://github.com/apache/hudi/pull/11533
    "https://gitbox.apache.org/repos/asf?p=hudi.git;a=blob_plain;f=doap_HUDI.rdf;hb=HEAD",
    # https://issues.apache.org/jira/browse/GORA-716
    "http://svn.apache.org/repos/asf/gora/cms_site/trunk/content/current/doap_Gora.rdf",
    # https://github.com/apache/gobblin-site/pull/2
    "https://gobblin.apache.org/doap_Gobblin.rdf",
})

for location in ET.parse('projects.xml').getroot():
    if location.text in SKIPPED_DOAP_URLS:
        continue
    d = fetch_doap(location.text)
    for project in d.subjects(RDF.type, DOAP.Project):
        # Print every (project, value) pair for the two security properties.
        for contact in d.objects(project, MYDOAP['security-contact']):
            pprint.pprint(project)
            pprint.pprint(contact)
        for policy in d.objects(project, MYDOAP['security-policy']):
            pprint.pprint(project)
            pprint.pprint(policy)