scripts/check-coordinates-with-doap.py (54 lines of code) (raw):
#!/usr/bin/env python3
# Cross-reference the 'project-coordinates.json' with DOAP
import os
import json
import pprint
from rdflib import Graph
from rdflib.namespace import RDF, DOAP
from slugify import slugify
from urllib.request import urlopen
import xml.etree.ElementTree as ET
def fetch_doap(url):
    """Return the DOAP document at *url* parsed into an rdflib ``Graph``.

    The document is downloaded once and stored under ``cache/doap/`` in a
    file named after the slugified URL. Later calls for the same URL parse
    the cached copy instead of fetching it again.

    Args:
        url: Location of the DOAP (RDF/XML) document.

    Returns:
        A ``Graph`` populated from the document.

    Raises:
        Exception: Whatever ``Graph.parse`` raises for the document; the
            URL and cache file name are printed before re-raising.
            Download and decoding errors propagate unchanged.
    """
    filename = 'cache/doap/%s.xml' % (slugify(url))
    if not os.path.exists(filename):
        os.makedirs('cache/doap', exist_ok=True)
        # Download and decode completely before creating the cache file, so
        # a failed request or a non-UTF-8 body does not leave an empty file
        # behind that later runs would treat as a valid cache entry.
        with urlopen(url) as response:
            content = response.read().decode('utf-8')
        # Explicit encoding: the text was decoded as UTF-8, so it must not be
        # re-encoded with whatever the platform default happens to be.
        with open(filename, 'w', encoding='utf-8') as cache_file:
            cache_file.write(content)
    with open(filename, 'r', encoding='utf-8') as cache_file:
        graph = Graph()
        try:
            graph.parse(data=cache_file.read(), format='xml')
        except Exception:
            print(f"Error parsing {url} ({filename})")
            raise
    return graph
# Manually maintained project coordinates, loaded when the script starts.
# The encoding is explicit so the result does not depend on the locale's
# default text encoding (JSON files are expected to be UTF-8).
with open('project-coordinates.json', 'r', encoding='utf-8') as p:
    project_coordinates = json.load(p)
from rdflib.namespace import DefinedNamespace, Namespace
from rdflib.term import URIRef
# Namespace rooted at http://projects.apache.org/ns/asfext# with the single
# declared term `pmc`.
class ASFEXT(DefinedNamespace):
    # Base URI that every term of this namespace is resolved against.
    _NS = Namespace("http://projects.apache.org/ns/asfext#")

    # Declared terms.
    pmc: URIRef
# Local declaration of the DOAP namespace that carries the two security terms
# this script looks up by subscript (MYDOAP['security-contact'], ...).
# Background: https://github.com/RDFLib/rdflib/issues/2811
class MYDOAP(DefinedNamespace):
    # Base URI that every term of this namespace is resolved against.
    _NS = Namespace("http://usefulinc.com/ns/doap#")

    # The hyphenated names cannot be written as annotated class attributes,
    # so they are listed in `_extras` instead.
    _extras = ["security-contact", "security-policy"]
# DOAP locations that are deliberately skipped. The comment above each entry
# points at the related upstream pull request or issue.
SKIPPED_DOAP_LOCATIONS = frozenset({
    # https://github.com/apache/kafka/pull/16472
    "https://gitbox.apache.org/repos/asf?p=kafka.git;a=blob_plain;f=doap_Kafka.rdf;hb=HEAD",
    # https://github.com/apache/olingo-site/pull/6
    "http://olingo.apache.org/doap_Olingo.rdf",
    # https://github.com/apache/orc/pull/1964
    "http://orc.apache.org/doap_orc.rdf",
    # https://github.com/apache/kibble-website/pull/15
    "https://kibble.apache.org/doap.rdf",
    # https://github.com/apache/hudi/pull/11533
    "https://gitbox.apache.org/repos/asf?p=hudi.git;a=blob_plain;f=doap_HUDI.rdf;hb=HEAD",
    # https://issues.apache.org/jira/browse/GORA-716
    "http://svn.apache.org/repos/asf/gora/cms_site/trunk/content/current/doap_Gora.rdf",
    # https://github.com/apache/gobblin-site/pull/2
    "https://gobblin.apache.org/doap_Gobblin.rdf",
})

# Walk every child element of the projects.xml root; the text of each element
# is used as the URL of a DOAP file.
for location in ET.parse('projects.xml').getroot():
    if location.text in SKIPPED_DOAP_LOCATIONS:
        continue
    graph = fetch_doap(location.text)
    for project in graph.subjects(RDF.type, DOAP.Project):
        # Print every security contact first, then every security policy,
        # each preceded by the project it belongs to.
        for term in ('security-contact', 'security-policy'):
            for value in graph.objects(project, MYDOAP[term]):
                pprint.pprint(project)
                pprint.pprint(value)