scripts/project2attic.py

#!/usr/bin/env python3 """ Updates a project to the attic: - creates an override DOAP under data/projects-override - updates projects.xml to point to the override DOAP - moves data/committees/<pmc>.rdf to data/committees-retired/<pmc>.rdf - updates data/committees.xml Does not commit the changes. N.B. The current implementation does not handle PMCs with multiple projects. Updates: data/projects.xml data/committees.xml Creates: data/projects-override/<pmc>[_project].rdf Moves: data/committees/<pmc>.rdf to data/committees-retired/<pmc>.rdf """ import sys from os.path import dirname, abspath, join from inspect import getsourcefile import os import re import subprocess if len(sys.argv) == 1: print("Please provide a list of project ids") sys.exit(1) OVERRIDE_DIR='https://svn.apache.org/repos/asf/comdev/projects.apache.org/trunk/data/projects-override' MYHOME = dirname(abspath(getsourcefile(lambda:0))) datadir = join(dirname(MYHOME),'data') def update_pmc_xml(pmc): xmlfile = join(datadir,'committees.xml') xmlfilet = join(datadir,'committees.xml.t') print("Updating committees.xml") found = 0 with open(xmlfile,'r') as r, open(xmlfilet,'w') as w: for l in r: m = re.search("^(\s+)<location>(.+)</location",l) if m: indent = m.group(1) url = m.group(2) # match one of: # committees/<pmc>.rdf # https://ofbiz.apache.org/<pmc>/... # http://svn.apache.org/repos/asf/<pmc>/... regex = "^(committees/%s\.rdf|https?://%s\.apache\.org/.+|https?://svn.apache.org/repos/asf/%s/.+)$" % (pmc,pmc,pmc) if re.search(regex, url, flags=re.IGNORECASE): print("Found %s at %s" % (pmc, url)) if url.startswith('committees/'): new = url.replace('committees/','committees-retired/') subprocess.run(["svn", "mv", "data/%s" % url, "data/%s" % new]) url = new w.write("%s\n" % (indent, url)) found += 1 continue w.write(l) # write the original line if found == 1: print("Found OK") os.system("diff %s %s" % (xmlfile, xmlfilet)) os.rename(xmlfilet, xmlfile) elif found > 1: print("Matched more than once %d" % found) os.remove(xmlfilet) else: print("Not found") os.remove(xmlfilet) # update the DOAP to be an attic project def update_doap(doap, source): infile = doap tmpfile = doap + '.t' catWrite = True with open(infile,'r') as r, open(tmpfile,'w') as w: for l in r: if re.search("<rdf:RDF",l): w.write("\n" % source) if re.search("<asfext:pmc rdf:resource=", l): w.write(re.sub("=['\"].+?['\"]",'="http://attic.apache.org/"',l)) continue # don't write original line if catWrite and re.search("<category",l): catWrite = False w.write(' <category rdf:resource="http://projects.apache.org/category/retired" />\n') w.write(l) # write the original line os.system("diff %s*" % (infile)) os.rename(tmpfile,infile) def update_project_xml(pid): xmlfile = join(datadir,'projects.xml') xmlfilet = join(datadir,'projects.xml.t') doapfile = join(datadir,'projects-override',"%s.rdf" % pid) print("Updating projects.xml") found = 0 source = None # match one of: # http://svn.apache.org/repos/asf/tomee/tomee/trunk/doap_tomee.rdf # https://gitbox.apache.org/repos/asf?p=trafficserver.git; # https://gitbox.apache.org/repos/asf?p=any23-committers.git;a=blob_plain;f=doap_Any23.rdf;hb=HEAD # https://raw.githubusercontent.com/apache/oodt/master/doap_oodt.rdf # https://raw.githubusercontent.com/apache/directory-site/master/static/doap_fortress.rdf # http://zookeeper.apache.org/doap.rdf # https://daffodil.incubator.apache.org/doap.rdf # Parse each line to extract the project name. repore = '([-a-z0-9]+)' # repo regex lines2match = [ f"^https?://svn\.apache\.org/repos/asf/{repore}/", f"^https?://gitbox\.apache\.org/repos/asf\?p={repore}\.git", f"^https?://raw\.githubusercontent\.com/apache/{repore}/", f"^https?://{repore}(?:\.incubator)?\.apache\.org/", ] # 1 2 3 4 with open(xmlfile,'r') as r, open(xmlfilet,'w') as w: for l in r: m = re.search("^(\s+)<location>(.+)<",l) if m: indent = m.group(1) url = m.group(2) if '/data/projects-override/' in url: # already handled w.write(l) # write the original line continue m2 = None for line in lines2match: m2 = re.search(line, url, flags=re.IGNORECASE) if m2: break if m2: reponame = m2.group(1) if reponame.startswith('empire-db'): reponame = 'empire-db' else: reponame = reponame.split('-')[0] if reponame == pid: print("Found %s at %s" % (pid,url)) l = l.replace('<location>','') w.write(l) # write the original line w.write("%s<location>%s/%s.rdf</location>\n" % (indent, OVERRIDE_DIR, pid)) found += 1 source = url continue else: print("??? cannot parse: "+url) w.write(l) # write the original line if found == 0: print("Could not find a match for %s" % pid) os.remove(xmlfilet) else: if "//svn.apache.org/" in source: os.system("svn cp %s %s" % (source.replace('http://','https://'), doapfile)) else: os.system("wget -O %s '%s'" % (doapfile,source)) os.system("svn add %s" % (doapfile)) update_doap(doapfile, source) os.system("diff %s %s" % (xmlfile,xmlfilet)) os.rename(xmlfilet,xmlfile) os.system("svn status %s" % dirname(doapfile)) for arg in sys.argv[1:]: print("Processing "+arg) if not re.search("^[-a-z0-9]+$",arg): print("Expecting lower-case, '-' or digits only") else: update_pmc_xml(arg) update_project_xml(arg)

scripts/project2attic.py (126 lines of code) (raw):