# Excerpt from scripts/cronjobs/parsereleases.py: def main()

def main():
    """Download and parse the dist-area `find -ls` listing, feeding each
    visible release file to parseFile(committeeId, file, stamp, path).

    Skips directories/non-plain files, top-level files, hidden files,
    favicon.ico/META, per-committee filtered dirs, and the 'zzz' area.
    Incubator podlings are reported as 'incubator-<podling>'.
    """
    uc = UrlCache(silent=True)
    find_ls = uc.get(FIND_LS, name='find-ls2.txt.gz')
    #  -rw-rw-r--       1 svnwc svnwc           479 2022-06-17 12:55 UTC ./.htaccess
    #    0              1   2     3               4       5       6   7    8 
    with gzip.open(find_ls, mode='rt') as r:
        for l in r:
            # maxsplit=8 keeps any spaces inside the path in fields[8];
            # a plain split() would truncate such paths at the first space.
            fields = l.split(None, 8)
            # Guard against blank/malformed lines before indexing.
            if len(fields) < 9:
                continue
            if not fields[0].startswith('-'): # only want plain files
                continue
            # Last field is the path; drop the './' prefix and the trailing LF
            # (maxsplit leaves the final newline attached, unlike a full split).
            path = fields[8].rstrip('\n')[2:]
            segs = path.split('/')
            if len(segs) == 1: # ignore top level files
                continue
            file = segs.pop() # basename
            # Ignore invisible files
            if file.startswith('.') or file in ['favicon.ico', 'META']:
                continue
            committeeId = segs[0]
            if any( seg in SKIP_DIRS + CTTEE_FILTERS.get(committeeId,{}).get('DIRS',[])  for seg in segs):
                # print('SKIP', segs)
                continue
            if committeeId in ['zzz']:
                continue
            if committeeId == 'incubator':
                podling = segs[1]
                committeeId = f'incubator-{podling}'
            # Now store the info
            stamp = fields[5] # date portion only (YYYY-MM-DD); time is fields[6]
            parseFile(committeeId, file, stamp, path)