in tools/extract_data_categories.py [0:0]
def parse_yaml(yamlpath: str):
    """Rewrite the YAML file at ``yamlpath`` using its metrics' data reviews.

    The file is parsed with ``yaml.safe_load`` and also kept as a list of raw
    text lines. For every metric in every top-level category (keys starting
    with ``$`` and the ``no_lint`` key are skipped), each URL listed under the
    metric's ``data_reviews`` key is fetched with ``fetch_url`` and passed to
    ``parse_data_review``; the results are collected into a set. The set is
    converted with ``categories_as_strings`` and handed to ``update_lines``
    together with the category and metric names, and the lines it returns
    replace the current ones. Finally the lines are written back to
    ``yamlpath``, overwriting the original file.

    Args:
        yamlpath: Path of the YAML file to read and overwrite.

    Raises:
        KeyError: If a metric has no ``data_reviews`` key.
        OSError: If the file cannot be read or written.
    """
    # Read the file once so the parsed mapping and the raw lines are guaranteed
    # to describe the same snapshot of the file. YAML is UTF-8 by convention,
    # so do not depend on the platform's locale encoding.
    with open(yamlpath, encoding="utf-8") as fd:
        lines = fd.readlines()

    # An empty document parses to None; treat it as "no categories".
    content = yaml.safe_load("".join(lines)) or {}

    for category_name, category in content.items():
        # "$..." keys and "no_lint" are not metric categories.
        if category_name.startswith("$") or category_name == "no_lint":
            continue
        for metric_name, metric in category.items():
            categories = set()
            for data_review_url in metric["data_reviews"]:
                html = fetch_url(data_review_url)
                categories.update(parse_data_review(html))
            # update_lines returns the new line list; thread it through so
            # every metric's edit builds on the previous ones.
            lines = update_lines(
                lines, category_name, metric_name, categories_as_strings(categories)
            )

    with open(yamlpath, "w", encoding="utf-8") as fd:
        fd.writelines(lines)