def parse_data_review()

in tools/extract_data_categories.py [0:0]


def parse_data_review(html: str) -> Set[int]:
    """
    Collect the category numbers found in one data review.

    The HTML is flattened to plain text and scanned line by line. Lines are
    ignored up to and including the first one that contains ``QUESTION``.
    From the following line onward, every run of decimal digits is converted
    to an ``int`` and collected, until a line containing ``"?"`` is reached
    (that line is not scanned) or the text ends.

    Returns the collected integers as a set; the set is empty when no line
    contains ``QUESTION``.
    """
    plain_text = BeautifulSoup(html, features="html.parser").get_text()

    found: Set[int] = set()
    past_question = False
    for row in plain_text.splitlines():
        if not past_question:
            # Still looking for the question line; the matching line itself
            # is skipped, so collection starts on the line after it.
            past_question = QUESTION in row.strip()
            continue
        if "?" in row:
            # A "?" ends the answer section.
            break
        found.update(map(int, re.findall("[0-9]+", row)))

    return found