def strip_comments()

in scripts/linkchecker.py [0:0]


def strip_comments(content):
    """Strip HTML comments (``<!-- ... -->``) from a sequence of text lines.

    Many localized content pages contain the original English content in
    comments. These comments have to be stripped out before analyzing the
    links. Doing this with a regular expression is difficult because a
    comment may span several lines, so the lines are scanned manually while
    tracking whether the scan is currently inside a comment.

    NOTE: Line numbers are preserved: exactly one output entry is produced
    for every input line (lines that lie entirely inside a comment become
    empty strings). This can be useful in the future if we want to print
    out the line numbers for bad links.

    Args:
        content: an iterable of strings, one per line (for example the
            result of ``file.readlines()``).

    Returns:
        A list with one string per input line, containing only the text
        that lies outside of comments. Any number of comments on a single
        line is handled, as are comments spanning multiple lines.
    """
    open_marker = "<!--"
    close_marker = "-->"

    result = []
    in_comment = False
    for line in content:
        kept = []  # pieces of this line that lie outside of any comment
        pos = 0
        while True:
            if in_comment:
                end = line.find(close_marker, pos)
                if end < 0:
                    # The comment continues on the next line; drop the rest.
                    break
                # Resume right after the marker. Using the marker's real
                # length avoids swallowing the character that follows it.
                pos = end + len(close_marker)
                in_comment = False
            else:
                start = line.find(open_marker, pos)
                if start < 0:
                    kept.append(line[pos:])
                    break
                kept.append(line[pos:start])
                # Only a close marker located after this opener can end
                # the comment, so continue the search from past the opener.
                pos = start + len(open_marker)
                in_comment = True
        result.append("".join(kept))

    return result