in scripts/linkchecker.py [0:0]
def strip_comments(content):
    """Strip HTML comments (``<!-- ... -->``) from a sequence of lines.

    Many localized content pages contain the original English content in
    comments.  These comments have to be stripped out before analyzing the
    links.  Doing this with a regular expression is difficult, and even the
    grep tool is not suitable for this use case, so the lines are scanned
    manually.

    NOTE: We strive to preserve line numbers when producing the resulting
    text: exactly one output string is produced per input line.  This can be
    useful in the future if we want to print out the line numbers for bad
    links.

    A line may contain any number of comments, and a comment may span any
    number of lines.  Text outside comments is kept verbatim (including a
    trailing newline when it lies outside a comment); text inside comments
    is dropped.  A comment that is never closed swallows the rest of the
    input.

    :param content: iterable of text lines (strings).
    :returns: list of strings, one per input line, with comment text removed.
    """
    open_tag = "<!--"
    close_tag = "-->"

    result = []
    in_comment = False  # carried across lines for multi-line comments
    for line in content:
        kept = []  # pieces of this line that lie outside any comment
        pos = 0
        while True:
            if in_comment:
                end = line.find(close_tag, pos)
                if end < 0:
                    # comment continues on the next line; drop the rest
                    break
                # skip exactly the closing marker, nothing more
                pos = end + len(close_tag)
                in_comment = False
            else:
                start = line.find(open_tag, pos)
                if start < 0:
                    kept.append(line[pos:])
                    break
                kept.append(line[pos:start])
                # look for the closing marker only after the opening one,
                # so a stray "-->" earlier in the line is left alone
                pos = start + len(open_tag)
                in_comment = True
        result.append("".join(kept))
    return result