in check_all_deadlink.py [0:0]
def process_md_file(file_path):
    """Print an error for each relative Markdown link whose target is missing.

    The file is read as UTF-8 and scanned line by line for inline links of
    the form ``[text](target)``. Lines inside fenced code blocks (delimited
    by lines starting with three backticks) are ignored.

    A target is skipped when it is empty, starts with ``#`` (an anchor in the
    same document), has a URL scheme, or is an absolute path. Every other
    target is resolved relative to the directory of ``file_path`` after its
    ``#fragment`` is removed. If the resolved path does not end in ``.md`` or
    ``.mdx``, ``.md`` is appended. A ``.md`` path is accepted when either it
    or its ``.mdx`` sibling exists.

    Args:
        file_path: Path of the Markdown file to scan.

    Returns:
        None. Problems are reported on standard output, one line per dead
        link, in the form
        ``Error: File not found for link '<link>' in file '<file>:<line>'``.
    """
    link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
    code_block_pattern = re.compile(r"^```.*$")
    base_dir = os.path.dirname(file_path)

    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    in_code_block = False
    for line_number, line in enumerate(lines, start=1):
        # A fence line toggles code-block state and is never scanned itself.
        if code_block_pattern.match(line):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        for link in link_pattern.findall(line):
            # "[text]()" yields an empty target; indexing link[0] would raise
            # IndexError, so test emptiness and the "#" prefix safely.
            if not link or link.startswith("#"):
                continue
            # Skip URLs and absolute paths; only relative links are checked.
            if urlparse(link).scheme or os.path.isabs(link):
                continue

            # Drop the section fragment from the link itself, before joining,
            # so a "#" in the directory part of file_path is left intact.
            target = link.split("#", 1)[0]
            full_path = os.path.normpath(os.path.join(base_dir, target))
            if not full_path.endswith((".md", ".mdx")):
                full_path += ".md"

            candidates = [full_path]
            if full_path.endswith(".md"):
                # A ".md" link is also satisfied by a ".mdx" file.
                candidates.append(full_path[:-3] + ".mdx")

            if not any(os.path.exists(path) for path in candidates):
                print(
                    f"Error: File not found for link '{link}' "
                    f"in file '{file_path}:{line_number}'"
                )