in scripts/validate-urls.py
def main():
    args = parser.parse_args()

    # Find all Markdown files under the input directory.
    files = []
    for dirpath, dirnames, filenames in os.walk(args.input_dir):
        for f in filenames:
            if f.endswith(".md"):
                files.append(os.path.join(dirpath, f))
    print(f"Found {len(files)} MD files")

    # Map each file (keyed by its path relative to the input dir) to the URLs it contains.
    urls = {}
    for file in files:
        with open(file, "r") as f:
            u = HTTP_PATTERN.findall(f.read())
        if u:
            urls[file[len(args.input_dir):]] = u
    print(f"Found {len(urls)} files containing URLs")

    problematic_urls = []
    for file, file_urls in urls.items():
        for url in file_urls:
            if should_skip(url):
                print(f"skipping {url}")
                continue
            print(f"{file}: URL {url}", end="")
            try:
                # A timeout keeps one unresponsive server from stalling the whole run.
                r = requests.head(url, timeout=10)
                print(f", status {r.status_code}")
                if 400 <= r.status_code < 500:
                    problematic_urls.append((file, url, r.status_code))
            except Exception as e:
                print(e)
                problematic_urls.append((file, url, "FAIL"))

    # Emit the failures as rows of a Markdown table.
    print("\nSummary:\n")
    for u in problematic_urls:
        print(f"| {u[0]} | {u[1]} | {u[2]} |")