in link-verifier/verify-links.py [0:0]
import re
import time
import requests

def test_url(url):
    """Tests a single URL and returns an (is_broken, status) tuple."""
    global use_gh_cache
    global main_repo_list
    global link_cache
    status = ''
    is_broken = False
    # Return the cached result if this link was already tested.
    if url in link_cache:
        return link_cache[url]
    # If GitHub data was pre-fetched, check the link against the cached
    # PR and issue numbers before sending any network request.
    if use_gh_cache:
        pr_match = re.search(PULL_REQUEST_SEARCH, url)
        issue_match = re.search(ISSUE_SEARCH, url)
        if pr_match is not None:
            repo_key = f'{pr_match.group(1)}/{pr_match.group(2)}'.lower()
            if repo_key in main_repo_list and PR_KEY in main_repo_list[repo_key]:
                if int(pr_match.group(3)) in main_repo_list[repo_key][PR_KEY]:
                    status = 'Good'
        elif issue_match is not None:
            repo_key = f'{issue_match.group(1)}/{issue_match.group(2)}'.lower()
            if repo_key in main_repo_list and ISSUE_KEY in main_repo_list[repo_key]:
                if int(issue_match.group(3)) in main_repo_list[repo_key][ISSUE_KEY]:
                    status = 'Good'
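    # Send a real HTTP request when the pre-fetched cache could not confirm the link.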
    if status != 'Good':
        try:
            r = requests.head(url, allow_redirects=True)
            # Some sites return 404 for HEAD but not for GET, e.g.
            # https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
            if r.status_code >= 400:
                # allow_redirects is already enabled by default for GET.
                r = requests.get(url)
                # With many links we are likely to hit GitHub's rate limiting.
                if r.status_code == 429:
                    time.sleep(int(r.headers['Retry-After']))
                    r = requests.head(url, allow_redirects=True)
            if r.status_code >= 400:
                is_broken = True
            status = r.status_code
        # requests raises ConnectionError if the URL does not exist, but we
        # catch every exception from trying the link to be safe.
        except Exception as e:
            print(str(e))
            is_broken = True
            status = 'Error'
    # Cache the result so the same link is not tested again.
    link_cache[url] = (is_broken, status)
    return is_broken, status
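
The function leans on module-level names that sit outside this excerpt: the two search patterns, the PR_KEY/ISSUE_KEY cache keys, and the three globals. Below is a minimal sketch of how it might be driven, continuing from the code above; the pattern strings, key values, and repository data are hypothetical stand-ins, not the definitions actually used in verify-links.py:

# Hypothetical stand-ins for the module-level definitions test_url() expects.
# Each pattern captures the repository owner, name, and PR/issue number, which
# matches how the match groups are consumed above.
PULL_REQUEST_SEARCH = r'github\.com/([^/]+)/([^/]+)/pull/(\d+)'
ISSUE_SEARCH = r'github\.com/([^/]+)/([^/]+)/issues/(\d+)'
PR_KEY = 'pulls'
ISSUE_KEY = 'issues'

use_gh_cache = True
# Maps a lower-cased 'owner/repo' to its pre-fetched PR and issue numbers.
main_repo_list = {'freertos/freertos': {PR_KEY: {1234}, ISSUE_KEY: {56}}}
link_cache = {}

# A PR number found in the pre-fetched data is reported good with no request.
print(test_url('https://github.com/FreeRTOS/FreeRTOS/pull/1234'))  # (False, 'Good')
# Anything else falls through to the HEAD/GET probe.
print(test_url('https://www.freertos.org/'))

Probing with HEAD first keeps the common case cheap; the GET fallback and the Retry-After sleep exist only because some servers answer HEAD and GET differently and GitHub rate-limits bursts of requests.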