def __init__()

in link-verifier/verify-links.py

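This excerpt depends on module-level imports and regex constants defined
elsewhere in verify-links.py. A minimal sketch of that setup follows; the
patterns shown for HTTP_URL_SEARCH_TERM, IGNORED_LINK_SCHEMES,
PULL_REQUEST_SEARCH, and ISSUE_SEARCH are illustrative assumptions, not the
file's actual definitions.

    import os
    import re

    from bs4 import BeautifulSoup

    # Assumed, illustrative patterns; the real constants may differ.
    HTTP_URL_SEARCH_TERM = r'^https?://'
    IGNORED_LINK_SCHEMES = r'^(?:mailto|tel|ftp|javascript):'
    # Three capture groups (owner, repository, PR/issue number), matching the
    # group(1)..group(3) usage in the method body below.
    PULL_REQUEST_SEARCH = r'github\.com/([^/]+)/([^/]+)/pull/(\d+)'
    ISSUE_SEARCH = r'github\.com/([^/]+)/([^/]+)/issues/(\d+)'
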

    def __init__(self, html_file_name):
        """Parse the HTML in the given file and extract its links and IDs."""

        self.ids = []
        self.internal_links = []
        self.external_links = []
        self.name = html_file_name
        self.abspath = os.path.abspath(html_file_name)
        self.broken_links = []
        self.linked_repos = {}
        with open(html_file_name, 'r') as infile:
            html_data = infile.read()
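        # Directory containing the file; computed here but not referenced
        # within this excerpt.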
        dirname = os.path.dirname(self.name)
        soup = BeautifulSoup(html_data, 'html.parser')
        # Find IDs. This is to check internal links within a file.
        for tag in soup.find_all(True, {'id': True}):
            self.ids.append(tag.get('id'))
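        # Precompile the GitHub pull-request and issue URL patterns so the
        # link loop below can match every href against them cheaply.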
        pr_search = re.compile(PULL_REQUEST_SEARCH)
        issue_search = re.compile(ISSUE_SEARCH)
        for tag in soup.find_all('a'):
            link = tag.get('href')
            # Anchors without an href yield None; skip them before matching,
            # since re.search raises a TypeError when given None.
            if link is None:
                continue
            if not re.search(HTTP_URL_SEARCH_TERM, link, re.IGNORECASE):
                if not re.search(IGNORED_LINK_SCHEMES, link, re.IGNORECASE):
                    if link not in self.internal_links:
                        self.internal_links.append(link)
            else:
                if link not in self.external_links:
                    self.external_links.append(link)
                    pr_match = pr_search.search(link)
                    if pr_match:
                        self.increment_gh_link_count(pr_match.group(1), pr_match.group(2), pr_match.group(3), True)
                    else:
                        issue_match = issue_search.search(link)
                        if issue_match:
                            self.increment_gh_link_count(issue_match.group(1), issue_match.group(2), issue_match.group(3), False)
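
A hedged usage sketch: the class name is not visible in this excerpt, so
HtmlFile below is a placeholder for whichever class defines this __init__,
and increment_gh_link_count is assumed to populate self.linked_repos.

    # Hypothetical driver code; 'HtmlFile' is a stand-in class name.
    page = HtmlFile('docs/index.html')
    print('file:', page.abspath)
    print('internal links:', page.internal_links)
    print('external links:', page.external_links)
    print('linked GitHub repos:', page.linked_repos)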