def process_one_url()

in modules/url_comparison.py [0:0]


    def process_one_url(self, url):
        """
        Compare a URL's content against each of its modified variants.

        The original URL is fetched once. Every ``(key, mod_url)`` pair
        yielded by ``self.generate_modified_urls(url)`` is then fetched
        and compared to the original via ``self.compare_two_soups``.
        The intent is to see how much the page changes when a given
        query param is removed; the variants are expected to also
        include the full URL itself (an 'AA test') so that dynamic page
        elements can be accounted for -- which variants are produced is
        decided by ``generate_modified_urls``, not by this method.

        :param url: STRING
        :return: pandas DataFrame built from one Series per variant,
            each holding 'url', 'key' and 'mod_url' followed by
            whatever ``compare_two_soups`` returned for that variant
        """
        def fetch(target):
            # Every fetch shares the same instance-level settings.
            return URLContentFetcher(
                target,
                timeout=self.timeout,
                parser=self.parser,
                proxies=self.proxies,
            )

        def compare_variant(key, mod_url):
            # How similar is the page to its original form once this
            # particular variant of the URL is requested instead? The
            # metrics themselves come from compare_two_soups.
            variant = fetch(mod_url)
            similarity = self.compare_two_soups(baseline, variant)
            labels = pd.Series({'url': url, 'key': key, 'mod_url': mod_url})
            return pd.concat((labels, similarity))

        # Fetch the reference page before generating the variants.
        baseline = fetch(url)
        rows = [
            compare_variant(key, mod_url)
            for key, mod_url in self.generate_modified_urls(url)
        ]
        return pd.DataFrame(rows)