def parse_urls_for_param()

in modules/url_parameters_removal.py [0:0]


    def parse_urls_for_param(self):
        """Build a long-format table of the query parameters used by each URL.

        Reads ``self.url_data`` (a DataFrame with ``url_id`` and
        ``canonical_url`` columns), parses the query string of every
        ``canonical_url`` and emits one output row per parameter name found
        in that URL. URLs without any query parameter contribute no rows.

        Rows are visited by position, so ``self.url_data`` is not required
        to carry a default ``0..n-1`` index (it may have been filtered or
        re-indexed beforehand).

        A progress line is written to stderr every 100000 rows.

        Returns:
            pandas.DataFrame with the columns ``url_id`` and ``param``.
        """
        url_data = self.url_data
        total = url_data.shape[0]
        urls_id_list = []
        urls_param_list = []
        # Walk the two needed columns directly instead of ``.loc[i]``:
        # ``.loc`` is label-based, so pairing it with ``range(n)`` raises
        # KeyError (or picks the wrong row) on a non-default index, and it
        # builds a whole row Series on every iteration.
        rows = zip(url_data['url_id'], url_data['canonical_url'])
        for i, (url_id, canonical_url) in enumerate(rows):
            parsed = urlparse.urlparse(canonical_url)
            # parse_qs returns a mapping keyed by parameter name, so a name
            # repeated inside one URL is only listed once for that URL.
            params = list(urlparse.parse_qs(parsed.query))
            urls_id_list.extend([url_id] * len(params))
            urls_param_list.extend(params)
            if i % 100000 == 0:
                print("progress: {} / {}".format(i, total),
                      file=sys.stderr)
        return pd.DataFrame({
            'url_id': urls_id_list,
            'param': urls_param_list,
        })