in modules/url_parameters_removal.py [0:0]
def drop_query_params(url_group):
    """Strip query parameters from a URL unless they qualify to be kept.

    A query parameter is retained only when all of the following hold:

    * its name appears in ``url_group['param']``;
    * the ``keep`` value of the first row with that name is truthy;
    * ``URLParametersRemoval._qp_no_phone(str(values))`` is truthy, where
      ``values`` is the list of values ``parse_qs`` produced for the
      parameter (presumably a "does not look like a phone number" check --
      confirm against that helper).

    Every other parameter is removed from the URL.

    Note: ``parse_qs`` is called with its defaults, so parameters with blank
    values (e.g. ``?a=``) are discarded by the parser itself. They vanish
    from the rebuilt URL but are not reported in the dropped list.

    Args:
        url_group: Table-like object (used here like a pandas DataFrame)
            with columns ``url``, ``url_id``, ``param`` and ``keep``. Only
            the first row's ``url`` and ``url_id`` are read.

    Returns:
        A tuple ``(url_id, original_url, cleaned_url, params_dropped,
        params_kept)``. The two lists hold parameter names in the order they
        first appear in the query string.
    """
    original_url = url_group['url'].values[0]
    url_id = url_group['url_id'].values[0]
    parsed = urlparse.urlparse(original_url)
    query = urlparse.parse_qs(parsed.query)

    # Build the param -> keep lookup once instead of re-scanning the columns
    # for every query parameter. setdefault keeps the first row seen for a
    # name, matching the first-match semantics of a boolean-mask lookup.
    keep_by_param = {}
    for param, keep in zip(url_group['param'], url_group['keep'].values):
        keep_by_param.setdefault(param, keep)

    params_dropped = []
    params_kept = []
    retained_query = {}
    for qp, values in query.items():
        # Non-short-circuiting `&` is kept on purpose so that _qp_no_phone is
        # evaluated for every listed parameter regardless of its keep flag.
        if qp in keep_by_param and (
                keep_by_param[qp]
                & URLParametersRemoval._qp_no_phone(str(values))):
            params_kept.append(qp)
            retained_query[qp] = values
        else:
            params_dropped.append(qp)

    # Rebuild the URL from the retained parameters only; doseq=True expands
    # multi-valued parameters back into repeated key=value pairs.
    cleaned = parsed._replace(
        query=urlparse.urlencode(retained_query, doseq=True)
    )
    cleaned_url = urlparse.urlunparse(cleaned)
    return (
        url_id,
        original_url,
        cleaned_url,
        params_dropped,
        params_kept)