in src/kg_builder.py [0:0]
def extract_additional_path_info(row):
    """Break a URL down into its path segments.

    The leading ``https://<host>`` or ``http://<host>`` is removed, then
    surrounding slashes are trimmed, ``.html`` / ``.htm`` extensions are
    dropped, and the remainder is split on ``/``.

    Args:
        row: Any object supporting ``row['url']`` and ``row['host']``
            (both strings).

    Returns:
        A list of path segments. A URL with no path yields ``['']``
        (the result of splitting an empty string), not an empty list.
        If the URL does not start with the given host, the scheme and
        host are not removed and end up in the returned segments.
    """
    import re  # local import: keeps the block self-contained

    url = row['url']
    host = row['host']

    # Strip the scheme + host only when it is a true prefix, so an embedded
    # copy of the same origin later in the URL (e.g. a redirect target in
    # the path) is left intact.
    path = url
    for scheme in ("https://", "http://"):
        origin = f"{scheme}{host}"
        if url.startswith(origin):
            path = url[len(origin):]
            break

    path = path.strip("/")

    # Drop ".html" / ".htm" only where it ends a path segment (followed by
    # "/", "?", "#" or end of string), so names such as "guide.html5" or
    # "a.htmx" are not corrupted.
    path = re.sub(r"\.html?(?=$|[/?#])", "", path)

    return path.split("/")