def extract_additional_path_info()

in src/kg_builder.py [0:0]


def extract_additional_path_info(row):
    """Break the URL path of a row into its "/"-separated segments.

    The leading ``http(s)://<host>`` is removed from ``row['url']``,
    surrounding slashes are trimmed, and a ``.html`` / ``.htm`` extension is
    dropped from the end of each segment before the path is split on "/".

    Args:
        row: Any object supporting ``row['url']`` and ``row['host']``
            (both used as strings), e.g. a dict.

    Returns:
        A list of path segments. A URL with no path yields ``['']``
        (the result of splitting an empty string), never an empty list.
        Query strings and fragments are not removed; they stay attached to
        the segment they follow.
    """
    # Function-scope import keeps this block self-contained.
    import re

    url = row['url']
    host = row['host']

    # Strip the scheme + host only when it is the actual prefix of the URL.
    # A plain str.replace would also delete later occurrences (e.g. a URL
    # embedded in the path of a redirect link) and corrupt the path.
    path = url
    for prefix in (f"https://{host}", f"http://{host}"):
        if path.startswith(prefix):
            path = path[len(prefix):]
            break

    path = path.strip("/")

    # Drop ".html"/".htm" only where it terminates a segment (followed by
    # "/", "?", "#", or the end of the string) so that names which merely
    # contain the text, such as "intro.html5-notes", are left intact.
    path = re.sub(r"\.html?(?=[/?#]|$)", "", path)

    return path.split("/")