in src/jobs/util/labeled_data_utils.py [0:0]
def get_doc_name(s: str) -> str:
    """
    Extract the document name from a page title by dropping trailing suffixes.

    The title is truncated at the first occurrence of each known separator,
    tried in this order: " | ", " - ", " – " (en dash), " · " (middle dot).
    Each separator is applied to the result of the previous truncation, so a
    title such as "Doc - App | Site" is reduced to "Doc".

    A separator found at the very start of the title (index 0) is ignored, so
    a leading separator never reduces the result to an empty string.

    Only works with certain title types like google docs.

    Args:
        s: Page title string

    Returns: Document name (the input unchanged when no separator applies)
    """
    title = s
    for separator in (" | ", " - ", " – ", " · "):
        cut = title.find(separator)
        # find() yields -1 when absent and 0 for a leading separator; both
        # cases leave the title untouched, matching the "> 0" guard.
        if cut > 0:
            # Equivalent to title.split(separator)[0], without a second scan.
            title = title[:cut]
    return title