def get_doc_name()

in src/jobs/util/labeled_data_utils.py [0:0]


def get_doc_name(s: str):
    """
    Extract a document name from a page title by dropping trailing segments.

    The title is cut at the first occurrence of each known separator, tried
    in a fixed order (" | ", " - ", " – ", " · "); each cut is applied to the
    result of the previous one. A separator found at the very start of the
    string is ignored, so the result is never emptied by a leading separator.
    Intended for titles shaped like "<document> - <app or site>" (e.g. Google
    Docs); other title shapes are returned unchanged or only partly trimmed.

    Args:
        s: Page title string
    Returns: Document name (the leading portion of the title)
    """
    # Order matters: each separator is applied to the already-trimmed title.
    separators = (" | ", " - ", " – ", " · ")

    name = s
    for sep in separators:
        cut = name.find(sep)
        # cut == -1: separator absent; cut == 0: separator leads the string.
        # In both cases the title is left as it is.
        if cut > 0:
            name = name[:cut]
    return name