in readability/htmls.py [0:0]
def _collect_title_candidates(candidates, elements, orig):
    """Offer the text of each element to ``add_match`` as a title candidate.

    For every element both its leading ``.text`` and its full
    ``text_content()`` are passed on (when non-empty); ``add_match`` decides
    whether anything is actually added to ``candidates``.
    """
    for element in elements:
        if element.text:
            add_match(candidates, element.text, orig)
        # Evaluate text_content() once and reuse it for the check and the call.
        content = element.text_content()
        if content:
            add_match(candidates, content, orig)


def shorten_title(doc):
    """Return a shortened form of the document's ``<title>`` text.

    Strategy, in order:

    1. Gather candidates from ``h1``/``h2``/``h3`` elements and from the
       selectors in ``TITLE_CSS_HEURISTICS`` via ``add_match``; if any were
       collected, the longest one is used.
    2. Otherwise split the normalized title on the first of ``" | "``,
       ``" - "``, ``" :: "``, ``" / "`` that yields a first or last part of
       at least four words, and use that part.
    3. If no delimiter produced a result, fall back to splitting on ``": "``.

    Args:
        doc: Parsed document; must provide ``find``, ``iterfind`` and
            ``cssselect`` (presumably an lxml HTML element -- confirm
            against callers).

    Returns:
        ``""`` when there is no ``<title>`` element or it has no text.
        Otherwise the shortened title, or the full normalized title when the
        shortened one is not strictly between 15 and 150 characters long.
    """
    title = doc.find(".//title")
    if title is None or not title.text:
        return ""

    title = orig = norm_title(title.text)

    candidates = set()
    for path in (".//h1", ".//h2", ".//h3"):
        _collect_title_candidates(candidates, doc.iterfind(path), orig)
    for selector in TITLE_CSS_HEURISTICS:
        _collect_title_candidates(candidates, doc.cssselect(selector), orig)

    if candidates:
        # Longest candidate wins. Ties are broken by the text itself so the
        # result does not depend on set iteration order, which varies between
        # runs. Assumes candidates are strings (they originate from element
        # text) -- confirm against add_match.
        title = max(candidates, key=lambda text: (len(text), text))
    else:
        # `title` still equals `orig` here until a branch assigns and breaks.
        for delimiter in (" | ", " - ", " :: ", " / "):
            if delimiter not in orig:
                continue
            parts = orig.split(delimiter)
            if len(parts[0].split()) >= 4:
                title = parts[0]
                break
            if len(parts[-1].split()) >= 4:
                title = parts[-1]
                break
        else:
            # No delimiter gave a usable part: try a "prefix: title" layout.
            if ": " in orig:
                parts = orig.split(": ")
                if len(parts[-1].split()) >= 4:
                    title = parts[-1]
                else:
                    # Keep everything after the first ": ".
                    title = orig.split(": ", 1)[1]

    # Reject implausibly short or long results in favour of the full title.
    if not 15 < len(title) < 150:
        return orig

    return title