in collection/download_wayback_passages.py [0:0]
def extract_text(html_text: str) -> str:
    """Extract the text content of an HTML document.

    Every string node found by the parser is kept unless the name of its
    parent tag appears in the module-level ``blacklist`` (defined elsewhere
    in this file; presumably non-content tags such as ``script``/``style`` --
    verify against its definition).

    Args:
        html_text: Raw HTML markup to parse.

    Returns:
        The kept string nodes in document order, each followed by a single
        space (so a non-empty result ends with a trailing space). Returns an
        empty string when no node is kept.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    # `string=True` matches every string node; it is the current spelling of
    # the deprecated `text=True` keyword.
    nodes = soup.find_all(string=True)
    # Build the result in one join rather than repeated `+=`, which can be
    # quadratic in the number of nodes.
    return ''.join(
        f'{node} ' for node in nodes if node.parent.name not in blacklist
    )