in build_obelics/13_final_processing.py [0:0]
def remove_spam_paragraphs(texts, images, metadata):
    """Filter spam-heavy paragraphs out of every text entry.

    Each entry of ``texts`` that is not ``None`` is split into paragraphs on
    blank lines (two consecutive newlines). A paragraph is kept only when
    ``compute_spam_word_ratio`` for it is strictly below
    ``SPAM_WORD_RATIO_CUTOFF``; the kept paragraphs are re-joined with the
    same two-newline separator. ``None`` entries stay ``None`` at the same
    position, and an entry whose paragraphs are all rejected becomes an
    empty string.

    Args:
        texts: Iterable of text entries, each a string or ``None``.
        images: Returned as-is; not inspected here.
        metadata: Returned as-is; not inspected here.

    Returns:
        A tuple ``(filtered_texts, images, metadata)`` where
        ``filtered_texts`` is a new list with one entry per input text.
    """
    paragraph_separator = "\n\n"

    def _strip_spam(text):
        # Keep the original paragraph order; only drop those at/above the cutoff.
        kept = []
        for paragraph in text.split(paragraph_separator):
            if compute_spam_word_ratio(paragraph) < SPAM_WORD_RATIO_CUTOFF:
                kept.append(paragraph)
        return paragraph_separator.join(kept)

    filtered_texts = [
        None if text is None else _strip_spam(text)
        for text in texts
    ]
    return filtered_texts, images, metadata