in fineweb-2-pipeline.py [0:0]
def above_lang_threshold(doc, threshold):
    """Report whether a document's language score reaches ``threshold``.

    Args:
        doc: Object exposing a ``metadata`` attribute that can be indexed
            with the ``"language_score"`` key.
        threshold: Value the stored score is compared against.

    Returns:
        The result of ``score >= threshold``; the bound is inclusive, so a
        score exactly equal to ``threshold`` passes.
    """
    # Subscript access (not ``.get``): a missing key is not defaulted here.
    language_score = doc.metadata["language_score"]
    return language_score >= threshold