def process_json_line()

in dpr_scale/utils/ccnews_stats.py [0:0]


def process_json_line(line_str: str):
    """Compute sentence and word counts for one JSON-encoded record.

    The record is skipped (``None`` is returned) unless its ``language``
    field equals ``"en"`` and its ``text``, ``title`` and ``url`` fields are
    all present and non-null.

    Args:
        line_str: A single JSON document, expected to decode to an object
            with ``language``, ``text``, ``title`` and ``url`` fields.

    Returns:
        ``None`` for a skipped record; otherwise a tuple
        ``(url, n_sentences, n_words)`` where ``n_sentences`` is the length
        of the result of ``split_text_into_sentences`` on the text and
        ``n_words`` is the length of the result of ``word_tokenize`` on the
        same text.

    Raises:
        json.JSONDecodeError: If ``line_str`` is not valid JSON.
    """
    line = json.loads(line_str)
    # .get() makes an absent field behave exactly like an explicit null, so a
    # record with a missing key is skipped instead of raising KeyError.
    text = line.get("text")
    url = line.get("url")
    if (
        line.get("language") != "en"
        or text is None
        or line.get("title") is None
        or url is None
    ):
        return None
    sentences = split_text_into_sentences(text, language="en")
    # Words are counted over the full text, not per sentence.
    words = word_tokenize(text)
    return url, len(sentences), len(words)