def load_from_tsv

def load_from_tsv_gz()

in voxpopuli/get_lm_data.py [0:0]

19 lines of code
3 McCabe index (conditional complexity)


def load_from_tsv_gz(in_file: Path) -> List[str]:
    """Return the cleaned transcripts of the training rows in a gzipped table.

    The file is read as gzip-compressed, pipe-delimited text (despite the
    "tsv" in the name, the field delimiter used here is ``|``) with a header
    row. Quoting is disabled, so quote characters are kept as literal text.

    Only rows whose ``split`` column equals ``"train"`` are kept. For each of
    them, the ``normed_text`` column is passed through
    ``str.translate(REMOVE_TRANSLATOR)`` and appended to the result, in file
    order.

    Args:
        in_file: Path to the gzip-compressed input file.

    Returns:
        The translated ``normed_text`` values of all training rows.

    Raises:
        KeyError: If the header lacks a ``split`` column, or lacks a
            ``normed_text`` column while at least one training row exists.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding (assumes the data files are UTF-8 -- TODO confirm), and pass
    # newline="" as the csv module asks so it does its own line splitting.
    with gzip.open(in_file, "rt", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(
            f,
            delimiter="|",
            quotechar=None,
            doublequote=False,
            lineterminator="\n",
            quoting=csv.QUOTE_NONE,
        )
        # REMOVE_TRANSLATOR is a module-level translation table defined
        # outside this function; presumably it strips unwanted characters.
        return [
            row["normed_text"].translate(REMOVE_TRANSLATOR)
            for row in reader
            if row["split"] == "train"
        ]