in cc_net/mine.py [0:0]
def get_lm_languages(self) -> Sequence[str]:
    """Resolve the list of language codes returned for this configuration.

    The first applicable source wins:

    1. ``self.lm_languages`` when it is not ``None`` (an empty list is
       honored and returned as-is).
    2. ``self.lang_whitelist`` when it is non-empty.
    3. Otherwise, the names of the ``*.arpa.bin`` files found directly in
       ``self.lm_dir``, truncated at their first dot, minus any entry
       listed in ``self.lang_blacklist``.

    Note that ``self.lang_blacklist`` is only applied to the discovered
    names (case 3), never to the explicitly configured lists.

    Returns:
        The language codes. Discovered codes are returned sorted so the
        result does not depend on the directory listing order.
    """
    if self.lm_languages is not None:
        return self.lm_languages

    if self.lang_whitelist:
        return self.lang_whitelist

    # Directory listing order is filesystem-dependent; sort so that every
    # run (and every machine) sees the languages in the same order.
    languages = sorted(
        model.name.split(".")[0] for model in self.lm_dir.glob("*.arpa.bin")
    )
    if self.lang_blacklist:
        languages = [
            lang for lang in languages if lang not in self.lang_blacklist
        ]
    return languages