def skip()

in fastchat/data/optional_clean.py [0:0]


def skip(conv, args):
    """Return True when the conversation ``conv`` should be dropped.

    Two independent filters are applied, in this order:

    * Language filter -- active when ``args.keep_lang`` is not ``"all"`` or
      ``args.skip_lang`` is not ``None``. The ``"value"`` fields of every
      entry in ``conv["conversations"]`` are joined with newlines and passed
      to ``Detector``; if detection raises, the language code is treated as
      ``"unknown"``. The conversation is dropped when a specific
      ``keep_lang`` is requested and the detected code differs from it
      (which includes the ``"unknown"`` case), or when the detected code
      equals ``args.skip_lang``.
    * Repetition filter -- active when ``args.reduce_rep`` is truthy. The
      conversation is dropped if any entry's ``"value"`` contains the same
      digit nine times in a row.

    Returns False when no active filter rejects the conversation.
    """
    # Language-based removal.
    language_filter_on = args.keep_lang != "all" or args.skip_lang is not None
    if language_filter_on:
        joined = "\n".join(turn["value"] for turn in conv["conversations"])
        try:
            detected = Detector(joined).language.code
        except (pycld2.error, polyglot.detect.base.UnknownLanguage):
            # Detection failed; fall back to a sentinel code.
            detected = "unknown"

        not_kept = args.keep_lang != "all" and detected != args.keep_lang
        if not_kept or detected == args.skip_lang:
            return True

    # Removal of conversations containing long runs of one digit.
    if not args.reduce_rep:
        return False

    digit_run = re.compile(r"(\d)\1{8}")
    return any(
        digit_run.search(turn["value"]) is not None
        for turn in conv["conversations"]
    )