def detect_type()

in fastchat/serve/monitor/dataset_release_scripts/arena_33k/filter_bad_conv.py [0:0]

26 lines of code
19 McCabe index (conditional complexity)


def detect_type(conv):
    """Classify a pairwise conversation record and return a ``TypeCode``.

    Both sides (``conv["conversation_a"]`` then ``conv["conversation_b"]``)
    are inspected in order, and the first rule that matches decides the
    result. For each side the rules are applied in this order:

    1. ``BAD_FORMAT``   - some row's ``"content"`` is not a ``str``.
    2. ``TOO_SHORT``    - the side has at most two messages and every user
       prompt (lower-cased, stripped) is shorter than 16 characters.
    3. ``TOO_FREQUENT`` - every user prompt (lower-cased, stripped) is
       found in ``frequent_prompts``.
    4. Per message, on its lower-cased text: ``ANONYMIZED`` if it contains
       ``"<anonymized>"``, else ``REDACTED`` if it contains
       ``"<redacted>"``, else ``BLOCKED_WORD`` if any entry of
       ``blocked_words`` is a substring.

    Only when neither side matched are ``conv["model_a"]`` and
    ``conv["model_b"]`` compared against the blocked model names
    (``BLOCKED_MODEL``). Otherwise ``TypeCode.CORRECT`` is returned.

    Args:
        conv: Mapping with keys ``"conversation_a"``, ``"conversation_b"``,
            ``"model_a"`` and ``"model_b"``. Each conversation is an
            iterable of rows exposing ``"content"`` and ``"role"``.

    Returns:
        The ``TypeCode`` member of the first matching rule, or
        ``TypeCode.CORRECT`` if nothing matched.
    """
    blocked_models = ("vicuna-33b", "mpt-30b-chat")

    for side in ("conversation_a", "conversation_b"):
        turns = conv[side]
        contents = [turn["content"] for turn in turns]

        # Validate every message before calling any string method on it.
        if not all(isinstance(text, str) for text in contents):
            return TypeCode.BAD_FORMAT

        prompts = [
            turn["content"].lower().strip()
            for turn in turns
            if turn["role"] == "user"
        ]

        # NOTE(review): all() over an empty list is True, so a side with no
        # "user" rows satisfies the prompt half of both rules below --
        # confirm that this is the intended outcome.
        has_few_messages = len(contents) <= 2
        if has_few_messages and all(len(prompt) < 16 for prompt in prompts):
            return TypeCode.TOO_SHORT

        if all(prompt in frequent_prompts for prompt in prompts):
            return TypeCode.TOO_FREQUENT

        # Markers and blocked words are tested one message at a time, so an
        # earlier message's match takes precedence over a later message's.
        for text in contents:
            lowered = text.lower()
            if "<anonymized>" in lowered:
                return TypeCode.ANONYMIZED
            if "<redacted>" in lowered:
                return TypeCode.REDACTED
            # Matching is done against the lower-cased text; presumably the
            # entries of blocked_words are lower-case too -- TODO confirm.
            if any(word in lowered for word in blocked_words):
                return TypeCode.BLOCKED_WORD

    # The model check runs last, so content-based codes take priority.
    if any(conv[slot] in blocked_models for slot in ("model_a", "model_b")):
        return TypeCode.BLOCKED_MODEL

    return TypeCode.CORRECT