def validate_datapoint()

in kilt/kilt_utils.py [0:0]


def validate_datapoint(datapoint, logger):
    """Return True if ``datapoint`` passes the basic field-type checks.

    The checks, in order:

    * ``datapoint["input"]`` is a string;
    * if ``"output"`` is present, it is not empty;
    * for every output, ``"answer"`` (when present) is a string;
    * for every entry of an output's ``"provenance"`` (when present),
      ``"wikipedia_id"`` is a string, ``None`` or absent, and ``"title"``
      is a string.

    Missing fields are reported as validation failures instead of raising
    ``KeyError``: a missing ``"input"`` or provenance ``"title"`` makes the
    datapoint invalid, and a missing ``"id"`` only shows up as ``None`` in
    the warning text.

    Args:
        datapoint: dict describing one record.
        logger: object exposing ``warning(message)``; pass a falsy value
            (e.g. ``None``) to validate silently.

    Returns:
        ``True`` when every check passes, ``False`` at the first failure.
    """
    # Only used to label warnings, so a missing id must not abort validation.
    datapoint_id = datapoint.get("id")

    def reject(template, *values):
        # Format lazily: the message is only built when a logger is present.
        if logger:
            logger.warning(template.format(datapoint_id, *values))
        return False

    # input is a string (a missing input is None and therefore rejected)
    question = datapoint.get("input")
    if not isinstance(question, str):
        return reject("[{}] input is not a string {}", question)

    # datapoints without an output field have nothing further to check
    if "output" not in datapoint:
        return True

    # output is not empty
    outputs = datapoint["output"]
    if len(outputs) == 0:
        return reject("[{}] empty output")

    for output in outputs:
        # answer is a string
        if "answer" in output and not isinstance(output["answer"], str):
            return reject("[{}] answer is not a string {}", output["answer"])

        # An output with no provenance, or an empty provenance list, is
        # accepted; only the entries that are present get checked.
        if "provenance" not in output:
            continue

        for provenance in output["provenance"]:
            # wikipedia_id, when given, is a string (None/absent is allowed)
            wikipedia_id = provenance.get("wikipedia_id")
            if wikipedia_id is not None and not isinstance(wikipedia_id, str):
                return reject(
                    "[{}] wikipedia_id is not a string {}", wikipedia_id
                )

            # title is provided and is a string
            title = provenance.get("title")
            if not isinstance(title, str):
                return reject("[{}] title is not a string {}", title)

    return True