in assets/training/model_evaluation/src/compute_metrics.py
def load_data(self):
    """Load test data for metrics computation.

    Reads ground truth, predictions and (optionally) prediction probabilities
    from the configured paths and stores them on the instance. Returns nothing.
    """
    ground_truth = None
    if self.ground_truth:
        # A plain directory of files is read file-by-file; a single file or
        # an MLTable goes through the generic reader.
        if os.path.isdir(self.ground_truth) and not self.is_ground_truth_mltable:
            ground_truth, _ = read_multiple_files(self.ground_truth)
        else:
            ground_truth, _ = read_data(self.ground_truth)
        ground_truth = list(ground_truth)[0]
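        # NOTE: both readers appear to yield an iterable of DataFrame chunks
        # (plus a second value discarded here), of which only the first chunk
        # is kept. That chunked shape is an assumption inferred from the
        # list(...)[0] pattern, not documented behavior of
        # read_data/read_multiple_files.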
        # GPT-assisted metrics need the original questions and contexts. They
        # are pulled from the ground-truth data for QnA, and for chat
        # completion only when running the RAG-evaluation sub-task.
        if self.config.get(constants.OpenAIConstants.METRICS_KEY) and (
            self.task in [TASK.QnA] or (self.task == TASK.CHAT_COMPLETION and
                                        self.config.get(SubTask.SUB_TASK_KEY, "") == SubTask.RAG_EVALUATION)
        ):
            questions_key = (constants.OpenAIConstants.QUESTIONS_KEY,
                             self.rag_input_data_keys[constants.OpenAIConstants.QUESTIONS_KEY])
            contexts_key = (constants.OpenAIConstants.CONTEXTS_KEY,
                            self.rag_input_data_keys[constants.OpenAIConstants.CONTEXTS_KEY])
            keys = [questions_key, contexts_key]
            key_data = {key[0]: fetch_key_column_from_data(ground_truth, key[1], self.ground_truths_column_name)
                        for key in keys}
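            # key_data maps each constant key name to the column values fetched
            # from the ground-truth frame, e.g. (key names are an assumption,
            # taken from OpenAIConstants):
            #   {"questions": ["What is ...?", ...], "contexts": ["...", ...]}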
            if self.task == TASK.QnA and not any(len(values) for values in key_data.values()):
                logger.warning("Failed to fetch questions and contexts from ground truth data. "
                               "Skipping GPT-based metrics calculation.")
                self.config.pop(constants.OpenAIConstants.METRICS_KEY)
            elif self.task == TASK.CHAT_COMPLETION and not all(len(values) for values in key_data.values()):
                exception = get_azureml_exception(DataValidationException,
                                                  BadQuestionsContextGroundTruthData, None)
                log_traceback(exception, logger)
                raise exception
            else:
                for key, values in key_data.items():
                    if len(values):
                        self.config[key] = values
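            # Note the asymmetry coded above: for QnA, missing questions and
            # contexts merely disables the GPT-assisted metrics, while the
            # RAG-evaluation chat sub-task treats them as required input and
            # fails fast with a data-validation error.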
        if len(ground_truth) > 0:
            ground_truth, self._is_multiple_ground_truth = filter_ground_truths(ground_truth, self.task,
                                                                                self.ground_truths_column_name,
                                                                                self.extra_y_test_cols,
                                                                                self.config)
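        # filter_ground_truths is assumed to narrow the frame to the label
        # column(s) (plus any extra_y_test_cols) and to report whether a row
        # can carry multiple reference answers, which is what
        # self._is_multiple_ground_truth records for the metrics computation.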
    # Predictions follow the same directory / single-file / MLTable logic as
    # the ground truth above.
    if os.path.isdir(self.predictions) and not self.is_predictions_mltable:
        predictions, _ = read_multiple_files(path=self.predictions)
    else:
        predictions, _ = read_data(self.predictions)
    predictions = list(predictions)[0]
    if self.predictions_column_name is not None:
        predictions = filter_predictions(predictions, self.task, self.predictions_column_name)
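    # filter_predictions is assumed to mirror filter_ground_truths, selecting
    # just the named prediction column from the frame read above.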
    # Prediction probabilities are optional; they are only supplied for tasks
    # (e.g. classification) whose metrics need class scores.
    predictions_probabilities = None
    if self.predictions_probabilities is not None:
        if os.path.isdir(self.predictions_probabilities) and not self.is_predictions_probabilities_mltable:
            predictions_probabilities, _ = read_multiple_files(path=self.predictions_probabilities)
        else:
            predictions_probabilities, _ = read_data(self.predictions_probabilities)
        predictions_probabilities = list(predictions_probabilities)[0]
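        # The expected schema here is an assumption: typically one probability
        # column per class, aligned row-for-row with the predictions frame.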
    self.ground_truth, self.predictions, self.predictions_probabilities = \
        ground_truth, predictions, predictions_probabilities
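
# Usage sketch (hypothetical, not part of the component): assuming a runner
# object that exposes the attributes used above (the ground_truth,
# predictions and predictions_probabilities paths, the is_*_mltable flags,
# task, config, and the column-name fields), load_data would be driven
# roughly like this; ComputeMetricsRunner and its constructor arguments are
# illustrative names, not the component's real entry point:
#
#     runner = ComputeMetricsRunner(
#         task=TASK.QnA,
#         ground_truth="data/ground_truth.jsonl",
#         predictions="data/predictions.jsonl",
#         predictions_probabilities=None,
#         ground_truths_column_name="answer",
#         predictions_column_name="prediction",
#     )
#     runner.load_data()
#     # runner.ground_truth and runner.predictions now hold DataFrames ready
#     # for metric computation.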