in assets/training/model_evaluation/src/utils.py
def _log_metrics(metrics, artifacts):
"""Log metrics and artifacts to current run.
Args:
metrics (_type_): _description_
artifacts (_type_): _description_
Raises:
ModelEvaluationException: _description_
"""
table_scores = {}
nonscalar_scores = {}
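# Metrics in this list arrive as per-example lists (e.g. perplexity) and are handled separately from scalars.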
list_metrics = [metrics_constants.Metric.FMPerplexity]
run = current_run.run
list_scores = {}
classwise_scores = {}
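# Bucket the artifact metrics by how they need to be logged: tables, lists, non-scalar artifacts and classwise scores.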
for name, score in artifacts.items():
if score is None:
logger.warning("Empty score for {}. Skipping.".format(name))
continue
elif _scoring_utilities.is_table_metric(name) or name in metrics_constants.Metric.QA_GPT_METRICS_SET \
or name == metrics_constants.Metric.BERTScore:
table_scores[name] = score
elif name in list_metrics:
try:
list_scores[name] = list(score)
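# Also surface mean perplexity as a scalar metric.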
if name == metrics_constants.Metric.FMPerplexity:
metrics["mean_" + name] = np.mean(score)
except TypeError:
logger.warning(f"{name} is not of type list.")
elif name in metrics_constants.Metric.NONSCALAR_FULL_SET or \
name in metrics_constants.FULL_NONSCALAR_SET:
nonscalar_scores[name] = score
elif name in metrics_constants.TrainingResultsType.ALL_TIME:
# Filter out time metrics as we do not log these
pass
elif name in metrics_constants.FULL_CLASSWISE_SET:
classwise_scores[name] = score
else:
logger.warning("Unknown metric {}. Will not log.".format(name))
try:
# Log the scalar metrics. (Currently, these are stored in CosmosDB)
for name, score in metrics.items():
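# List-valued entries cannot be logged as scalar metrics; set them aside with the other list metrics.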
if isinstance(score, list):
list_scores[name] = list(score)
continue
run.log(name, score)
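# Log the table metrics (BERTScore histograms, GPT rating counts and other tables).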
for name, score in table_scores.items():
if name == metrics_constants.Metric.BERTScore:
for k, v in score.items():
    if not isinstance(v, (np.ndarray, list)):
        continue
    # TODO: check if we need to look in different keys for precision, recall and f1
    counts, bin_edges = np.histogram(v, bins=10)
    counts = np.array(counts).tolist()
    bin_edges = np.array(bin_edges).tolist()
    hist_table = {"_score": json.dumps(bin_edges[1:]), "count": json.dumps(counts)}
    run.log_table("Bert F1 Score", value=hist_table)
    run.log_table("Bert Precision", value=hist_table)
    run.log_table("Bert Recall", value=hist_table)
elif name in metrics_constants.Metric.QA_GPT_METRICS_SET:
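# GPT-judged QA metrics are integer ratings from 1 to 5; log them as a rating/count table.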
try:
if not isinstance(score, (list, np.ndarray)):
logger.warning(f"{name} is not an iterable. \nValue: {score}")
continue
int_score = [int(i) for i in score]
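# Tally how many examples received each rating from 1 to 5.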
counts = [0]*5
for i in int_score:
counts[i-1] += 1
cur_score = {
"_rating": [i for i in range(1, 6)],
"count": counts
}
run.log_table(name, cur_score)
except Exception as e:
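# When metric computation failed upstream, the artifact may hold the exception class name instead of scores.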
if isinstance(score, (list, np.ndarray)) and len(score) > 0:
exception_cls_name = score[0]
logger.warning(f"Ignoring metric: {name}\n Computation Failed due to: {exception_cls_name}")
else:
logger.warning(f"Ignoring metric: {name}\n Logging Failed due to: {repr(e)}")
else:
run.log_table(name, score)
# for name, score in list_scores.items():
# # TODO: Add checks for logging longer lists
# pass
# # run.log_list(name, score)
# Log the non-scalar metrics. (Currently, these are all artifact-based.)
for name, score in nonscalar_scores.items():
if name == metrics_constants.Metric.AccuracyTable:
run.log_accuracy_table(name, score)
elif name in metrics_constants.Metric.IMAGE_LEVEL_BINARY_CLASSIFIER_METRICS:
run.log_table(name, score)
elif name == metrics_constants.Metric.ConfusionMatrix:
run.log_confusion_matrix(name, score)
elif name == metrics_constants.Metric.CONFUSION_MATRICES_PER_SCORE_THRESHOLD:
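# Log one confusion matrix per score threshold.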
for key, confusion_matrix in score.items():
run.log_confusion_matrix(key, confusion_matrix)
elif name == metrics_constants.Metric.Residuals:
run.log_residuals(name, score)
elif name == metrics_constants.Metric.PredictedTrue:
run.log_predictions(name, score)
elif name in metrics_constants.Metric.NONSCALAR_FORECAST_SET:
# Filter out non-scalar forecasting metrics as we do not log these yet
pass
else:
logger.warning("Unsupported non-scalar metric {}. Will not log.".format(name))
# Log the classwise metrics. (Currently, these are all artifact-based.)
for name, score in classwise_scores.items():
try:
if name == metrics_constants.Metric.PER_LABEL_METRICS:
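# Log each label's metrics table under the metric name.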
for classwise_metric in score.values():
    run.log_table(name, classwise_metric)
else:
logger.warning("Unsupported non-scalar metric {}. Will not log.".format(name))
except Exception as ex:  # TODO
    e = ModelEvaluationException(f"Failed to log classwise metric {name} with value {score}. Error: {repr(ex)}")
log_traceback(e, logger)
raise e
except Exception as e:
exception = get_azureml_exception(ComputeMetricsException, MetricsLoggingError, e, wrap_azureml_ex=False,
metric_name=name, error=repr(e))
log_traceback(exception, logger)
raise exception