def main()

in scripts/coxph_evaluation.py [0:0]
80 lines of code
2 McCabe index (conditional complexity)

def main(args):
    """
    Runs evaluation for the data set
        1. Loads model from tar.gz
        2. Reads in test features
        3. Runs an accuracy report
        4. Generates feature importance with SHAP

    Args:
        model-name (str): Name of the trained model, default xgboost
        test-features (str): preprocessed test features for
         evaluation, default test_features.csv
        train-features (str): preproceed train features for SHAP,
        default train_features.csv
        test-features (str): preproceed test features for SHAP,
        default test_features.csv
        report-name (str): Name of the evaluation output
        , default evaluation.json
        shap-name (str): Name of the SHAP feature importance
        output file, default shap.csv
        threshold (float): Threshold to cut probablities at
        , default 0.5
        tau (int): time range for the c-index will be from 0 to tau
        , default 100
    """

    model_path = os.path.join("/opt/ml/processing/model", "model.tar.gz")

    logger.info(f"Extracting model from path: {model_path}")

    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")
    logger.info("Loading model")
    with open(args.model_name, "rb") as f:
        model = pickle.load(f)

    logger.info("Loading train and test data")

    test_features_data = os.path.join("/opt/ml/processing/test", args.test_features)
    train_features_data = os.path.join("/opt/ml/processing/train", args.train_features)

    X_test = pd.read_csv(test_features_data, header=0)
    X_train = pd.read_csv(train_features_data, header=0)

    y_test = X_test.iloc[:, 0]
    y_train = X_train.iloc[:, 0]

    # Reverse transfrom to event and duration columns
    y_test_df = pd.DataFrame(
        np.vstack((np.where(y_test > 0, 1, 0), np.abs(y_test))).T,
        columns=["event", "duration"],
    )

    y_train_df = pd.DataFrame(
        np.vstack((np.where(y_train > 0, 1, 0), np.abs(y_train))).T,
        columns=["event", "duration"],
    )

    X_test.drop(X_test.columns[0], axis=1, inplace=True)
    X_train.drop(X_test.columns[0], axis=1, inplace=True)

    logger.info("Running inference")

    predictions = model.predict(
        xgboost.DMatrix(X_test.values[:, 1:]), output_margin=False
    )

    logger.info("Creating evaluation report")

    # NOTE: technical evaluation is really not as a classifier
    # TO DO: Normalize to 0 to 1 scale
    report_dict = classification_report(
        y_test_df["event"], predictions > args.threshold, output_dict=True
    )
    report_dict["accuracy"] = accuracy_score(
        y_test_df["event"], predictions > args.threshold
    )

    _, y_train_tuple = get_x_y(y_train_df, ["event", "duration"], pos_label=True)
    _, y_test_tuple = get_x_y(y_test_df, ["event", "duration"], pos_label=True)

    concordance_index = concordance_index_ipcw(
        y_train_tuple,
        y_test_tuple,
        predictions,
        tau=args.tau,  # default within 100 days
    )

    report_dict["concordance_index"] = {
        "cindex": float(concordance_index[0]),
        "concordant": int(concordance_index[1]),
        "discordant": int(concordance_index[2]),
        "tied_risk": int(concordance_index[3]),
        "tied_time": int(concordance_index[4]),
    }

    times, score = brier_score(
        y_train_tuple, y_test_tuple, predictions, y_test_df["duration"].max() - 1
    )

    report_dict["brier_score"] = {
        "times": times.astype(np.int32).tolist(),
        "score": score.astype(np.float32).tolist(),
    }

    logger.info(f"Classification report:\n{report_dict}")

    evaluation_output_path = os.path.join(
        "/opt/ml/processing/evaluation", args.report_name
    )
    logger.info(f"Saving classification report to {evaluation_output_path}")

    logger.debug(report_dict)

    with open(evaluation_output_path, "w") as f:
        f.write(json.dumps(report_dict))

    # SHAP
    latest_job_debugger_artifacts_path = "/opt/ml/processing/debug/debug-output"
    trial = create_trial(latest_job_debugger_artifacts_path)

    shap_values = trial.tensor("full_shap/f0").value(trial.last_complete_step)

    pd.DataFrame(shap_values).to_csv(
        os.path.join("/opt/ml/processing/evaluation", args.shap_name)
    )

    shap_no_base = shap_values[1:, :-1]
    feature_names = X_train.columns
    os.makedirs("/opt/ml/processing/plot/", exist_ok=True)
    logger.info(shap_values.shape, shap_no_base.shape, X_train.shape)
    shap.summary_plot(
        shap_no_base, features=X_train, feature_names=feature_names, show=False
    )
    plt.savefig("/opt/ml/processing/plot/feature_importance.png", bbox_inches="tight")