def main()

in scripts/evaluation.py [0:0]


def main(args):
    """
    Runs evaluation for the data set
        1. Loads model from tar.gz
        2. Reads in test features
        3. Runs a classification accuracy report
        4. Generates feature importance with SHAP

    Args:
        model-name (str): Name of the trained model, default xgboost
        test-features (str): Preprocessed test features for evaluation,
            default test_features.csv
        train-features (str): Preprocessed train features for SHAP,
            default train_features.csv
        report-name (str): Name of the evaluation output file,
            default evaluation.json
        shap-name (str): Name of the SHAP feature importance output file,
            default shap.csv
        threshold (float): Threshold to cut probabilities at,
            default 0.5
    """

    # Model artifacts are expected under /opt/ml/processing/model, the
    # processing job's input mount for the trained model archive.
    model_path = os.path.join("/opt/ml/processing/model", "model.tar.gz")

    logger.info(f"Extracting model from path: {model_path}")

    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")
    logger.info("Loading model")
    with open(args.model_name, "rb") as f:
        model = pickle.load(f)

    logger.info("Loading test input data")
    test_features_data = os.path.join("/opt/ml/processing/test", args.test_features)

    X_test = pd.read_csv(test_features_data, header=0)
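    # The first column holds the target label; the remaining columns are features.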
    y_test = X_test.iloc[:, 0]
    X_test.drop(X_test.columns[0], axis=1, inplace=True)
    predictions = model.predict(xgboost.DMatrix(X_test.values))

    logger.info("Creating classification evaluation report")
    report_dict = classification_report(
        y_test, predictions > args.threshold, output_dict=True
    )
    report_dict["accuracy"] = accuracy_score(y_test, predictions > args.threshold)
    report_dict["roc_auc"] = roc_auc_score(y_test, predictions)

    logger.info(f"Classification report:\n{report_dict}")

    evaluation_output_path = os.path.join(
        "/opt/ml/processing/evaluation", args.report_name
    )
    logger.info(f"Saving classification report to {evaluation_output_path}")

    with open(evaluation_output_path, "w") as f:
        f.write(json.dumps(report_dict))

    # SHAP
    train_features_data = os.path.join("/opt/ml/processing/train", args.train_features)
    X_train = pd.read_csv(train_features_data, header=0)
    X_train.drop(X_train.columns[0], axis=1, inplace=True)

    # Load the SHAP values that SageMaker Debugger saved during training
    # (the "full_shap" tensor collection in the debug output artifacts).
    latest_job_debugger_artifacts_path = "/opt/ml/processing/debug/debug-output"
    trial = create_trial(latest_job_debugger_artifacts_path)

    shap_values = trial.tensor("full_shap/f0").value(trial.last_complete_step)

    pd.DataFrame(shap_values).to_csv(
        os.path.join("/opt/ml/processing/evaluation", args.shap_name)
    )

    # Drop the SHAP base value (last column) so only per-feature contributions
    # are plotted; the first row is skipped as well.
    shap_no_base = shap_values[1:, :-1]
    feature_names = X_train.columns
    os.makedirs("/opt/ml/processing/plot/", exist_ok=True)
    logger.info(
        f"Shapes: shap_values={shap_values.shape}, "
        f"shap_no_base={shap_no_base.shape}, X_train={X_train.shape}"
    )
    shap.summary_plot(
        shap_no_base, features=X_train, feature_names=feature_names, show=False
    )
    plt.savefig("/opt/ml/processing/plot/feature_importance.png", bbox_inches="tight")
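
For reference, below is a minimal sketch of the command-line entry point that would feed main(args). The argument names and defaults are taken from the docstring above; the actual parser in scripts/evaluation.py may differ, and import argparse is assumed alongside the module's other imports.

if __name__ == "__main__":
    # Hypothetical parser; argparse converts dashes to underscores, so
    # --model-name becomes args.model_name as used inside main().
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", type=str, default="xgboost")
    parser.add_argument("--test-features", type=str, default="test_features.csv")
    parser.add_argument("--train-features", type=str, default="train_features.csv")
    parser.add_argument("--report-name", type=str, default="evaluation.json")
    parser.add_argument("--shap-name", type=str, default="shap.csv")
    parser.add_argument("--threshold", type=float, default=0.5)
    main(parser.parse_args())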