def collect_perf()

in k-fold-cross-validation/collect_perf.py [0:0]
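
The function relies on a small `config` module and a module-level `logger`, neither of which is shown on this page. Below is a minimal sketch of the assumed `config.py`; the names match what the function references, but the values are illustrative assumptions, not the repository's actual settings.

# config.py -- a minimal sketch; values are illustrative assumptions
INITIAL_POLLING_DELAY = 30  # seconds before the first re-poll
DELAY_CAP = 60 * 10         # never wait more than 10 minutes between polls
TIME_OUT = 60 * 60 * 3      # give up polling after 3 hours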


import boto
import logging
import time

import config

logger = logging.getLogger(__name__)  # module-level logger used by the polling loop


def collect_perf(eval_id_list):
    """
    This function collects the AUC score for a list of
    evaluations (based on binary classification model)
    on Amazon ML. If any evaluation is in progress,
    the script will poll and wait with exponential
    backoff.

    Args:
        eval_id_list: a list of Evaluation IDs to collect
            performance metrics.
    Returns:
        a map of completed evaluation's ID to
            the corresponding AUC score.
    Raises:
        exception when any Evaluation is in
            Failed status.
    """
    ml = boto.connect_machinelearning()  # boto Amazon ML client
    completed_evals = dict()  # maps completed Evaluation ID -> AUC score
    start_timestamp = time.time()  # start timestamp in seconds

    # time delay in seconds between two polling attempts
    polling_delay = config.INITIAL_POLLING_DELAY

    logger.info("Checking the Evaluation status...")
    while time.time() - start_timestamp < config.TIME_OUT:
        any_in_progress = False  # assume all complete

        for ev_id in eval_id_list:  # fetch each Evaluation's status
            if ev_id in completed_evals:  # skip any completed Evaluations
                continue

            # fetch evaluation status
            evaluation = ml.get_evaluation(ev_id)
            eval_status = evaluation["Status"]
            logger.info("{} status: {}".format(ev_id, eval_status))

            if eval_status == "COMPLETED":
                # get the AUC score from the Evaluation
                auc = evaluation["PerformanceMetrics"][
                    "Properties"]["BinaryAUC"]
                # mark this Evaluation as completed and record its AUC
                # score as a float; completed Evaluations are skipped
                # on subsequent polls
                completed_evals[ev_id] = float(auc)
            elif eval_status == "FAILED":
                raise Exception("Evaluation {} is FAILED!".format(
                    ev_id))
            else:
                # any other status (e.g. PENDING, INPROGRESS): keep polling
                any_in_progress = True

        if not any_in_progress:  # exit polling if all Evaluations completed
            break
        logger.debug("Next poll in {} seconds...".format(polling_delay))
        time.sleep(polling_delay)
        # double the delay for the next poll, capped at config.DELAY_CAP
        polling_delay = min(polling_delay * 2, config.DELAY_CAP)
    # if the loop timed out, only Evaluations completed so far are returned
    return completed_evals
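
A minimal usage sketch, assuming the Evaluation IDs come from a prior k-fold cross-validation run; the IDs below are hypothetical placeholders, not real Amazon ML entity IDs.

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # hypothetical Evaluation IDs from a 3-fold cross-validation run
    eval_ids = ["ev-fold-1", "ev-fold-2", "ev-fold-3"]

    auc_by_eval = collect_perf(eval_ids)
    for ev_id, auc in auc_by_eval.items():
        print("{}: AUC = {:.4f}".format(ev_id, auc))

    # the average AUC across folds is the cross-validated estimate
    avg_auc = sum(auc_by_eval.values()) / len(auc_by_eval)
    print("Average AUC: {:.4f}".format(avg_auc))

With the illustrative config values sketched above, successive waits double (30 s, 60 s, 120 s, ...) until capped at DELAY_CAP, so long-running Evaluations are not polled more often than necessary, while TIME_OUT bounds the total time spent waiting.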