in k-fold-cross-validation/collect_perf.py [0:0]
import logging
import time

import boto
# config is assumed to be the sample's local module that defines
# INITIAL_POLLING_DELAY, DELAY_CAP and TIME_OUT
import config

logger = logging.getLogger(__name__)


def collect_perf(eval_id_list):
"""
This function collects the AUC score for a list of
evaluations (based on binary classification model)
on Amazon ML. If any evaluation is in progress,
the script will poll and wait with exponential
backoff.
Args:
eval_id_list: a list of Evaluation IDs to collect
performance metrics.
Returns:
a map of completed evaluation's ID to
the corresponding AUC score.
Raises:
exception when any Evaluation is in
Failed status.
"""
ml = boto.connect_machinelearning() # boto Amazon ML client
completed_evals = dict() # to collect completed Evaluations
start_timestamp = time.time() # start timestamp in seconds
    # time delay in seconds between two polling attempts
polling_delay = config.INITIAL_POLLING_DELAY
logger.info("Checking the Evaluation status...")
while time.time() - start_timestamp < config.TIME_OUT:
        any_in_progress = False # assume all Evaluations have completed
        for ev_id in eval_id_list: # fetch each Evaluation's status
if ev_id in completed_evals: # skip any completed Evaluations
continue
# fetch evaluation status
evaluation = ml.get_evaluation(ev_id)
eval_status = evaluation["Status"]
logger.info("{} status: {}".format(ev_id, eval_status))
if eval_status == "COMPLETED":
# get the AUC score from the Evaluation
auc = evaluation["PerformanceMetrics"][
"Properties"]["BinaryAUC"]
                # mark this Evaluation as completed and record its AUC
                # score as a float; it will be skipped on the next poll
completed_evals[ev_id] = float(auc)
            elif eval_status == "FAILED":
                raise Exception("Evaluation {} is in FAILED status!".format(
                    ev_id))
else:
                any_in_progress = True # still pending or in progress
if not any_in_progress: # exit polling if all Evaluations completed
break
logger.debug("Next poll in {} seconds...".format(polling_delay))
time.sleep(polling_delay)
        # double the polling delay for the next poll, capped at DELAY_CAP
polling_delay = min(polling_delay * 2, config.DELAY_CAP)
return completed_evals
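

# --- Usage sketch (assumption; not part of the original module) ---
# A minimal example of how collect_perf might be driven. The Evaluation
# IDs below are hypothetical placeholders for the IDs returned by the
# CreateEvaluation calls made elsewhere in the k-fold workflow.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # hypothetical Evaluation IDs, one per fold
    fold_eval_ids = ["ev-fold-1-placeholder", "ev-fold-2-placeholder"]
    auc_by_eval = collect_perf(fold_eval_ids)
    for eval_id, auc in auc_by_eval.items():
        logger.info("Evaluation {} AUC: {}".format(eval_id, auc))
    if auc_by_eval:
        avg_auc = sum(auc_by_eval.values()) / len(auc_by_eval)
        logger.info("Average AUC across {} folds: {}".format(
            len(auc_by_eval), avg_auc))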