in scripts/tf_cnn_benchmarks/benchmark_cnn.py [0:0]
def _eval_once(self, sess, summary_writer, fetches, summary_op,
               image_producer, global_step):
  """Evaluate the model using the validation dataset."""
  with self._do_eval():
    mlperf.logger.log_eval_epoch(
        mlperf.tags.EVAL_START, global_step, self.batch_size)
    loop_start_time = start_time = time.perf_counter()
    # TODO(laigd): refactor the part to compute/report the accuracy.
    # Currently it only works for image models.
    top_1_accuracy_sum = 0.0
    top_5_accuracy_sum = 0.0
    total_eval_count = self.num_batches * self.batch_size
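    # Run every validation batch through the model, accumulating per-batch
    # top-1/top-5 accuracies as we go.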
    for step in xrange(self.num_batches):
      if (summary_writer and self.params.save_summaries_steps > 0 and
          (step + 1) % self.params.save_summaries_steps == 0):
        results, summary_str = sess.run([fetches, summary_op])
        summary_writer.add_summary(summary_str)
      else:
        results = sess.run(fetches)
      # Make global_step available in results for postprocessing.
      results['global_step'] = global_step
      results = self.model.postprocess(results)
      top_1_accuracy_sum += results['top_1_accuracy']
      top_5_accuracy_sum += results['top_5_accuracy']
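      # Periodically report instantaneous throughput, measured over the last
      # display_every steps.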
      if (step + 1) % self.params.display_every == 0:
        duration = time.perf_counter() - start_time
        examples_per_sec = (
            self.batch_size * self.params.display_every / duration)
        log_fn('%i\t%.1f examples/sec' % (step + 1, examples_per_sec))
        start_time = time.perf_counter()
      if image_producer is not None:
        image_producer.notify_image_consumption()
    loop_end_time = time.perf_counter()
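    # Average the per-batch accuracies; since every batch has the same size
    # here, this equals the accuracy over all evaluated examples.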
    accuracy_at_1 = top_1_accuracy_sum / self.num_batches
    accuracy_at_5 = top_5_accuracy_sum / self.num_batches
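    # Export the aggregate accuracies as TensorBoard summaries.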
    summary = tf.Summary()
    summary.value.add(tag='eval/Accuracy@1', simple_value=accuracy_at_1)
    summary.value.add(tag='eval/Accuracy@5', simple_value=accuracy_at_5)
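    # postprocess() may attach extra scalar results, marked with
    # SIMPLE_VALUE_RESULT_PREFIX; export those from the final batch as well.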
    for result_key, result_value in results.items():
      if result_key.startswith(constants.SIMPLE_VALUE_RESULT_PREFIX):
        prefix_len = len(constants.SIMPLE_VALUE_RESULT_PREFIX)
        summary.value.add(tag='eval/' + result_key[prefix_len:],
                          simple_value=result_value)
    if summary_writer:
      summary_writer.add_summary(summary, global_step)
    log_fn('Accuracy @ 1 = %.4f Accuracy @ 5 = %.4f [%d examples]' %
           (accuracy_at_1, accuracy_at_5, total_eval_count))
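    # Overall throughput across the whole evaluation loop, including the
    # steps that also ran the summary op.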
    elapsed_time = loop_end_time - loop_start_time
    images_per_sec = (self.num_batches * self.batch_size / elapsed_time)
    if self.mode != constants.BenchmarkMode.TRAIN_AND_EVAL:
      # Note that we compute the top 1 accuracy and top 5 accuracy for each
      # batch, which will have a slight performance impact.
      log_fn('-' * 64)
      log_fn('total images/sec: %.2f' % images_per_sec)
      log_fn('-' * 64)
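    # Emit a structured result record for the benchmark logger, if one is
    # configured.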
    if self.benchmark_logger:
      eval_result = {
          'eval_top_1_accuracy': accuracy_at_1,
          'eval_top_5_accuracy': accuracy_at_5,
          'eval_average_examples_per_sec': images_per_sec,
          tf.GraphKeys.GLOBAL_STEP: global_step,
      }
      self.benchmark_logger.log_evaluation_result(eval_result)
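    # Close out the eval epoch in the MLPerf compliance log.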
    mlperf.logger.log_eval_epoch(
        mlperf.tags.EVAL_STOP, global_step, self.batch_size)
    mlperf.logger.log(key=mlperf.tags.EVAL_SIZE,
                      value=self.num_batches * self.batch_size)
    if self.params.model != 'ssd300':  # ssd300 logs eval accuracy elsewhere.
      mlperf.logger.log_eval_accuracy(
          accuracy_at_1, global_step, self.train_batch_size,
          examples_per_epoch=self.dataset.num_examples_per_epoch('train'))
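    # Record the accuracy target used for early stopping, if one was set.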
    if self.params.stop_at_top_1_accuracy:
      mlperf.logger.log(key=mlperf.tags.EVAL_TARGET,
                        value=self.params.stop_at_top_1_accuracy)
    return accuracy_at_1, accuracy_at_5