in scripts/tf_cnn_benchmarks/benchmark_cnn.py [0:0]
def benchmark_one_step(sess,
                       fetches,
                       step,
                       batch_size,
                       step_train_times,
                       trace_filename,
                       partitioned_graph_file_prefix,
                       profiler,
                       image_producer,
                       params,
                       summary_op=None,
                       show_images_per_sec=True,
                       benchmark_logger=None,
                       collective_graph_key=0,
                       should_output_files=True):
  """Advance one step of benchmarking.

  Runs `fetches` (and optionally `summary_op`) once, records the step time,
  and performs the step's optional side outputs: profiler accounting, a
  timeline-trace dump, a partitioned-GraphDef dump, images/sec logging, and
  benchmark-logger metrics.

  Args:
    sess: Session in which to run `fetches`.
    fetches: Ops/tensors to run. When not `params.forward_only`, running them
      must yield a dict containing 'average_loss' (and optionally
      'top_1_accuracy' and 'top_5_accuracy').
    step: Zero-based step number; negative during warmup. Trace and
      partitioned-graph dumps only happen when step == -2.
    batch_size: Global batch size, used to compute images/sec.
    step_train_times: List to which this step's wall time is appended.
    trace_filename: If non-empty, file to dump a trace to at step == -2.
    partitioned_graph_file_prefix: If non-empty, filename prefix for
      partitioned GraphDefs dumped at step == -2.
    profiler: Optional `tf.profiler.Profiler`; fed run metadata for steps
      0 <= step < _NUM_STEPS_TO_PROFILE.
    image_producer: Optional image producer; notified after each step.
    params: Benchmark params (uses forward_only, display_every,
      display_perf_ewma, use_chrome_trace_format).
    summary_op: Optional summary op run alongside `fetches`.
    show_images_per_sec: Whether to log per-step images/sec lines.
    benchmark_logger: Optional logger that receives per-step metrics.
    collective_graph_key: If > 0, set on the RunOptions for collective ops.
    should_output_files: If False, skip writing trace/graph files even when
      their filenames are set.

  Returns:
    Tuple of (summary_str or None, loss value for this step; 0. when
    `params.forward_only`).
  """
  should_profile = profiler and 0 <= step < _NUM_STEPS_TO_PROFILE
  # RunOptions/RunMetadata are only built when some consumer needs them,
  # since requesting them slows down the step.
  need_options_and_metadata = (
      should_profile or collective_graph_key > 0 or
      ((trace_filename or partitioned_graph_file_prefix) and step == -2)
  )
  if need_options_and_metadata:
    run_options = tf.RunOptions()
    if (trace_filename and step == -2) or should_profile:
      run_options.trace_level = tf.RunOptions.FULL_TRACE
    if partitioned_graph_file_prefix and step == -2:
      run_options.output_partition_graphs = True
    if collective_graph_key > 0:
      run_options.experimental.collective_graph_key = collective_graph_key
    run_metadata = tf.RunMetadata()
  else:
    run_options = None
    run_metadata = None
  summary_str = None
  start_time = time.perf_counter()
  if summary_op is None:
    results = sess.run(fetches, options=run_options, run_metadata=run_metadata)
  else:
    (results, summary_str) = sess.run(
        [fetches, summary_op], options=run_options, run_metadata=run_metadata)
  if not params.forward_only:
    lossval = results['average_loss']
  else:
    # Forward-only mode computes no loss.
    lossval = 0.
  if image_producer is not None:
    image_producer.notify_image_consumption()
  train_time = time.perf_counter() - start_time
  step_train_times.append(train_time)
  # Log on the first real step and every display_every steps thereafter.
  if (show_images_per_sec and step >= 0 and
      (step == 0 or (step + 1) % params.display_every == 0)):
    _log_step_stats(step, batch_size, step_train_times, lossval, results,
                    params, benchmark_logger)
  if need_options_and_metadata:
    if should_profile:
      profiler.add_step(step, run_metadata)
    if trace_filename and step == -2 and should_output_files:
      _dump_trace(trace_filename, run_metadata, params)
    if partitioned_graph_file_prefix and step == -2 and should_output_files:
      _dump_partitioned_graphs(partitioned_graph_file_prefix, run_metadata)
  return (summary_str, lossval)


def _log_step_stats(step, batch_size, step_train_times, lossval, results,
                    params, benchmark_logger):
  """Logs images/sec, loss and (if present) accuracies for one step."""
  speed_mean, speed_uncertainty, speed_jitter = get_perf_timing(
      batch_size, step_train_times, params.display_perf_ewma)
  log_str = '%i\t%s\t%.*f' % (
      step + 1,
      get_perf_timing_str(speed_mean, speed_uncertainty, speed_jitter),
      LOSS_AND_ACCURACY_DIGITS_TO_SHOW, lossval)
  if 'top_1_accuracy' in results:
    log_str += '\t%.*f\t%.*f' % (
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW, results['top_1_accuracy'],
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW, results['top_5_accuracy'])
  log_fn(log_str)
  if benchmark_logger:
    benchmark_logger.log_metric(
        'current_examples_per_sec', speed_mean, global_step=step + 1)
    if 'top_1_accuracy' in results:
      benchmark_logger.log_metric(
          'top_1_accuracy', results['top_1_accuracy'], global_step=step + 1)
      benchmark_logger.log_metric(
          'top_5_accuracy', results['top_5_accuracy'], global_step=step + 1)


def _dump_trace(trace_filename, run_metadata, params):
  """Writes `run_metadata.step_stats` to `trace_filename`.

  Uses Chrome trace format when `params.use_chrome_trace_format` is set,
  otherwise the StepStats proto's text representation.
  """
  log_fn('Dumping trace to %s' % trace_filename)
  trace_dir = os.path.dirname(trace_filename)
  # dirname() is '' for a bare filename; MakeDirs('') would fail, so only
  # create the directory when one was actually specified.
  if trace_dir and not gfile.Exists(trace_dir):
    gfile.MakeDirs(trace_dir)
  with gfile.Open(trace_filename, 'w') as trace_file:
    if params.use_chrome_trace_format:
      trace = timeline.Timeline(step_stats=run_metadata.step_stats)
      trace_file.write(trace.generate_chrome_trace_format(show_memory=True))
    else:
      trace_file.write(str(run_metadata.step_stats))


def _dump_partitioned_graphs(partitioned_graph_file_prefix, run_metadata):
  """Writes each partitioned GraphDef to a per-device file.

  The device name (with '/' and ':' replaced by '_') is inserted between the
  prefix's base name and extension; a 'txt' extension selects text output.
  """
  path, filename = os.path.split(partitioned_graph_file_prefix)
  if '.' in filename:
    base_filename, ext = filename.rsplit('.', 1)
    ext = '.' + ext
  else:
    base_filename, ext = filename, ''
  as_text = filename.endswith('txt')
  for graph_def in run_metadata.partition_graphs:
    device = graph_def.node[0].device.replace('/', '_').replace(':', '_')
    graph_filename = '%s%s%s' % (base_filename, device, ext)
    log_fn('Writing partitioned GraphDef as %s to %s' % (
        'text' if as_text else 'binary',
        os.path.join(path, graph_filename)))
    tf.train.write_graph(graph_def, path, graph_filename, as_text)