in scripts/tf_cnn_benchmarks/benchmark_cnn.py
# Imports this function relies on (defined at the top of benchmark_cnn.py).
# The `tf` alias is TensorFlow 1.x style: `import tensorflow as tf` or
# `import tensorflow.compat.v1 as tf`, depending on the TF version.
from google.protobuf import text_format
from tensorflow.core.protobuf import rewriter_config_pb2


def create_config_proto(params):
  """Returns session config proto.

  Args:
    params: Params tuple, typically created by make_params or
            make_params_from_flags.
  """
  config = tf.ConfigProto()
  config.allow_soft_placement = True
  # If the intra-op thread count is not set explicitly, use a single thread on
  # GPU and leave the TensorFlow default elsewhere.
  if params.num_intra_threads is None:
    if params.device == 'gpu':
      config.intra_op_parallelism_threads = 1
  else:
    config.intra_op_parallelism_threads = params.num_intra_threads
  config.inter_op_parallelism_threads = params.num_inter_threads
  config.experimental.collective_group_leader = '/job:worker/replica:0/task:0'
  config.gpu_options.experimental.collective_ring_order = params.gpu_indices
  config.gpu_options.force_gpu_compatible = params.force_gpu_compatible
  config.experimental.use_numa_affinity = params.use_numa_affinity
  if params.device == 'cpu':
    # TODO(tucker): change num_gpus to num_devices
    config.device_count['CPU'] = params.num_gpus
  if params.allow_growth is not None:
    config.gpu_options.allow_growth = params.allow_growth
  if params.gpu_memory_frac_for_testing > 0:
    config.gpu_options.per_process_gpu_memory_fraction = (
        params.gpu_memory_frac_for_testing)
  if params.use_unified_memory:
    config.gpu_options.experimental.use_unified_memory = (
        params.use_unified_memory)
  if params.timestamped_allocator:
    config.gpu_options.experimental.timestamped_allocator = (
        params.timestamped_allocator)
  # GPU kernel tracker settings; values <= 0 leave the defaults in place.
  if params.gpu_kt_max_interval > 0:
    config.gpu_options.experimental.kernel_tracker_max_interval = (
        params.gpu_kt_max_interval)
  if params.gpu_kt_max_bytes > 0:
    config.gpu_options.experimental.kernel_tracker_max_bytes = (
        params.gpu_kt_max_bytes)
  if params.gpu_kt_max_pending > 0:
    config.gpu_options.experimental.kernel_tracker_max_pending = (
        params.gpu_kt_max_pending)
  if params.xla:
    # Enable XLA JIT compilation for the whole graph.
    config.graph_options.optimizer_options.global_jit_level = (
        tf.OptimizerOptions.ON_1)
  if params.rewriter_config:
    # An explicit RewriterConfig, given as a text proto, overrides the
    # grappler settings below.
    rewriter_config = rewriter_config_pb2.RewriterConfig()
    text_format.Merge(params.rewriter_config, rewriter_config)
    config.graph_options.rewrite_options.CopyFrom(rewriter_config)
  elif not params.enable_optimizations:
    config.graph_options.optimizer_options.opt_level = tf.OptimizerOptions.L0
    config.graph_options.rewrite_options.disable_meta_optimizer = True
  elif params.variable_update == 'collective_all_reduce':
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.scoped_allocator_optimization = (
        rewriter_config_pb2.RewriterConfig.ON)
    rewrite_options.scoped_allocator_opts.enable_op.append('CollectiveReduce')
  if params.variable_update == 'horovod':
    import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
    config.gpu_options.visible_device_list = str(hvd.local_rank())
  # For collective_all_reduce, ignore all devices except the current worker.
  if params.variable_update == 'collective_all_reduce':
    del config.device_filters[:]
    config.device_filters.append(
        '/job:%s/replica:0/task:%d' % (params.job_name, params.task_index))

  # TODO(b/117324590): Re-enable PinToHostOptimizer when b/117324590 is fixed.
  # Currently we have to disable PinToHostOptimizer w/ XLA since it causes
  # OOM/perf cliffs.
  config.graph_options.rewrite_options.pin_to_host_optimization = (
      rewriter_config_pb2.RewriterConfig.OFF)
  return config
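
A minimal usage sketch, for context only: it assumes scripts/tf_cnn_benchmarks is on PYTHONPATH, that make_params (referenced in the docstring above) accepts keyword overrides for individual Params fields, and that the field values shown are purely illustrative.

import tensorflow.compat.v1 as tf  # or `import tensorflow as tf` on TF 1.x

import benchmark_cnn  # assumes scripts/tf_cnn_benchmarks is on PYTHONPATH

# Build a Params tuple with flag defaults, overriding a few fields
# (illustrative values, not recommendations).
params = benchmark_cnn.make_params(
    device='gpu',
    num_gpus=1,
    allow_growth=True,
    xla=False)

# Turn the Params into a session config and open a session with it.
config = benchmark_cnn.create_config_proto(params)
with tf.Session(config=config) as sess:
  pass  # build and run the benchmark graph here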