in python/saved_model.py [0:0]
def convert_to_inference_model(model_dir, new_model_dir, batch_size=1,
model_shape_feed_dict=None, model_feed_dict=None,
tags=None, signature_def_key=None, strip_default_attrs=False,
config_proto=None, constant_size_to_exclude=1024,
convert_constants_to_variables=False, compiler_workdir=None, **kwargs):
"""Convert a `SavedModel` to a Neuron-optimized `SavedModel`.
Args:
model_dir: The path of the original `SavedModel`.
new_model_dir: The path to which the Neuron-optimized `SavedModel` will be stored.
batch_size: (Optional) Positive integer representing batch size used in inference.
Defaults to 1.
        model_shape_feed_dict: (Optional) Dictionary {str: list} used for inferring
            tensor shapes. Keys should match model input names, and values should be
            lists of positive integers giving the corresponding input tensor shapes.
        model_feed_dict: (Optional) Dictionary {str: numpy.array} used for inference.
            Useful for inferring tensor shapes. Keys should match model input names,
            and values should be numpy arrays that can be fed to the `SavedModel` as inputs.
tags: (Optional) Iterable of strings to identify the required `MetaGraphDef`.
These should correspond to the tags used when saving the variables using
the `SavedModel` `save()` API. Default is to use the first `tag_set` available
in the `SavedModel`.
signature_def_key: (Optional) String specifying the `signature_def` to use. Default is
to use 'serving_default' or the first `signature_def` corresponding to `tags`.
strip_default_attrs: Boolean. If `True`, default-valued attributes will be
removed from the NodeDefs. For a detailed guide, see
[Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
        config_proto: (Optional) `ConfigProto` used by the sessions created during
            conversion. Defaults to one with `allow_soft_placement=True`.
        constant_size_to_exclude: Integer; size threshold used to exclude constants
            from the conversion when `convert_constants_to_variables` is `True`.
            Defaults to 1024.
        convert_constants_to_variables: Boolean. If `True`, constants in the
            optimized graph are converted to variables. Defaults to `False`.
        minimum_segment_size: Integer; minimum number of ops in a `NeuronOp` used by
            `whitelist_partition`.
        no_fuse_ops: None or iterable of strings (unordered) representing names of ops
            that are forcibly placed on CPU.
        compiler_args: List of strings representing neuron-cc compiler arguments. Note that
            these arguments apply to all subgraphs generated by whitelist partitioning.
        compiler_workdir: (Optional) String; working directory for the neuron-cc compiler.
    Returns:
        Dictionary containing `OnNeuronRatio`: the fraction of operators placed
        on Neuron in the optimized graph.
    Note: All other keyword arguments are forwarded to `tf.neuron.graph_util.inference_graph_from_session`.
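
    Example (illustrative; './my_model', './my_model_neuron', and the input
    name 'input_1' are placeholders for your own model, not part of the API):

        result = convert_to_inference_model(
            './my_model',                # original SavedModel
            './my_model_neuron',         # destination for the optimized SavedModel
            model_shape_feed_dict={'input_1': [1, 224, 224, 3]},
        )
        print(result['OnNeuronRatio'])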
"""
if config_proto is None:
config_proto = config_pb2.ConfigProto(allow_soft_placement=True)
_check_export_dir(new_model_dir)
kwargs = kwargs.copy()
tags = _normalize_tags(tags, model_dir)
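    # Load the requested MetaGraphDef into a temporary session so input and
    # output tensors can be looked up by name.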
with tf_session.Session(graph=ops.Graph(), config=config_proto) as sess:
meta_graph = tf_saved_model.loader.load.__wrapped__(sess, tags, model_dir)
_check_for_compatible_tf_version(model_dir, sess)
signature_def_key, signature_def = _get_signature_def(meta_graph, signature_def_key)
input_tensors = {sess.graph.get_tensor_by_name(ts.name)
for ts in signature_def.inputs.values()}
output_tensors = {sess.graph.get_tensor_by_name(ts.name)
for ts in signature_def.outputs.values()}
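        # Names of the SavedModel main op must survive optimization; they are
        # protected from fusion below and re-attached on export.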
saved_model_main_op = meta_graph.collection_def['saved_model_main_op'].node_list.value
        inputs = {key: sess.graph.get_tensor_by_name(value.name)
                  for key, value in signature_def.inputs.items()}
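        # Precedence for shape information: an explicit feed_dict wins, then a
        # shape_feed_dict, then shapes inferred from the signature and batch_size.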
        if model_feed_dict is not None:
            feed_dict = {inputs[key]: value for key, value in model_feed_dict.items()}
            kwargs.update(feed_dict=feed_dict)
        elif model_shape_feed_dict is not None:
            kwargs.update(shape_feed_dict={
                inputs[key]: value for key, value in model_shape_feed_dict.items()})
        elif 'shape_feed_dict' not in kwargs and 'feed_dict' not in kwargs:
            kwargs.update(shape_feed_dict=_infer_input_shapes(
                inputs.values(), batch_size, signature_def))
        # build the Neuron-optimized inference graph from the loaded session
infer_graph = inference_graph_from_session.__wrapped__(
sess, input_tensors=input_tensors, output_tensors=output_tensors,
signature_def=signature_def,
protected_op_names=saved_model_main_op, compiler_workdir=compiler_workdir, **kwargs)
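        # Optionally convert constants in the optimized graph to variables,
        # using compiler_workdir (or a temporary directory) as scratch space.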
if convert_constants_to_variables:
if compiler_workdir is None:
temp_dir = TemporaryDirectory()
compiler_workdir = temp_dir.name
infer_graph = convert_constant_to_variables(
sess,
infer_graph,
compiler_workdir=compiler_workdir,
constant_size_to_exclude=constant_size_to_exclude,
)
# load inference graph into a session and export as a SavedModel
with tf_session.Session(graph=infer_graph, config=config_proto) as sess:
        # If variables were added to the graph, run their initializers before saving.
for op in infer_graph.get_operations():
if "init" in op.name and op.type == "NoOp":
sess.run(op)
builder = tf_saved_model.builder.SavedModelBuilder(new_model_dir)
signature_def_map = {signature_def_key: signature_def}
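        # Update the tensor shapes recorded in the signature to match the
        # optimized graph, which may carry more static shape information.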
for tensor in signature_def.inputs.values():
infer_tensor = infer_graph.get_tensor_by_name(tensor.name)
tensor.tensor_shape.CopyFrom(infer_tensor.shape.as_proto())
for tensor in signature_def.outputs.values():
infer_tensor = infer_graph.get_tensor_by_name(tensor.name)
tensor.tensor_shape.CopyFrom(infer_tensor.shape.as_proto())
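        # Re-attach the original main op (if any) so it runs when the exported
        # SavedModel is loaded.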
saved_model_main_op = [sess.graph.get_operation_by_name(name) for name in saved_model_main_op]
main_op = saved_model_main_op[0] if saved_model_main_op else None
builder.add_meta_graph_and_variables(sess, tags, signature_def_map=signature_def_map,
strip_default_attrs=strip_default_attrs,
main_op=main_op)
builder.save()
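    # Report the fraction of operators that were placed on Neuron.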
num_ops_tfn, num_ops_on_neuron = gdu.compiled_graph_op_counts(infer_graph.as_graph_def())
on_neuron_ratio = float(num_ops_on_neuron) / num_ops_tfn if num_ops_tfn != 0 else 0.0
utils.model_conversion_report(model_dir, new_model_dir, on_neuron_ratio)
return dict(OnNeuronRatio=on_neuron_ratio)