def inference_graph_from

def inference_graph_from_session()

in python/graph_util.py [0:0]
160 lines of code
62 McCabe index (conditional complexity)

def inference_graph_from_session(
        sess=None, input_tensors=None, output_tensors=None, signature_def=None,
        shape_feed_dict=None, feed_dict=None, dynamic_batch_size=False,
        protected_op_names=None,
        supported_op_types=None, no_fuse_ops=None, force_fuse_ops=None, minimum_segment_size=None,
        grappler=False, max_num_compilers=None,
        compiler_args=None, compiler_workdir=None, compiler_timeout=None, compiler_recovery=True,
        compiler_verbose=None, amp_pass=False):
    """Constructs an inference graph from a tensorflow session.

    Generally decomposes into 5 passes:
        1. Convert all variables to constants, `Assign`s to `Identity`s.
        2. Whitelist-based graph partitioning, each subgraph (wrapped in an `NeuronOp`)
            will contain only operations whose types match the types listed in `supported_op_types`.
        3. Shape inference to find shapes for input/output tensors of `NeuronOp` subgraphs.
        4. Call neuron-cc compiler on each `NeuronOp`.
        5. Restore `NeuronOp`s that failed to compile into their original form.

    Args:
        sess: Active TensorFlow session. If None, the default session is used.
        input_tensors: None or iterable of strings/tensors (unordered). Strings should be
            tensor names. Setting this argument can help when inference starts from some
            arbitrary tensors that are not placeholder tensors.
        output_tensors: None or iterable of strings/tensors (unordered). Strings should be
            tensor names. If None (and `signature_def` is None), the outputs of every op
            whose outputs have no consumers are used.
        signature_def: None or a `SignatureDef` protobuf message marking graph inputs and outputs.
            If given, `input_tensors` and `output_tensors` are not used to build a signature.
        shape_feed_dict: Dict `{str: shape}` used by `shape_inference`. Keys may also be
            tensors; they are replaced by their names.
        feed_dict: Dict `{str: numpy.ndarray}` used by `shape_inference_with_inputs`. Keys may
            also be tensors; they are replaced by their names.
            Optional. If both `shape_feed_dict` and `feed_dict` are unspecified, no shape
            inference will be performed. If only `shape_feed_dict` is specified, will perform
            `shape_inference` only. As long as `feed_dict` is specified, will perform
            `shape_inference` first and then `shape_inference_with_inputs`.
        dynamic_batch_size: Bool that represents whether the inference graph will support
            dynamic batch sizes during inference.
        protected_op_names: None or iterable of op names that are handed to
            `convert_variables_to_constants` in addition to the ops producing the signature
            inputs/outputs and the tensors named in `feed_dict`/`shape_feed_dict`.
        supported_op_types: Iterable of strings (unordered) representing compilable op names.
        no_fuse_ops: None or iterable of strings (unordered) representing names of ops
            that are forcibly placed on CPU.
        force_fuse_ops: None or iterable of strings (unordered) representing names of ops
            that are forcibly sent to the neuron-cc compiler.
        minimum_segment_size: Integer; minimum number of ops in an `NeuronOp` used by
            `whitelist_partition`.
        grappler: Bool; if True, the session graph is passed through
            `tf_optimizer.OptimizeGraph` (with a default `RewriterConfig`) before variables
            are converted to constants. Note that this appends the output tensors to the
            `TRAIN_OP` collection of `sess.graph`.
        max_num_compilers: Integer representing maximum allowed compiler processes.
        compiler_args: List of strings representing compiler arguments. Note that these
            arguments will be applied to all subgraphs generated by whitelist partitioning.
            The passed-in list is never modified.
        compiler_workdir: Str representing work directory of the neuron-cc compiler.
        compiler_timeout: Integer representing maximum allowed runtime for the neuron-cc compiler.
        compiler_recovery: Bool representing whether to recover from neuron-cc compiler failure.
        compiler_verbose: Value forwarded to `compile_subgraphs` as `verbose`.
        amp_pass: Bool; if True, `amp_optimization` is applied to the graph before partitioning.

    Environment:
        If `NEURON_CC_FLAGS` is set, it is split shell-style and the following options are
        consumed here: `--must-compile` (disables `compiler_recovery`; sets
        `compiler_verbose` to 1 if still unset), `--dump-prefix` (overrides
        `compiler_workdir`) and `--verbose` (overrides `compiler_verbose`). All remaining
        arguments are appended to `compiler_args`.

    Returns:
        A `Graph` object that is optimized for running inference on Inferentia.

    Raises:
        ValueError: If no session is given and no default session is active, or if some
            `NeuronOp` subgraphs are still uncompiled at the end of the pipeline.

    Note:
        `input_tensors`, `shape_feed_dict`, and `feed_dict` can all set input tensors, and so
        the latter one will always override the former one.
    """
    if 'NEURON_CC_FLAGS' in os.environ:
        parser = argparse.ArgumentParser()
        parser.add_argument('--must-compile', action='store_true')
        parser.add_argument('--dump-prefix', default=None)
        parser.add_argument('--verbose', type=int, default=None)
        tf_neuron_args, neuron_cc_args = parser.parse_known_args(shlex.split(os.environ['NEURON_CC_FLAGS']))
        if tf_neuron_args.verbose is not None:
            compiler_verbose = tf_neuron_args.verbose
        if tf_neuron_args.must_compile:
            compiler_recovery = False
            if compiler_verbose is None:
                compiler_verbose = 1
            logging.warning('Enabling must-compile according to NEURON_CC_FLAGS environment variable; '
                            'neuron-cc failures will be thrown as exceptions')
        if tf_neuron_args.dump_prefix is not None:
            compiler_workdir = tf_neuron_args.dump_prefix
        if neuron_cc_args:
            # Build a fresh list instead of extending in place, so the caller's
            # `compiler_args` object is not polluted across repeated calls.
            existing_args = [] if compiler_args is None else list(compiler_args)
            compiler_args = existing_args + neuron_cc_args
    if sess is None:
        sess = ops.get_default_session()
    if sess is None:
        # Fail early with a clear message rather than an AttributeError on `sess.graph`.
        raise ValueError('No TensorFlow session was provided and no default session is active')

    # normalize feed dict keys (tensors or strings) to tensor names
    if feed_dict is not None:
        feed_dict = {getattr(ts, 'name', ts): value for ts, value in feed_dict.items()}
    if shape_feed_dict is not None:
        shape_feed_dict = {getattr(ts, 'name', ts): value for ts, value in shape_feed_dict.items()}

    if signature_def is None:
        # build a SignatureDef from input/output tensors
        if input_tensors is None:
            if feed_dict is not None:
                input_names = list(feed_dict)
            elif shape_feed_dict is not None:
                input_names = list(shape_feed_dict)
            else:
                input_names = [op.outputs[0].name for op in sess.graph.get_operations()
                               if op.type == 'Placeholder']
        else:
            input_names = [getattr(ts, 'name', ts) for ts in input_tensors]
        input_tensors = [sess.graph.get_tensor_by_name(name) for name in input_names]
        if output_tensors is None:
            # default outputs: every output of ops none of whose outputs are consumed
            output_ops = [op for op in sess.graph.get_operations()
                          if all(not ts.consumers() for ts in op.outputs)]
            output_names = [ts.name for op in output_ops for ts in op.outputs]
        else:
            output_names = [getattr(ts, 'name', ts) for ts in output_tensors]
        output_tensors = [sess.graph.get_tensor_by_name(name) for name in output_names]
        signature_def = mgu.build_signature_def(input_tensors, output_tensors)
    else:
        # `output_names` is needed by the grappler branch below; take it from the
        # caller-provided signature so that branch does not hit an undefined name.
        output_names = [info.name for info in signature_def.outputs.values()]

    # collect names of ops that are handed to the variable-to-constant conversion
    protected_op_names = set() if protected_op_names is None else set(protected_op_names)
    io_infos = itertools.chain(signature_def.inputs.values(), signature_def.outputs.values())
    protected_op_names.update(sess.graph.get_tensor_by_name(info.name).op.name for info in io_infos)
    if feed_dict is not None:
        protected_op_names.update(sess.graph.get_tensor_by_name(name).op.name
                                  for name in feed_dict)
        if shape_feed_dict is None:
            # derive shapes by evaluating the fed tensors themselves
            key_dict = {key: key for key in feed_dict}
            evaluated_feed_dict = sess.run(key_dict, feed_dict)
            shape_feed_dict = {key: value.shape for key, value in evaluated_feed_dict.items()}
    if shape_feed_dict is not None:
        protected_op_names.update(sess.graph.get_tensor_by_name(name).op.name
                                  for name in shape_feed_dict)

    if grappler:
        with sess.graph.as_default():
            rewriter_config = rewriter_config_pb2.RewriterConfig()
            opt_config = config_pb2.ConfigProto()
            opt_config.graph_options.rewrite_options.CopyFrom(rewriter_config)
            # NOTE: this extends the TRAIN_OP collection of the session graph in place
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.extend(sess.graph.get_tensor_by_name(name) for name in output_names)
            grappler_metagraph = meta_graph.create_meta_graph_def(graph=sess.graph)
            graph_def = tf_optimizer.OptimizeGraph(opt_config, grappler_metagraph)
    else:
        graph_def = sess.graph.as_graph_def(add_shapes=True)

    # convert all variables to constants
    with replace_extract_sub_graph():
        graph_def = tf_graph_util.convert_variables_to_constants.__wrapped__(
            sess, graph_def, list(protected_op_names))
    ref_op_replacements = {'RefEnter': 'Enter', 'RefExit': 'Exit'}
    for node in graph_def.node:
        if node.op in ref_op_replacements:
            node.op = ref_op_replacements[node.op]
    # kept so that subgraphs failing compilation can be restored later
    original_graph_def = graph_def

    # setup op exclusions
    no_fuse_ops = set() if no_fuse_ops is None else set(no_fuse_ops)

    # exclude ops for which `_has_control_input` is true
    no_fuse_ops.update(node.name for node in graph_def.node if _has_control_input(node))

    # exclude ops that are attached to string tensors
    name_to_consumers = {}
    for node in graph_def.node:
        for inp in node.input:
            # strip the output index: 'name:1' -> 'name'
            input_node_name = inp.partition(':')[0]
            name_to_consumers.setdefault(input_node_name, set()).add(node.name)
    string_enum = dtypes.string.as_datatype_enum
    for node in graph_def.node:
        for attr_name in ('T', 'dtype'):
            # membership test first: indexing a protobuf map may insert a missing key
            if attr_name in node.attr and node.attr[attr_name].type == string_enum:
                no_fuse_ops.add(node.name)
                no_fuse_ops.update(name_to_consumers.get(node.name, ()))

    # normalize operators
    graph_def = gdu.normalize_operators(graph_def)

    # initialize inferred shapes
    graph_def = gdu.encode_inferred_shapes(graph_def, shape_feed_dict)

    # Adding the auto-mixed precision pass
    if amp_pass:
        graph_def = amp_optimization(graph_def, signature_def)

    # fuse ops into `NeuronOp`'s and determine tensors that require shapes
    part_graph_def = whitelist_partition(
        graph_def, signature_def, supported_op_types=supported_op_types,
        no_fuse_ops=no_fuse_ops, force_fuse_ops=force_fuse_ops,
        minimum_segment_size=minimum_segment_size)

    # perform an inference to find tensor shapes as a last resort
    # todo: change to hard_shape_inference == True
    if feed_dict is not None:
        part_graph_def = gdu.shape_inference_with_inputs(part_graph_def, sess, feed_dict)

    # call compiler for each `NeuronOp`; the same arguments are used for every subgraph
    args_dict = {}
    if compiler_args is not None:
        args_dict = {node.name: compiler_args for node in gdu.get_neuron_nodes(part_graph_def)}
    compiled_graph_def = compile_subgraphs(
        part_graph_def, workdir=compiler_workdir,
        args_dict=args_dict, timeout=compiler_timeout, max_num_compilers=max_num_compilers,
        verbose=compiler_verbose)

    if dynamic_batch_size:
        compiled_graph_def = mark_batch_axis(compiled_graph_def)

    if compiler_recovery:
        compiled_graph_def = gdu.restore_compiler_failures(compiled_graph_def, original_graph_def)
        compiled_graph_def = gdu.run_graph_def_pass_in_subgraphs(compiled_graph_def, gdu.erase_large_constants)
        compiled_graph_def = nchw_to_nhwc(compiled_graph_def)

    # try to enable dynamic batch size if possible
    if not dynamic_batch_size:
        compiled_graph_def, dynamic_batch_size = set_dynamic_batch_size(compiled_graph_def)

    # rename NeuronOp's for better visualization
    compiled_graph_def = gdu.prefix_node_names(compiled_graph_def)

    # raise exception if NeuronOp is still uncompiled after fallback pass
    uncompiled_node_names = [node.name for node in gdu.get_neuron_nodes(compiled_graph_def)
                             if not node.attr['executable'].s]
    if uncompiled_node_names:
        raise ValueError('The following subgraphs failed to compile: {}'.format(uncompiled_node_names))

    # execution plan analysis
    compiled_graph_def = gdu.set_execution_plan(compiled_graph_def)

    # return a new graph
    compiled_graph = _graph_def_to_graph(compiled_graph_def)

    # statistics on number of operations
    num_ops_original = len(sess.graph.get_operations())
    num_ops_tfn, num_ops_on_neuron = gdu.compiled_graph_op_counts(compiled_graph_def)
    with utils.logging_show_info():
        logging.info('Number of operations in TensorFlow session: {}'.format(num_ops_original))
        logging.info('Number of operations after tf.neuron optimizations: {}'.format(num_ops_tfn))
        logging.info('Number of operations placed on Neuron runtime: {}'.format(num_ops_on_neuron))
    if ncc.find_neuron_cc() is None:
        logging.warning('***************************************************************')
        logging.warning('')
        logging.warning('  neuron-cc is not found.')
        logging.warning('')
        logging.warning('  To fully optimize TensorFlow model with AWS Neuron, please')
        logging.warning('')
        logging.warning('  install the neuron-cc compiler by "pip install neuron-cc".')
        logging.warning('')
        logging.warning('***************************************************************')
    return compiled_graph