scripts/float16.py
# MIT License
#
# Copyright (c) Microsoft Corporation, Hugging Face. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from typing import Optional
import itertools
import numpy as np
import onnx
import packaging.version as pv
import warnings
from onnx import helper, numpy_helper
from onnx import onnx_pb as onnx_proto
import onnxslim.third_party.onnx_graphsurgeon as gs
FLOAT32 = 1  # onnx_proto.TensorProto.FLOAT
FLOAT16 = 10  # onnx_proto.TensorProto.FLOAT16
def _npfloat16_to_int(np_list):
    """
    Convert numpy float16 values to python ints holding the raw 16-bit patterns.
    :param np_list: numpy float16 list
    :return int_list: python int list
    """
    # Reinterpret each float16 as an unsigned 16-bit integer
    return [int(x.view("H")) for x in np_list]
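# Sanity-check example (a sketch): float16 1.0 has bit pattern 0x3C00, so
# _npfloat16_to_int(np.array([1.0], dtype=np.float16)) returns [15360].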
def convert_np_to_float16(np_array, min_positive_val=1e-7, max_finite_val=1e4):
"""
Convert float32 numpy array to float16 without changing sign or finiteness.
Positive values less than min_positive_val are mapped to min_positive_val.
Positive finite values greater than max_finite_val are mapped to max_finite_val.
Similar for negative values. NaN, 0, inf, and -inf are unchanged.
"""
def between(a, b, c):
return np.logical_and(a < b, b < c)
positive_values = np_array[np.where(np_array > 0)]
if positive_values.shape[0] > 0:
pos_max = positive_values.max()
pos_min = positive_values.min()
if pos_max >= max_finite_val:
warnings.warn(
"the float32 number {} will be truncated to {}".format(
pos_max, max_finite_val
)
)
if pos_min <= min_positive_val:
warnings.warn(
"the float32 number {} will be truncated to {}".format(
pos_min, min_positive_val
)
)
negative_values = np_array[np.where(np_array < 0)]
if negative_values.shape[0] > 0:
neg_max = negative_values.max()
neg_min = negative_values.min()
if neg_min <= -max_finite_val:
warnings.warn(
"the float32 number {} will be truncated to {}".format(
neg_min, -max_finite_val
)
)
if neg_max >= -min_positive_val:
warnings.warn(
"the float32 number {} will be truncated to {}".format(
neg_max, -min_positive_val
)
)
np_array = np.where(
between(0, np_array, min_positive_val), min_positive_val, np_array
)
np_array = np.where(
between(-min_positive_val, np_array, 0), -min_positive_val, np_array
)
np_array = np.where(
between(max_finite_val, np_array, float("inf")), max_finite_val, np_array
)
np_array = np.where(
between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array
)
return np.float16(np_array)
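# Example of the clamping described above (a sketch; values are illustrative):
#   convert_np_to_float16(np.array([1e-9, 5.0, 1e9], dtype=np.float32))
# maps 1e-9 up to min_positive_val (1e-7) and 1e9 down to max_finite_val (1e4),
# while 5.0 passes through unchanged (and two truncation warnings are emitted).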
def convert_tensor_float_to_float16(tensor, min_positive_val=1e-7, max_finite_val=1e4):
"""
    Convert a TensorProto's float32 data to float16 in place.
    :param tensor: TensorProto object
    :param min_positive_val: smallest positive magnitude kept (see convert_np_to_float16)
    :param max_finite_val: largest finite magnitude kept (see convert_np_to_float16)
    :return tensor_float16: the converted TensorProto object
"""
if not isinstance(tensor, onnx_proto.TensorProto):
raise ValueError(
"Expected input type is an ONNX TensorProto but got %s" % type(tensor)
)
if tensor.data_type == onnx_proto.TensorProto.FLOAT:
tensor.data_type = onnx_proto.TensorProto.FLOAT16
# convert float_data (float type) to float16 and write to int32_data
if tensor.float_data:
float16_data = convert_np_to_float16(
np.array(tensor.float_data), min_positive_val, max_finite_val
)
int_list = _npfloat16_to_int(float16_data)
tensor.int32_data[:] = int_list
tensor.float_data[:] = []
# convert raw_data (bytes type)
if tensor.raw_data:
            # reinterpret tensor.raw_data as float32
            float32_list = np.frombuffer(tensor.raw_data, dtype="float32")
# convert float to float16
float16_list = convert_np_to_float16(
float32_list, min_positive_val, max_finite_val
)
# convert float16 to bytes and write back to raw_data
            tensor.raw_data = float16_list.tobytes()
return tensor
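# Minimal illustration of the in-place conversion above (a sketch; the tensor
# name "w" is arbitrary):
#   t = helper.make_tensor("w", onnx_proto.TensorProto.FLOAT, [2], [0.5, 2.0])
#   convert_tensor_float_to_float16(t)
#   assert t.data_type == onnx_proto.TensorProto.FLOAT16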
def make_value_info_from_tensor(tensor):
shape = numpy_helper.to_array(tensor).shape
return helper.make_tensor_value_info(tensor.name, tensor.data_type, shape)
DEFAULT_OP_BLOCK_LIST = [
"ArrayFeatureExtractor",
"Binarizer",
"CastMap",
"CategoryMapper",
"DictVectorizer",
"FeatureVectorizer",
"Imputer",
"LabelEncoder",
"LinearClassifier",
"LinearRegressor",
"Normalizer",
"OneHotEncoder",
"RandomUniformLike",
"SVMClassifier",
"SVMRegressor",
"Scaler",
"TreeEnsembleClassifier",
"TreeEnsembleRegressor",
"ZipMap",
"NonMaxSuppression",
"TopK",
"RoiAlign",
"Resize",
# 'Range',
"CumSum",
"Min",
"Max",
"Upsample",
# NEW:
"RandomNormalLike",
# TODO: Ideally, "Cast" nodes should not be here, for the following reasons:
# - It breaks the semantics that the default list contains "ops that are not supported for float16 in ONNX Runtime".
# - When fp32 casts already exist in the model (e.g., for rotary embeddings), this script will insert redundant casts around it.
# However, without it, the graphs produced are invalid. Eventually, we will resolve this.
"Cast",
]
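# Callers may extend or shrink this list. For example (a sketch), to also keep
# "Range" in float32:
#   convert_float_to_float16(model, op_block_list=DEFAULT_OP_BLOCK_LIST + ["Range"])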
def initial_checking(model, disable_shape_infer):
func_infer_shape = None
if not disable_shape_infer and pv.Version(onnx.__version__) >= pv.Version("1.2"):
        try:
            from onnx.shape_inference import infer_shapes
            func_infer_shape = infer_shapes
        except ImportError:
            pass
if not isinstance(model, onnx_proto.ModelProto):
raise ValueError(
"Expected model type is an ONNX ModelProto but got %s" % type(model)
)
if func_infer_shape is not None:
model = func_infer_shape(model)
is_fp16_ready_flag = check_if_fp16_ready(model.graph)
return model, func_infer_shape, is_fp16_ready_flag
def convert_float_to_float16(
model,
min_positive_val=1e-7,
max_finite_val=1e4,
keep_io_types=False,
disable_shape_infer=False,
op_block_list=None,
node_block_list=None,
check_fp16_ready=True,
):
# create blocklists
if op_block_list is None:
op_block_list = DEFAULT_OP_BLOCK_LIST
if node_block_list is None:
node_block_list = []
op_block_list = set(op_block_list)
node_block_list = set(node_block_list)
    # key: original input name, value: output name of the inserted Cast node
    global_input_name_dict = {}
# basic checking, including shape inference
model, func_infer_shape, is_fp16_ready_flag = initial_checking(
model, disable_shape_infer
)
if is_fp16_ready_flag and check_fp16_ready:
raise ValueError(
"The model is already converted to float16, if convert again, the model might be wrong. \n If you are sure to convert again, please set check_fp16_ready=False."
)
graph_stack = [model.graph]
is_top_level = True
while graph_stack:
next_level = []
for curr_graph in graph_stack:
process_graph_input(
curr_graph, is_top_level, keep_io_types, global_input_name_dict
)
value_info_block_list = process_tensor_in_node(
curr_graph,
op_block_list,
node_block_list,
min_positive_val,
max_finite_val,
)
process_value_info(curr_graph, value_info_block_list)
process_node_in_block_list(
curr_graph, global_input_name_dict, op_block_list, node_block_list
)
process_initializers(
curr_graph,
op_block_list,
node_block_list,
min_positive_val,
max_finite_val,
)
process_graph_output(curr_graph, is_top_level, keep_io_types)
sub_graph_list = get_next_level_graph(
curr_graph, op_block_list, node_block_list
)
if len(sub_graph_list) > 0:
next_level.extend(sub_graph_list)
if not is_top_level:
process_node_input_output(curr_graph, global_input_name_dict)
is_top_level = False # Going to process sub-graph
graph_stack = next_level
remove_unnecessary_cast_node(model.graph)
# Topologically sort the graph
# NOTE: We do not perform another round of optimization as the model is already optimized
graph = gs.import_onnx(model)
graph.toposort()
model = gs.export_onnx(graph)
return model
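# Typical usage of convert_float_to_float16 (a sketch; file names are illustrative):
#   model = onnx.load("model.onnx")
#   model_fp16 = convert_float_to_float16(model, keep_io_types=True)
#   onnx.save(model_fp16, "model_fp16.onnx")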
# For sub-graphs, rewrite node inputs/outputs to the renamed Cast outputs,
# because sub-graph nodes can reference parent-graph names that have no
# value_info entries of their own.
def process_node_input_output(
graph: onnx_proto.GraphProto, global_input_name_dict: dict
):
for node in graph.node:
for i, input_name in enumerate(node.input):
if input_name in global_input_name_dict:
node.input[i] = global_input_name_dict[input_name]
for i, output_name in enumerate(node.output):
if output_name in global_input_name_dict:
node.output[i] = global_input_name_dict[output_name]
def process_graph_input(
graph: onnx_proto.GraphProto,
is_top_level: bool,
is_io_fp32: bool,
global_input_name_dict: dict,
):
# The input dtype is float32, need to cast to fp16
if is_top_level and is_io_fp32:
        for graph_input in graph.input:  # graph_input is a ValueInfoProto
if graph_input.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
downstream_nodes = find_downstream_node_by_input_name(
graph, graph_input.name
)
for d_node in downstream_nodes:
# More than one node may consume the model input, so we only create
# a single cast node, and then reuse this node when needed.
cast_exists = graph_input.name in global_input_name_dict
if cast_exists:
cast_node_output_name = global_input_name_dict[graph_input.name]
else:
cast_node_output_name = graph_input.name + "_fp16"
add_cast_node(
graph,
[graph_input.name],
[cast_node_output_name],
cast_node_output_name, # Set node name same as output name
FLOAT16,
)
add_new_value_info(
graph,
graph_input,
cast_node_output_name,
onnx_proto.TensorProto.FLOAT16,
)
                    for i, input_name in enumerate(d_node.input):
                        if input_name == graph_input.name:
                            # Redirect the consumer to the Cast node's output
                            d_node.input[i] = cast_node_output_name
                            global_input_name_dict[graph_input.name] = cast_node_output_name
# For the sub-graph, don't do cast
else: # Change the input dtype to fp16 without any cast
for graph_input in graph.input:
if graph_input.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
graph_input.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
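# The rewiring performed by process_graph_input above for a kept-fp32 input X:
#   before: X (fp32) --> consumer
#   after:  X (fp32) --> Cast(to=FLOAT16) --"X_fp16"--> consumer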
def process_graph_output(
graph: onnx_proto.GraphProto, is_top_level: bool, is_io_fp32: bool
):
    if is_top_level and is_io_fp32:  # outputs must stay float32, so cast fp16 results back to fp32
for i, graph_output in enumerate(graph.output):
if graph_output.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
new_producer_name = graph_output.name + "_fp16"
original_name = graph_output.name # The correct output name
# Get the node(s) that produce the model output
# These will most likely be fp16, but could be fp32 if the previous node is in block_list
upstream_nodes = find_upstream_node_by_output_name(graph, original_name)
assert len(upstream_nodes) == 1 # Should be only one node
producer_node = upstream_nodes[0]
                for j, output_name in enumerate(producer_node.output):
                    if output_name == original_name:
                        producer_node.output[j] = new_producer_name
                cast_node_name = new_producer_name + "_input_cast" + str(i)
add_cast_node(
graph,
[new_producer_name],
[original_name],
cast_node_name,
onnx_proto.TensorProto.FLOAT,
)
for value_info in graph.value_info:
if original_name == value_info.name:
value_info.type.tensor_type.elem_type = (
onnx_proto.TensorProto.FLOAT
)
# Get the node(s) that consume the model output
downstream_nodes = find_downstream_node_by_input_name(
graph,
original_name,
include_subgraphs=False,
)
# It is possible that the producer node is also input to downstream nodes
# So, we update the inputs of these downstream nodes
for d_node in downstream_nodes:
for i, input_name in enumerate(d_node.input):
if input_name == original_name:
d_node.input[i] = new_producer_name
    else:  # sub-graph outputs (or fp16 I/O): just flip the declared dtype to fp16
for graph_output in graph.output:
if graph_output.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
graph_output.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
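# The rewiring performed by process_graph_output above for a kept-fp32 output Y:
#   before: producer --"Y"--> graph output (fp32)
#   after:  producer --"Y_fp16"--> Cast(to=FLOAT) --"Y"--> graph output (fp32)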
def process_node_in_block_list(
graph: onnx_proto.GraphProto,
global_input_name_dict: dict,
op_block_list: list,
node_block_list: list,
):
# NB: Important to create a copy of the nodes in the graph to avoid modifying
# the graph in-place while iterating (causing an infinite loop)
for node in list(graph.node):
if (node.op_type in op_block_list) or (node.name in node_block_list):
insert_cast32_before_node(graph, node, global_input_name_dict)
insert_cast16_after_node(graph, node, global_input_name_dict)
# TODO: global_input_name_dict is not yet populated here.
def insert_cast32_before_node(
graph: onnx_proto.GraphProto, node: onnx_proto.NodeProto, global_input_name_dict
):
for i, input_name in enumerate(node.input):
for value_info in itertools.chain(graph.value_info, graph.input):
if input_name == value_info.name:
if (
value_info.type.tensor_type.elem_type
!= onnx_proto.TensorProto.FLOAT16
):
break
cast_output_name = node.name + "_input_cast_" + str(i)
add_new_value_info(
graph, value_info, cast_output_name, onnx_proto.TensorProto.FLOAT
)
cast_node_name = node.name + "_input_cast" + str(i)
add_cast_node(
graph,
[input_name],
[cast_output_name],
cast_node_name,
onnx_proto.TensorProto.FLOAT,
)
node.input[i] = cast_output_name
break
# TODO: global_input_name_dict is not yet populated here.
def insert_cast16_after_node(
graph: onnx_proto.GraphProto, node: onnx_proto.NodeProto, global_input_name_dict
):
for i, output_name in enumerate(node.output):
for value_info in itertools.chain(graph.value_info, graph.output):
if output_name == value_info.name:
if (
value_info.type.tensor_type.elem_type
!= onnx_proto.TensorProto.FLOAT
):
break
cast_input_name = node.name + "_output_cast_" + str(i)
add_new_value_info(
graph, value_info, cast_input_name, onnx_proto.TensorProto.FLOAT
)
value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
cast_node_name = node.name + "_output_cast" + str(i)
add_cast_node(
graph,
[cast_input_name],
[output_name],
cast_node_name,
onnx_proto.TensorProto.FLOAT16,
)
node.output[i] = cast_input_name
break
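# Together, insert_cast32_before_node and insert_cast16_after_node sandwich a
# blocked node so the surrounding graph stays fp16:
#   ... --fp16--> Cast(to=FLOAT) --fp32--> blocked node --fp32--> Cast(to=FLOAT16) --fp16--> ...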
# Process tensor data stored in node attributes
def process_tensor_in_node(
graph: onnx_proto.GraphProto,
op_block_list: list,
node_block_list: list,
min_positive_val,
max_finite_val,
):
value_info_block_list = set() # This is for later use, not in this step
for node in graph.node:
# NOTE: "Cast" operation cannot change its output type because it is strongly typed.
if (
(node.op_type in op_block_list)
or (node.name in node_block_list)
or (node.op_type == "Cast")
):
# Only need to block the output value_info changing
for output_name in node.output:
value_info_block_list.add(output_name)
else:
for attr in node.attribute:
# one tensor
if attr.t.data_type == onnx_proto.TensorProto.FLOAT:
attr.t.CopyFrom(
convert_tensor_float_to_float16(
attr.t, min_positive_val, max_finite_val
)
)
                # list of tensors
for t in attr.tensors:
if t.data_type == onnx_proto.TensorProto.FLOAT:
t.CopyFrom(
convert_tensor_float_to_float16(
t, min_positive_val, max_finite_val
)
)
return value_info_block_list
# Change every value_info's element type from float32 to float16, unless blocked
def process_value_info(graph: onnx_proto.GraphProto, value_info_block_list: list):
    for value_info in graph.value_info:
        if value_info.name in value_info_block_list:
            continue
        if value_info.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
            value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
# Initializers are graph 'edges', so they have no value_info entries
def process_initializers(
    graph: onnx_proto.GraphProto,
    op_block_list,
    node_block_list,
    min_positive_val,
    max_finite_val,
):
    # Collect the inputs of blocked nodes; those initializers must stay float32
    initializer_block_list = set()
    for node in graph.node:
        if (node.op_type in op_block_list) or (node.name in node_block_list):
            # Some names refer to initializers, some to value_infos; we cannot
            # distinguish them here, but it doesn't matter.
            for input_name in node.input:
                initializer_block_list.add(input_name)
# Process initializers
for initializer in graph.initializer:
if initializer.name not in initializer_block_list:
if initializer.data_type == onnx_proto.TensorProto.FLOAT:
convert_tensor_float_to_float16(
initializer, min_positive_val, max_finite_val
)
def get_next_level_graph(
graph: onnx_proto.GraphProto, op_block_list: list, node_block_list: list
):
sub_graph_list = []
for node in graph.node:
if node.op_type in op_block_list or node.name in node_block_list:
continue
for attr in node.attribute:
            # Check whether the node carries sub-graphs
if len(attr.g.node) > 0: # single sub-graph
sub_graph_list.append(attr.g)
for g in attr.graphs:
if len(g.node) > 0: # multiple sub-graphs
sub_graph_list.append(g)
return sub_graph_list
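# Sub-graphs come from control-flow ops: "If" carries then_branch/else_branch
# as single-graph attributes, while "Loop" and "Scan" carry a "body" graph.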
def add_cast_node(
graph: onnx_proto.GraphProto,
inputs: list,
outputs: list,
node_name: str,
to_type: int,
):
    new_node = helper.make_node("Cast", inputs, outputs, to=to_type, name=node_name)
    graph.node.append(new_node)
def add_new_value_info(
graph: onnx_proto.GraphProto,
exist_value_info: onnx_proto.ValueInfoProto,
name: str,
dtype: int,
):
new_value_info = graph.value_info.add()
new_value_info.CopyFrom(exist_value_info)
new_value_info.name = name
new_value_info.type.tensor_type.elem_type = dtype
# Find the node that has the specified output name
def find_upstream_node_by_output_name(graph: onnx_proto.GraphProto, output_name: str):
nodes = []
for node in graph.node:
if output_name in node.output:
nodes.append(node)
    assert len(nodes) <= 1  # there should be at most one producing node
return nodes
# Find the node that has the specified input name, including in subgraphs
def find_downstream_node_by_input_name(
graph: onnx_proto.GraphProto, input_name: str, include_subgraphs=True
):
nodes = []
# Check nodes in current graph
for node in graph.node:
if input_name in node.input:
nodes.append(node)
if not include_subgraphs:
continue
# Recursively check subgraphs in node attributes
for attr in node.attribute:
if attr.type == onnx_proto.AttributeProto.GRAPH:
# Single subgraph
if len(attr.g.node) > 0:
nodes.extend(find_downstream_node_by_input_name(attr.g, input_name))
# Multiple subgraphs
if attr.type == onnx_proto.AttributeProto.GRAPHS:
for g in attr.graphs:
if len(g.node) > 0:
nodes.extend(find_downstream_node_by_input_name(g, input_name))
return nodes
# Remove identity node
def remove_identity_node_from_model(model: onnx_proto.ModelProto):
remove_identity_node_from_graph(model.graph)
    try:
        from onnx.shape_inference import infer_shapes
        model = infer_shapes(model)
    except ImportError:
        pass
    return model
# Remove identity node
def remove_identity_node_from_graph(graph: onnx_proto.GraphProto):
    # Iterate over a copy: removing from graph.node while iterating skips elements
    for curr_node in list(graph.node):
        if curr_node.op_type == "Identity":
            for input_name in curr_node.input:
                upstream_nodes = find_upstream_node_by_output_name(graph, input_name)
                for u_node in upstream_nodes:
                    if u_node is not None:
                        u_node.output[0] = curr_node.output[0]
            graph.node.remove(curr_node)
def convert_float_to_float16_model_path(
model_path, min_positive_val=1e-7, max_finite_val=1e4, keep_io_types=False
):
"""
    Convert all float32 tensors in the ONNX model at model_path to float16.
    This entry point works around the fact that infer_shapes cannot handle
    models larger than 2GB, yet it can be applied to models of any size.
    :param model_path: ONNX Model path
    :return: converted ONNX ModelProto object
    Examples
    ::
        # Convert to an ONNX ModelProto object and save the model binary file:
from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path
new_onnx_model = convert_float_to_float16_model_path('model.onnx')
onnx.save(new_onnx_model, 'new_model.onnx')
"""
disable_shape_infer = False
if pv.Version(onnx.__version__) >= pv.Version("1.8"):
        try:
            # infer_shapes_path can be applied to models of any size
            from onnx.shape_inference import infer_shapes_path
            import tempfile
            import os
            # shape_infer_model_path should be in the same folder as model_path
            with tempfile.NamedTemporaryFile(
                dir=os.path.dirname(model_path)
            ) as tmpfile:
                shape_infer_model_path = tmpfile.name
                infer_shapes_path(model_path, shape_infer_model_path)
                model = onnx.load(shape_infer_model_path)
                disable_shape_infer = True
        except Exception:
            # Shape inference failed; fall back to loading the model without it
            pass
if not disable_shape_infer:
model = onnx.load(model_path)
return convert_float_to_float16(
model, min_positive_val, max_finite_val, keep_io_types, disable_shape_infer
)
def remove_unnecessary_cast_node(graph_proto: onnx_proto.GraphProto):
# 1. find all cast nodes in the graph
cast_node_list = []
input_name_to_cast_node_dict = {}
output_name_to_cast_node_dict = {}
    # use names as keys since protobuf node objects cannot be dict keys
name_to_node_dict = {}
for node in graph_proto.node:
if node.op_type == "Cast":
cast_node_list.append(node)
name_to_node_dict[node.name] = node
for input_name in node.input:
input_name_to_cast_node_dict[input_name] = node
for output_name in node.output:
output_name_to_cast_node_dict[output_name] = node
# 2. find upstream and downstream node of the cast node
cast_node_upstream_dict = {} # mapping cast node(name) to its upstream node
cast_node_downstream_dict = {} # mapping cast node(name) to its downstream node
for current_node in graph_proto.node:
# find the downstream node(s)
for input_name in current_node.input:
if input_name in output_name_to_cast_node_dict:
# found the downstream node of the cast node, might be multiple
cast_node = output_name_to_cast_node_dict[input_name]
                if cast_node.name not in cast_node_downstream_dict:
                    cast_node_downstream_dict[cast_node.name] = current_node
                else:  # already has a downstream node: keep them in a list
                    existing = cast_node_downstream_dict[cast_node.name]
                    if isinstance(existing, list):
                        existing.append(current_node)
                    else:
                        cast_node_downstream_dict[cast_node.name] = [
                            existing,
                            current_node,
                        ]
# find the upstream node
for output_name in current_node.output:
if output_name in input_name_to_cast_node_dict:
# found the upstream node of the cast node, should be unique
cast_node = input_name_to_cast_node_dict[output_name]
cast_node_upstream_dict[cast_node.name] = current_node
    # 3. drop Cast nodes fed by a 'Constant' from the removal candidates
for cast_node_name, upstream_node in cast_node_upstream_dict.items():
cast_node = name_to_node_dict[cast_node_name]
if upstream_node.op_type == "Constant":
cast_node_list.remove(cast_node)
# 4. find (cast_to_fp16, cast_to_fp32) pairs where --fp32--> cast_to_fp16 --fp16--> cast_to_fp32.
remove_candidate = []
name_to_value_info = {
value_info.name: value_info
for value_info in itertools.chain(graph_proto.value_info, graph_proto.input)
}
    def get_type(name: str) -> Optional[int]:
        if name in name_to_value_info:
            # return the element dtype so it compares against TensorProto enums
            return name_to_value_info[name].type.tensor_type.elem_type
        # `name` has no value_info.
        return None
for cast_node_name, downstream_node in cast_node_downstream_dict.items():
cast_node = name_to_node_dict[cast_node_name]
if len(cast_node.input) != 1:
raise RuntimeError(
f"Cast node {cast_node_name} should have only one input, but has {len(cast_node.input)}."
)
input_type = get_type(cast_node.input[0])
if input_type != onnx_proto.TensorProto.FLOAT:
continue
if isinstance(downstream_node, list):
        for dn in downstream_node:
            if (
                dn.op_type == "Cast"
                # the Cast "to" attribute holds TensorProto enum values
                # (FLOAT16 = 10, FLOAT32 = 1), as in the non-list branch below
                and dn.attribute[0].i == FLOAT32
                and cast_node.attribute[0].i == FLOAT16
                and dn in cast_node_list
                and cast_node in cast_node_list
            ):
                remove_candidate.append((cast_node, dn))
else:
if (
downstream_node.op_type == "Cast"
and cast_node.attribute[0].i == FLOAT16
and downstream_node.attribute[0].i == FLOAT32
and downstream_node in cast_node_list
and cast_node in cast_node_list
):
remove_candidate.append((cast_node, downstream_node))
# 5. change "upstream --fp32--> cast_to_fp16 --fp16--> cast_to_fp32 --fp32--> downstream" to
# "upstream --fp32--> downstream".
for cast_node_pair in remove_candidate:
first_cast_node = cast_node_pair[0]
second_cast_node = cast_node_pair[1]
upstream_node = cast_node_upstream_dict.get(first_cast_node.name)
downstream_node = cast_node_downstream_dict.get(second_cast_node.name)
if upstream_node is None and downstream_node is not None:
        # no producer found: the first Cast reads a graph input directly
out = first_cast_node.input[0]
for i, input_name in enumerate(downstream_node.input):
for output_name in second_cast_node.output:
if input_name == output_name:
# change the input as the upstream node's output
downstream_node.input[i] = out
elif upstream_node is not None and downstream_node is None:
        # the second Cast feeds a graph output directly; this is not expected here
        raise ValueError(
            "The downstream node of the second cast node should be a graph output"
        )
else:
# find the upstream node's output to first_cast_node
out = None
for output_name in upstream_node.output:
if output_name == first_cast_node.input[0]:
out = output_name
break
# find the downstream node's input as second_cast_node's output
for i, input_name in enumerate(downstream_node.input):
for output_name in second_cast_node.output:
if input_name == output_name:
# change the input as the upstream node's output
downstream_node.input[i] = out
# 6. remove the cast node pair
for cast_node_pair in remove_candidate:
graph_proto.node.remove(cast_node_pair[0])
graph_proto.node.remove(cast_node_pair[1])
# Check if the model is already converted to float16
def check_if_fp16_ready(graph_proto):
    # Check graph inputs, outputs, and value_infos
is_value_info_fp16 = False
for value_info in itertools.chain(
graph_proto.output, graph_proto.input, graph_proto.value_info
):
if value_info.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT16:
is_value_info_fp16 = True
break
# Check initializer
is_initializer_fp16 = False
for initializer in graph_proto.initializer:
if initializer.data_type == onnx_proto.TensorProto.FLOAT16:
is_initializer_fp16 = True
break
# Check cast node
has_cast_node_fp16 = False
for node in graph_proto.node:
if node.op_type == "Cast" and node.attribute[0].i == FLOAT16:
has_cast_node_fp16 = True
break
    # If any of the above flags is set, the model already contains fp16
    return is_value_info_fp16 or is_initializer_fp16 or has_cast_node_fp16
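if __name__ == "__main__":
    # Minimal CLI sketch (not part of the original interface); argument names
    # and file paths are illustrative.
    import argparse

    parser = argparse.ArgumentParser(description="Convert an ONNX model to float16.")
    parser.add_argument("input", help="path to the float32 ONNX model")
    parser.add_argument("output", help="where to write the float16 ONNX model")
    args = parser.parse_args()

    fp16_model = convert_float_to_float16_model_path(args.input)
    onnx.save(fp16_model, args.output)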