tinynn/converter/operators/tflite/transformable.py (550 lines of code) (raw):
from abc import abstractmethod
from .base import BaseOperator, QuantizationParameters, Tensor
from .custom import MTKTransposeConvOperator
from . import generated_ops as tfl_ops
from ..base import ExtendedOperator
from ...schemas.tflite import schema_generated as tflite
import typing
import torch
import warnings
import numpy as np
class TransformableOperator(BaseOperator):
def __init__(self, op: int, inputs: typing.List['Tensor'], outputs: typing.List['Tensor'], op_version: int):
super().__init__(op, inputs, outputs, op_version=op_version)
self.attr_count = 0
self.transform_count = 0
@abstractmethod
def transform(self):
pass
def create_attr_tensor(self, tensor, name=None, quantization=None):
if name is None:
if self.attr_count == 0:
name = self.outputs[0].name + '_te_attr'
else:
name = self.outputs[0].name + f'_te_attr_{self.attr_count}'
self.attr_count += 1
return Tensor(tensor, name, has_buffer=True, quantization=quantization)
def create_transform_tensor(self, tensor, name=None, quantization=None):
if name is None:
if self.transform_count == 0:
name = self.outputs[0].name + '_te_transform'
else:
name = self.outputs[0].name + f'_te_transform_{self.transform_count}'
self.transform_count += 1
return Tensor(tensor, name, has_buffer=False, quantization=quantization)
def wrap_ops_with_nhwc_nchw_transposes(
self, ops: typing.List[tfl_ops.BaseOperator], input_idx: int = 0, output_idx: int = 0
) -> typing.List[tfl_ops.BaseOperator]:
orig_input = ops[0].inputs[input_idx]
orig_output = ops[-1].outputs[output_idx]
if orig_input.tensor.ndim == 4:
nhwc2nchw_perm = np.array([0, 3, 1, 2], dtype='int32')
nchw2nhwc_perm = np.array([0, 2, 3, 1], dtype='int32')
elif orig_input.tensor.ndim == 5:
nhwc2nchw_perm = np.array([0, 4, 1, 2, 3], dtype='int32')
nchw2nhwc_perm = np.array([0, 2, 3, 4, 1], dtype='int32')
else:
assert False, f'Don\'t know how to wrap tranposes for {orig_input.tensor.ndim}d tensors'
nhwc2nchw_perm_tensor = self.create_attr_tensor(nhwc2nchw_perm)
nchw2nhwc_perm_tensor = self.create_attr_tensor(nchw2nhwc_perm)
new_input = self.create_transform_tensor(
np.transpose(orig_input.tensor, nchw2nhwc_perm), quantization=orig_input.quantization
)
new_output = self.create_transform_tensor(
np.transpose(orig_output.tensor, nchw2nhwc_perm), quantization=orig_output.quantization
)
nchw2nhwc_transpose = tfl_ops.TransposeOperator([orig_input, nchw2nhwc_perm_tensor], [new_input])
nhwc2nchw_transpose = tfl_ops.TransposeOperator([new_output, nhwc2nchw_perm_tensor], [orig_output])
nchw2nhwc_transpose.extra_hints['direction'] = 'up'
nhwc2nchw_transpose.extra_hints['direction'] = 'down'
ops[0].inputs[input_idx] = new_input
ops[-1].outputs[output_idx] = new_output
return [nchw2nhwc_transpose] + ops + [nhwc2nchw_transpose]
class BatchNormOperator(TransformableOperator):
input_index = 0
weight_index = 1
bias_index = 2
running_mean_index = 3
running_variance_index = 4
output_index = 0
def __init__(
self,
inputs: typing.List['Tensor'],
outputs: typing.List['Tensor'],
eps: float,
quantization: typing.Optional[QuantizationParameters] = None,
fusedActivationFunction=tflite.ActivationFunctionType.NONE,
):
super().__init__(ExtendedOperator.BATCH_NORM, inputs, outputs, 1)
self.eps = eps
self.fusedActivationFunction = fusedActivationFunction
def transform(self, graph_converter, mapping):
assert all((x.buffer is not None for x in self.inputs[1:]))
w, b, mean, var = [
self.inputs[i]
for i in (self.weight_index, self.bias_index, self.running_mean_index, self.running_variance_index)
]
inv = 1 / np.sqrt(var.tensor + self.eps)
new_w = inv * w.tensor
new_b = b.tensor - mean.tensor * new_w
inp = self.inputs[0]
new_shape = [1] + [new_w.shape[0]] + [1] * (inp.tensor.ndim - 2)
new_w = new_w.reshape(new_shape)
new_b = new_b.reshape(new_shape)
weight = self.create_attr_tensor(new_w)
bias = self.create_attr_tensor(new_b)
new_inp = inp
if inp.quantization is not None:
new_inp = self.create_transform_tensor(inp.tensor)
graph_converter.add_operator(tfl_ops.DequantizeOperator([inp], [new_inp]))
mul_out = self.create_transform_tensor(new_inp.tensor * weight.tensor)
graph_converter.add_operator(tfl_ops.MulOperator([new_inp, weight], [mul_out]))
if inp.quantization is not None:
add_out = self.create_transform_tensor(mul_out.tensor + bias.tensor)
else:
add_out = self.outputs[self.output_index]
graph_converter.add_operator(
tfl_ops.AddOperator([mul_out, bias], [add_out], fusedActivationFunction=self.fusedActivationFunction),
transform=True,
)
if inp.quantization is not None:
quant_out = self.outputs[self.output_index]
graph_converter.add_operator(tfl_ops.QuantizeOperator([add_out], [quant_out]), transform=True)
graph_converter.try_restore_edges(mapping)
class GenericConvOperator(TransformableOperator):
input_index = 0
weight_index = 1
bias_index = 2
output_index = 0
stride: typing.List[int]
padding: typing.List[int]
dilation: typing.List[int]
transpose: bool
output_padding: typing.List[int]
groups: int
fusedActivationFunction: tflite.ActivationFunctionType
def __init__(
self,
inputs: typing.List['Tensor'],
outputs: typing.List['Tensor'],
stride: typing.List[int],
padding: typing.List[int],
dialation: typing.List[int],
output_padding: typing.List[int],
groups: int,
fusedActivationFunction=tflite.ActivationFunctionType.NONE,
):
super().__init__(ExtendedOperator.GENERIC_CONV, inputs, outputs, 1)
self.stride = stride
self.padding = padding
self.dilation = dialation
self.output_padding = output_padding
self.groups = groups
self.fusedActivationFunction = fusedActivationFunction
def transform(self, graph_converter, mapping):
input_tensor = self.inputs[0]
weight_tensor = self.inputs[1]
input_dim = len(input_tensor.shape)
weight_dim = len(weight_tensor.shape)
prev_ops = []
next_ops = []
if weight_dim == 3 or input_dim == 3:
reshape_input_size = 1
reshape_output_size = 1
if weight_dim == 3:
self.stride.insert(0, 1)
self.padding.insert(0, 0)
self.dilation.insert(0, 1)
self.output_padding.insert(0, 0)
reshape_input_size = 2
reshape_outputs = [
self.create_transform_tensor(
np.expand_dims(t.tensor, 2),
name=f'{self.outputs[0].name}_{t.name}_4d_input',
quantization=t.quantization,
)
for t in self.inputs[:reshape_input_size]
]
reshape_attrs = [self.create_attr_tensor(np.array(t.shape, dtype='int32')) for t in reshape_outputs]
reshape_ops = [
tfl_ops.ReshapeOperator([old, attr], [new], attr.tensor)
for old, new, attr in zip(self.inputs[:reshape_input_size], reshape_outputs, reshape_attrs)
]
for op in reshape_ops:
op.extra_hints['direction'] = 'up'
prev_ops.extend(reshape_ops)
conv_outputs = [
self.create_transform_tensor(
np.expand_dims(self.outputs[i].tensor, 2),
name=f'{self.outputs[i].name}_4d_output',
quantization=self.outputs[i].quantization,
)
for i in range(reshape_output_size)
]
conv_attrs = [
self.create_attr_tensor(np.array(t.shape, dtype='int32')) for t in self.outputs[:reshape_output_size]
]
conv_ops = [
tfl_ops.ReshapeOperator([old, attr], [new], attr.tensor)
for old, new, attr in zip(conv_outputs, self.outputs[:reshape_output_size], conv_attrs)
]
for op in conv_ops:
op.extra_hints['direction'] = 'down'
next_ops.extend(conv_ops)
self.inputs = reshape_outputs + self.inputs[reshape_input_size:]
self.outputs = conv_outputs + self.outputs[reshape_output_size:]
weight_tensor = self.inputs[1]
elif weight_dim not in (4, 5):
assert False, "Only Conv[Transpose]1d/2d/3d is supported"
if weight_tensor.shape[1] == 1 and weight_tensor.shape[0] == self.groups:
if weight_dim in (3, 4):
conv_op = tfl_ops.DepthwiseConv2dOperator(
self.inputs,
self.outputs,
strideH=self.stride[0],
strideW=self.stride[1],
depthMultiplier=1,
dilationHFactor=self.dilation[0],
dilationWFactor=self.dilation[1],
fusedActivationFunction=self.fusedActivationFunction,
padding=tflite.Padding.VALID,
)
else:
assert False, "Only DepthwiseConv1d/2d is supported"
else:
if input_tensor.shape[1] != weight_tensor.shape[1]:
warnings.warn(
'Group conv is not supported if official tflite interpreter is used. If that is the case for you,'
' plese pass in `group_conv_rewrite=True`. If you want to run the model with TFLite micro, then you'
' may also need to pass in `tflite_micro_rewrite=True`'
)
if weight_dim in (3, 4):
conv_op = tfl_ops.Conv2dOperator(
self.inputs,
self.outputs,
strideH=self.stride[0],
strideW=self.stride[1],
dilationHFactor=self.dilation[0],
dilationWFactor=self.dilation[1],
fusedActivationFunction=self.fusedActivationFunction,
padding=tflite.Padding.VALID,
)
else:
conv_op = tfl_ops.Conv3dOperator(
self.inputs,
self.outputs,
strideD=self.stride[0],
strideH=self.stride[1],
strideW=self.stride[2],
dilationDFactor=self.dilation[0],
dilationHFactor=self.dilation[1],
dilationWFactor=self.dilation[2],
fusedActivationFunction=self.fusedActivationFunction,
padding=tflite.Padding.VALID,
)
ops = self.wrap_ops_with_nhwc_nchw_transposes([conv_op])
conv_op = ops[1]
# Pad handling
if sum(self.padding) > 0:
if weight_dim in (3, 4):
pad_h = self.padding[0]
pad_w = self.padding[1]
pad = [[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]]
else:
pad_d = self.padding[0]
pad_h = self.padding[1]
pad_w = self.padding[2]
pad = [[0, 0], [pad_d, pad_d], [pad_h, pad_h], [pad_w, pad_w], [0, 0]]
pad_tensor = self.create_attr_tensor(np.array(pad, dtype='int32'))
pad_input = ops[0].outputs[0]
pad_array = np.pad(pad_input.tensor, pad)
pad_out = self.create_transform_tensor(pad_array, quantization=pad_input.quantization)
ops[1].inputs[0] = pad_out
pad_op = tfl_ops.PadOperator([pad_input, pad_tensor], [pad_out])
ops.insert(1, pad_op)
# Weight handling
weight = conv_op.inputs[1]
if conv_op.op.code == tflite.BuiltinOperator.DEPTHWISE_CONV_2D:
nchw2chwn_perm = np.array([1, 2, 3, 0], dtype='int32')
nchw2chwn_perm_tensor = self.create_attr_tensor(nchw2chwn_perm)
weight_q = weight.quantization
if weight_q is not None and weight_q.dim is not None:
new_dim = np.nonzero(nchw2chwn_perm == weight_q.dim)[0][0]
weight_q = QuantizationParameters(weight_q.scale, weight_q.zero_point, new_dim)
reordered_weight = self.create_transform_tensor(
np.transpose(weight.tensor, nchw2chwn_perm), quantization=weight_q
)
conv_op.inputs[1] = reordered_weight
reorder_op = tfl_ops.TransposeOperator([weight, nchw2chwn_perm_tensor], [reordered_weight])
else:
if weight_dim in (3, 4):
nchw2nhwc_perm = np.array([0, 2, 3, 1], dtype='int32')
nchw2nhwc_perm_tensor = self.create_attr_tensor(nchw2nhwc_perm)
else:
nchw2nhwc_perm = np.array([2, 3, 4, 1, 0], dtype='int32')
nchw2nhwc_perm_tensor = self.create_attr_tensor(nchw2nhwc_perm)
weight_q = weight.quantization
if weight_q is not None and weight_q.dim is not None:
new_dim = np.nonzero(nchw2nhwc_perm == weight_q.dim)[0][0]
weight_q = QuantizationParameters(weight_q.scale, weight_q.zero_point, new_dim)
reordered_weight = self.create_transform_tensor(
np.transpose(weight.tensor, nchw2nhwc_perm), quantization=weight_q
)
conv_op.inputs[1] = reordered_weight
reorder_op = tfl_ops.TransposeOperator([weight, nchw2nhwc_perm_tensor], [reordered_weight])
ops.insert(1, reorder_op)
# Bias handling
kernel_num = self.inputs[1].shape[0]
if conv_op.op.code in (tflite.BuiltinOperator.DEPTHWISE_CONV_2D, tflite.BuiltinOperator.CONV_3D):
kernel_num = self.inputs[1].shape[-1]
if len(conv_op.inputs) == 2 or conv_op.inputs[2] is None:
if conv_op.inputs[0].dtype == np.dtype('float32'):
bias = np.zeros((kernel_num,), dtype='float32')
q_args = None
else:
bias = np.zeros((kernel_num,), dtype='int32')
per_tensor = weight_tensor.quantization.dim is None
# Bias handling
if per_tensor:
bias_scale = input_tensor.quantization.scale * weight_tensor.quantization.scale
bias_zero_point = 0
bias_dim = None
else:
bias_scale = [input_tensor.quantization.scale * s for s in weight_tensor.quantization.scale]
bias_zero_point = [0] * len(bias_scale)
bias_dim = 0
q_args = QuantizationParameters(bias_scale, bias_zero_point, bias_dim)
conv_op.inputs.append(self.create_attr_tensor(bias, quantization=q_args))
elif conv_op.inputs[2].shape[0] != kernel_num and conv_op.inputs[2].shape[0] == 1:
if conv_op.inputs[0].dtype == np.float32:
bias = torch.tensor([conv_op.inputs[2][0]] * kernel_num, dtype='float32')
else:
bias = torch.tensor([conv_op.inputs[2][0]] * kernel_num, dtype='int32')
conv_op.inputs[2] = self.create_attr_tensor(bias)
ops = prev_ops + ops + next_ops
for op in ops:
graph_converter.add_operator(op, transform=True)
graph_converter.try_restore_edges(mapping)
for op in ops[:-1]:
output_name = op.outputs[0].name
node_name = graph_converter.tensor_node_map[output_name]
node = graph_converter.graph.vs.find(name=node_name)
assert node.outdegree() > 0, (
'The following node should be a part of the transformable node, but the outdegree of'
f' it is zero. {node}'
)
next_node = graph_converter.graph.vs[node.out_edges()[0].target]
assert next_node['node_type'] != ExtendedOperator.CONSTANT_NODE
class GenericTransposeConvOperator(TransformableOperator):
input_index = 0
weight_index = 1
bias_index = 2
output_index = 0
stride: typing.List[int]
padding: typing.List[int]
dilation: typing.List[int]
transpose: bool
output_padding: typing.List[int]
groups: int
enable_mtk_ops: bool
conv_transpose_with_bias: bool
fusedActivationFunction: tflite.ActivationFunctionType
def __init__(
self,
inputs: typing.List['Tensor'],
outputs: typing.List['Tensor'],
stride: typing.List[int],
padding: typing.List[int],
dilation: typing.List[int],
output_padding: typing.List[int],
groups: int,
enable_mtk_ops: bool = False,
conv_transpose_with_bias: bool = True,
fusedActivationFunction=tflite.ActivationFunctionType.NONE,
):
super().__init__(ExtendedOperator.GENERIC_DECONV, inputs, outputs, 1)
self.stride = stride
self.padding = padding
self.dilation = dilation
self.output_padding = output_padding
self.groups = groups
self.enable_mtk_ops = enable_mtk_ops
self.conv_transpose_with_bias = conv_transpose_with_bias
self.fusedActivationFunction = fusedActivationFunction
def transform(self, graph_converter, mapping):
input_tensor = self.inputs[0]
weight_tensor = self.inputs[1]
output_tensor = self.outputs[0]
input_dim = len(input_tensor.shape)
weight_dim = len(weight_tensor.shape)
prev_ops = []
next_ops = []
if weight_dim == 3 or input_dim == 3:
self.stride.insert(0, 1)
self.padding.insert(0, 0)
self.dilation.insert(0, 1)
self.output_padding.insert(0, 0)
reshape_outputs = [
self.create_transform_tensor(
np.expand_dims(t.tensor, 2),
name=f'{self.outputs[0].name}_{t.name}_4d_input',
quantization=t.quantization,
)
for t in self.inputs[:2]
]
reshape_attrs = [self.create_attr_tensor(np.array(t.shape, dtype='int32')) for t in reshape_outputs]
reshape_ops = [
tfl_ops.ReshapeOperator([old, attr], [new], attr.tensor)
for old, new, attr in zip(self.inputs[:2], reshape_outputs, reshape_attrs)
]
for op in reshape_ops:
op.extra_hints['direction'] = 'up'
if weight_dim == 3 and input_dim == 3:
prev_ops.extend(reshape_ops)
elif weight_dim == 3:
prev_ops.append(reshape_ops[1])
else:
prev_ops.append(reshape_ops[0])
conv_outputs = [
self.create_transform_tensor(
np.expand_dims(self.outputs[0].tensor, 2),
name=f'{self.outputs[0].name}_4d_output',
quantization=self.outputs[0].quantization,
)
]
conv_attrs = [self.create_attr_tensor(np.array(t.shape, dtype='int32')) for t in self.outputs[:1]]
conv_ops = [
tfl_ops.ReshapeOperator([old, attr], [new], attr.tensor)
for old, new, attr in zip(conv_outputs, self.outputs[:1], conv_attrs)
]
for op in conv_ops:
op.extra_hints['direction'] = 'down'
next_ops.extend(conv_ops)
if weight_dim == 3 and input_dim == 3:
self.inputs = reshape_outputs + self.inputs[2:]
elif weight_dim == 3:
self.inputs = self.inputs[0:1] + reshape_outputs[1:2] + self.inputs[1:]
else:
self.inputs = reshape_outputs[0:1] + self.inputs[1:]
self.outputs = conv_outputs + self.outputs[1:]
weight_tensor = self.inputs[1]
elif weight_dim not in (4, 5):
assert False, "Only Conv[Transpose]1d/2d/3d is supported"
if output_tensor.shape[1] != weight_tensor.shape[1]:
warnings.warn(
'Group transposed conv is not supported if official tflite interpreter is used. If that is the case'
' for you, plese pass in `group_conv_rewrite=True`. If you want to run the model with TFLite micro,'
' then you may also need to pass in `tflite_micro_rewrite=True`'
)
if weight_dim in (3, 4):
assert all((x == 1 for x in self.dilation)), "Only dilation=1 is supported for conv_transpose2d"
if self.enable_mtk_ops:
conv_op = MTKTransposeConvOperator(
self.inputs[:2][::-1],
self.outputs,
depth_multiplier=1,
dilation_height_factor=self.dilation[0],
dilation_width_factor=self.dilation[1],
padding_type=tflite.Padding.VALID,
stride_height=self.stride[0],
stride_width=self.stride[1],
)
else:
conv_op = tfl_ops.TransposeConvOperator(
self.inputs[:2][::-1],
self.outputs,
strideH=self.stride[0],
strideW=self.stride[1],
padding=tflite.Padding.VALID,
fusedActivationFunction=self.fusedActivationFunction,
)
else:
conv_op = tfl_ops.Conv3dTransposeOperator(
self.inputs[:2][::-1],
self.outputs,
strideD=self.stride[0],
strideH=self.stride[1],
strideW=self.stride[2],
dilationDFactor=self.dilation[0],
dilationHFactor=self.dilation[1],
dilationWFactor=self.dilation[2],
padding=tflite.Padding.VALID,
fusedActivationFunction=self.fusedActivationFunction,
)
ops = self.wrap_ops_with_nhwc_nchw_transposes([conv_op], input_idx=1)
# Pad handling
output_shape = conv_op.outputs[0].shape
if sum(self.padding) > 0:
if weight_dim in (3, 4):
pad_h = self.padding[0]
pad_w = self.padding[1]
start = np.array([0, pad_h, pad_w, 0], dtype='int32')
pad_sizes = ((0, 0), (pad_h, pad_h), (pad_w, pad_w), (0, 0))
else:
pad_d = self.padding[0]
pad_h = self.padding[1]
pad_w = self.padding[2]
start = np.array([0, pad_d, pad_h, pad_w, 0], dtype='int32')
pad_sizes = ((0, 0), (pad_d, pad_d), (pad_h, pad_h), (pad_w, pad_w), (0, 0))
size = np.array(ops[1].outputs[0].shape, dtype='int32')
start_tensor = self.create_attr_tensor(start)
size_tensor = self.create_attr_tensor(size)
slice_out = ops[1].outputs[0]
pad_array = np.pad(self.outputs[0].tensor, pad_sizes)
slice_input = self.create_transform_tensor(pad_array, quantization=self.outputs[0].quantization)
ops[1].outputs[0] = slice_input
slice_op = tfl_ops.SliceOperator([slice_input, start_tensor, size_tensor], [slice_out])
output_shape = slice_input.shape
ops.insert(2, slice_op)
# Output shape handling
output_shape_tensor = self.create_attr_tensor(np.array(output_shape, dtype='int32'))
conv_op.inputs.insert(0, output_shape_tensor)
# Weight handling
weight = conv_op.inputs[1]
if weight_dim in (3, 4):
nchw2chwn_perm = np.array([1, 2, 3, 0], dtype='int32')
else:
nchw2chwn_perm = np.array([2, 3, 4, 1, 0], dtype='int32')
nchw2chwn_perm_tensor = self.create_attr_tensor(nchw2chwn_perm)
reordered_weight = self.create_transform_tensor(
np.transpose(weight.tensor, nchw2chwn_perm), quantization=weight.quantization
)
conv_op.inputs[1] = reordered_weight
reorder_op = tfl_ops.TransposeOperator([weight, nchw2chwn_perm_tensor], [reordered_weight])
ops.insert(1, reorder_op)
# Bias handling
if self.enable_mtk_ops or self.conv_transpose_with_bias:
kernel_num = output_tensor.shape[1]
if len(self.inputs) > 2 and self.inputs[2].shape[0] != kernel_num and self.inputs[2].shape[0] == 1:
if conv_op.inputs[-1].dtype == np.float32:
bias = torch.tensor([self.inputs[2][0]] * kernel_num, dtype='float32')
else:
bias = torch.tensor([self.inputs[2][0]] * kernel_num, dtype='int32')
conv_op.inputs.append(self.create_attr_tensor(bias))
else:
if len(self.inputs) == 2 or self.inputs[2] is None:
if conv_op.inputs[-1].dtype == np.dtype('float32'):
bias = np.zeros((kernel_num,), dtype='float32')
q_args = None
else:
bias = np.zeros((kernel_num,), dtype='int32')
else:
bias = self.inputs[2].tensor
q_args = None
if bias.dtype != np.dtype('float32'):
per_tensor = weight_tensor.quantization.dim is None
# Bias handling
if per_tensor:
bias_scale = input_tensor.quantization.scale * weight_tensor.quantization.scale
bias_zero_point = 0
bias_dim = None
else:
bias_scale = [input_tensor.quantization.scale * s for s in weight_tensor.quantization.scale]
bias_zero_point = [0] * len(bias_scale)
bias_dim = 0
q_args = QuantizationParameters(bias_scale, bias_zero_point, bias_dim)
conv_op.inputs.append(self.create_attr_tensor(bias, quantization=q_args))
else:
if len(self.inputs) > 2 and self.inputs[2] is not None:
bias_tensor = self.inputs[2]
add_out = ops[-2].outputs[0]
bias_transform = self.create_transform_tensor(
add_out.tensor.copy(), quantization=self.outputs[0].quantization
)
ops[-2].outputs[0] = bias_transform
ops.insert(len(ops) - 1, tfl_ops.AddOperator([bias_transform, bias_tensor], [add_out]))
ops = prev_ops + ops + next_ops
for op in ops:
graph_converter.add_operator(op)
graph_converter.try_restore_edges(mapping)