in tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc [236:376]
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
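// Fetch the evaluation tensors: input activations, filter weights, an
// optional bias (only present when the node carries four inputs), and the
// output.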
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 4)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
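// OpData (conv params, per-channel quantization multipliers/shifts, and
// scratch buffer indices) is populated in Prepare().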
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
reference_ops::TransposeConv(
data.params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
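// int8: per-channel quantized path; accumulation goes through an int32
// scratch buffer requested in Prepare().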
case kTfLiteInt8: {
int32_t* scratch_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
break;
}
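// int16 activations with int8 filters (16x8 quantization); accumulation
// goes through an int64 scratch buffer.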
case kTfLiteInt16: {
std::int64_t* scratch_buffer = static_cast<int64_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
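// Widen the legacy 16-bit bias into a 64-bit scratch buffer before calling
// the reference kernel.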
if (bias != nullptr && bias->type == kTfLiteInt16) {
std::int64_t* bias_converted_buffer =
static_cast<int64_t*>(context->GetScratchBuffer(
context, data.bias_converted_buffer_index));
for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize();
i++) {
bias_converted_buffer[i] = bias->data.i16[i];
}
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_converted_buffer,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
} else {
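// 64-bit bias path: dispatch to the Xtensa NNLib kernel when
// HIFI4_INTERNAL is defined, otherwise fall back to the portable
// reference implementation.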
#if defined(HIFI4_INTERNAL)
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& filter_shape =
tflite::micro::GetTensorShape(filter);
const RuntimeShape& output_shape =
tflite::micro::GetTensorShape(output);
const int stride_width = data.params.stride_width;
const int stride_height = data.params.stride_height;
const int pad_width = data.params.padding_values.width;
const int pad_height = data.params.padding_values.height;
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int16_t* input_data =
tflite::micro::GetTensorData<int16_t>(input);
const int8_t* filter_data =
tflite::micro::GetTensorData<int8_t>(filter);
const int64_t* bias_data = tflite::micro::GetTensorData<int64_t>(bias);
int16_t* output_data = tflite::micro::GetTensorData<int16_t>(output);
const int num_elements = output_shape.FlatSize();
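// The NNLib kernel operates on a single batch, so advance the output,
// input, and scratch pointers for each batch.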
for (int b = 0; b < batches; b++) {
xa_nn_transpose_conv(
&output_data[b * output_height * output_width * output_depth],
const_cast<WORD16*>(
&input_data[b * input_height * input_width * input_depth]),
const_cast<WORD8*>(filter_data), const_cast<WORD64*>(bias_data),
stride_width, stride_height, pad_width, pad_height, input_depth,
output_depth, input_height, input_width, filter_height,
filter_width, output_height, output_width, num_elements / batches,
data.per_channel_output_shift, data.per_channel_output_multiplier,
&scratch_buffer[b * output_height * output_width * output_depth]);
}
#else
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
#endif
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}