void CNTKToONNXHelper::ProcessInputs()

in Source/CNTKv2LibraryDll/proto/onnx/CNTKToONNX.cpp [5968:6316]


void CNTKToONNXHelper::ProcessInputs(const FunctionPtr& src,
    onnxruntime::Graph* graph,
    std::unordered_map<FunctionPtr, onnxruntime::Node*>& functionNodes,
    std::unordered_map<Variable, onnxruntime::Node*>& variableNodes,
    std::vector<onnxruntime::NodeArg *>& inputs,
    std::vector<ScanLoop> &scanLoops, int createLoopIndex)
{
    std::string cntkOpName = ToLegacyString(ToUTF8(src->OpName()));
    std::string onnxOpName = ToOPName(src);

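    // For each input of src: resolve placeholders and batch/sequence pack/unpack wrappers,
    // derive a unique NodeArg name and its TypeProto, register initializers for
    // Constant/Parameter inputs, recurse into upstream owner nodes, and collect the
    // resulting NodeArgs into `inputs`.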
    for (size_t inputIndex = 0; inputIndex < src->Inputs().size(); ++inputIndex)
    {
        auto input = src->Inputs()[inputIndex];

        while (input.IsPlaceholder())
        {
            input = input.BlockFunctionVariableMapping();
            if (!input.IsInitialized())
                LogicError("Node '%S': Placeholder isn't supported currently.", src->AsString().c_str());
        }

        // Skip batch/sequence pack/unpack ops wrapping this input, one step of the pattern
        // at a time so that complex patterns are not skipped as a whole; retrying
        // SkipBatchAndSequenceAxisInput shall be good enough.
        input = SkipBatchAndSequenceAxisInput(input);
        //// UnpackBatchAxis and ToBatchAxis are no-ops in ONNX.
        //bool dynamicAxisPackUnpackSkipped = false;

        //// TODO: to skip a batch/sequence pack/unpack, we need
        //// to ensure src only sees its direct inputs to maintain the dynamic axis semantics of CNTK ops.
        //// However, if the batch size is not FreeBatchSize, we need to keep the batch size, not the #.
        //// For example (in C++ shape order):
        //// (1987, 600) -> ToBatchAxis -> (1987, 600)        // because 1987 != FreeBatchSize
        //// ElementTimes with [#][600] -> (1987, 600)
        //// if we kept CNTK dynamic axis semantics:
        //// (1987, 600) -> ToBatchAxis -> [#](600, )
        //// ElementTimes with [#][600] -> (#, 600) which is (1, 600)
        //if (dynamicAxisPackUnpackSkipped)
        //    input = SkipBatchAndSequenceAxisInput(input);

        // Input might be a placeholder after skipping.
        while (input.IsPlaceholder())
        {
            input = input.BlockFunctionVariableMapping();
            if (!input.IsInitialized())
                LogicError("Node '%S': Placeholder isn't supported currently.", src->AsString().c_str());
        }

        // Special-case handling of the LayerNormalization layer, because it changes
        // ops dynamically based on the value of its inputs. If more such ops are seen,
        // this handling should be abstracted out of here.
        if (cntkOpName == "LayerNormalization")
        {
            // If non-zero epsilon was specified, a fourth input is included
            // which must be ignored because we cannot export epsilon to ONNX.
            // See LayerNormalization branch in AddNode() below.
            if (src->Inputs().size() == 4 && inputIndex == 0 && input.IsConstant())
                continue;
        }
        else if (cntkOpName == "Crop")
        {
            // Export only the first input. In ONNX, Crop accepts only one input, and there is no notion of a referent input.
            if (inputIndex > 0)
                continue;
        }

        if ((src->OpName() == L"Sequence::Slice" || src->OpName() == L"Sequence::IsFirst" || src->OpName() == L"Sequence::IsLast") && inputIndex != src->Inputs().size() - 1)
        {
            // For these sequence ops, only the last input is the real, valid input.
            continue;
        }
        else if (FilterInput(src, input, inputIndex))
            continue;

        //
        // Get unique name based on user-defined name if available, otherwise use our internal unique name ID.
        //
        std::string inputName = [&]() {
            auto inputItr = compositeOutputsMap.find(input);
            if (inputItr != compositeOutputsMap.end())
                return UniqueNodeNameStorage::GetUniqueInputNodeName(inputItr->second);
            else
                return UniqueNodeNameStorage::GetUniqueInputNodeName(input);
        }();
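        // compositeOutputsMap appears to map block-function outputs to their underlying
        // composite outputs (cf. the "from block to underlying" mapping used below), so
        // the NodeArg name is taken from the underlying variable when one exists.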

        bool isConstant = (input.IsParameter() || input.IsConstant());
        // sequence convolution has different indexing which cannot be handled by IgnoreConstantAndParameter
        if (src->OpName() != L"Convolution" || !src->Outputs()[0].HasSequenceAxis())
            isConstant &= !Operators::IgnoreConstantAndParameter(src->OpName(), inputIndex);

        bool isInSubGraph = createLoopIndex >= 0 && createLoopIndex < static_cast<int>(scanLoops.size());

        bool isScanInputInSubgraph = isInSubGraph &&
            std::find_if(scanLoops[createLoopIndex].m_scanInputs.begin(), scanLoops[createLoopIndex].m_scanInputs.end(),
                [&inputName](const Variable& v) { return inputName == UniqueNodeNameStorage::GetUniqueInputNodeName(v); }) != scanLoops[createLoopIndex].m_scanInputs.end();

        bool isOutputOfStepFunction = input.Owner() &&
            (input.Owner()->OpName() == L"PastValue" || input.Owner()->OpName() == L"FutureValue");
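        // Outputs of PastValue/FutureValue become scan states when a loop body is being
        // exported; inside a subgraph such an input is rewired to the step function's
        // initial state in the branch below.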

        onnx::TypeProto inputArgType;

        if (isOutputOfStepFunction)
        {
            if (isInSubGraph)
            {
                // Need to take the input from the step function's initial state (the second
                // input to the step function). If the initial state is a scalar, it will be
                // created with the correct shape later in this method.
                // One initial state may map to multiple final states. To get a one-to-one
                // mapping from initial to final states, we have to split the initial state.
                inputName = MakeInitialStateNodeArgName(input);
                inputArgType = ToTypeProto(input.Shape(), input.HasBatchAxis(), input.HasSequenceAxis());
            }
        }
        else if (input.Owner() && ONNX::Operators::IsRNNOp(ToLegacyString(ToUTF8(input.Owner()->OpName()))) &&
            isInSubGraph)
        {
            // We are processing a subgraph and have hit an LSTM block.
            // Because the LSTM block is constructed as a whole, compositeOutputsMap has no
            // mapping for it. The LSTM block is inside the loop and gets decomposed in the
            // scan loop, so we need to use its internal names (instead of the block names).
            BlockFunction* block = dynamic_cast<BlockFunction *>(input.Owner().get());

            // from block to underlying
            std::unordered_map<Variable, Variable> bm = block->CompositeOutputsMap();
            if (bm.find(input) == bm.end())
                LogicError("cannot map PastValue/Future's input to LSTM underlying output");

            inputName = UniqueNodeNameStorage::GetUniqueInputNodeName(bm[input]);
        }

        //
        // If this input is an output, then it is the output of an upstream node.
        // Recursively add all upstream nodes (essentially a DFS).
        //
        if (input.IsOutput())
            CreateNode(input.Owner(), graph, functionNodes, variableNodes,
                scanLoops, createLoopIndex);

        if (cntkOpName == "Splice")
        {
            // For ops like Concat, the batch axis may exist in only one of the operands.
            // CNTK allows the other operand(s) to lack the batch axis, but ONNX
            // requires all operands to have the same rank.
            inputArgType = ToTypeProto(input.Shape(), OpInputsHasBatchAxis(src), input.HasSequenceAxis());
        }
        else if (cntkOpName == "ImageScaler")
        {
            // TODO: verify - ONNX specifies that ImageScaler always needs a batch axis.
            inputArgType = ToTypeProto(input.Shape(), true);
        }
        else if (cntkOpName == "Convolution")
        {
            const size_t ConvWeightIndex = 0u;
            const size_t ConvOperandIndex = 1u;
            NDShape inputShape = input.Shape();
            if (inputIndex == ConvWeightIndex)
            {
                // The CNTK kernel shape can omit the out channel axis when its value is 1.
                // On the other hand, the ONNX spec requires the out channel axis to be explicitly set.
                // w: [O x C x W x H], operand: [N] x [C x W x H].
                // Thus insert the emulated out channel axis if needed.
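                // For example (illustrative): a kernel of shape (3, 3, 64) against an
                // operand of shape (W, H, 64) has equal rank, so the kernel becomes
                // (3, 3, 64, 1), i.e. an explicit out channel O = 1.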
                const NDShape& operandShape = src->Inputs()[ConvOperandIndex].Shape();
                if (operandShape.Rank() >= inputShape.Rank())
                    inputShape = inputShape.AppendShape({ 1 });
                assert(inputShape.Rank() == (operandShape.Rank() + 1));
            }
            inputArgType = ToTypeProto(inputShape, input.HasBatchAxis(), input.HasSequenceAxis());
        }
        else
        {
            inputArgType = ToTypeProto(input.Shape(), input.HasBatchAxis(), input.HasSequenceAxis());

            if (isConstant && cntkOpName == "BatchNormalization" && (inputIndex > 0 && inputIndex <= 4))
            {
                // In the case of BatchNormalization, if the data (input[0]) is of type FP16, then all BN stats (inputs[1:4])
                // need to be converted from FP32 to FP16 prior to being exported to ONNX.
                if (src->Inputs()[0].GetDataType() == DataType::Float16)
                    input = Utils::ConvertVariableType<float, float16>(input, true);

                // This is a workaround allowing CNTK V1 pretrained models to continue running after removal of the sequence axis from the input.
                if (src->Attributes()[L"spatial"].Value<bool>() && input.Shape().Rank() > 1)
                    inputArgType = ToTypeProto(input.Shape().SubShape(0, 1), input.HasBatchAxis(), input.HasSequenceAxis());
            }
        }

        // TODO: if it is an identity op, we should peek at its input node to find the correct tensor element type.

        if (onnxOpName == "Identity")
        {
            // Shall match the type of the NodeArg of the same name from upstream.
            string inputNodeArgName = UniqueNodeNameStorage::GetUniqueInputNodeName(input);
            if (!TryMatchNodeArgType(inputArgType, graph, inputNodeArgName))
                UpdateONNXType(src->Inputs()[0].GetDataType(), inputArgType);
        }
        else if (OpNeedONNXTypeMap(cntkOpName))
        {
            if (!input.IsOutput())
            {
                MapAndUpdateONNXType(onnxOpName, true, inputIndex, input.GetDataType(), &inputArgType);
            }
            else
            {
                // The input NodeArg has already been created as an output NodeArg of the previous function node.
                // A Cast op needs to be inserted to get the desired type in ONNX.
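                // For example, if the ONNX op expects an int64 input but the CNTK input is
                // float, the code below produces: float NodeArg -> Cast -> int64 NodeArg,
                // and the Cast output is pushed as the op's input.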
                TensorProto_DataType onnx_type = MapAndUpdateONNXType(onnxOpName, true, inputIndex, input.GetDataType(), nullptr);
                if (ConvertDataTypeCNTKToTensorProto(input.GetDataType()) != onnx_type)
                {
                    UpdateONNXType(input.GetDataType(), inputArgType);
                    onnxruntime::NodeArg &castInputArg = graph->GetOrCreateNodeArg(inputName, &inputArgType);
                    onnxruntime::Node* castNode = AddCastNode(castInputArg, graph, onnx_type, ToLegacyString(ToUTF8(src->Uid())));
                    inputs.push_back(const_cast<NodeArg *>(castNode->OutputDefs()[0]));

                    // we already completed preparation of this input and can proceed to the next input.
                    continue;
                }
                else if (isInSubGraph)
                {
                    UpdateONNXType(input.GetDataType(), inputArgType);
                }
            }
        }
        else
        {
            UpdateONNXType(input.GetDataType(), inputArgType);
        }

        bool addedInitializer = false;
        //
        // Leaf nodes are data entry points to the graph and need their own node with only an output arg.
        //
        if (isConstant)
        {
            if (variableNodes.find(input) == variableNodes.end())
            {
                auto srcTensor = input.IsParameter() ? Parameter(input).Value() : Constant(input).Value();

                onnx::TensorProto dstTensor;
                dstTensor.set_name(inputName);

                CopyTensor(srcTensor, dstTensor, &inputArgType);
                if (CNTKToONNXHelper::globalGraph && createLoopIndex != -1)
                {
                    scanLoops[createLoopIndex].initializerAsInput.push_back(inputName);

                    // With Bing.Malta50.proto1_128_gru_normv3_ep3_z.model, I could only get ONNX Runtime
                    // to produce matching results by putting initializers in the subgraphs
                    // (calling graph->AddInitializedTensor instead).
                    CNTKToONNXHelper::globalGraph->AddInitializedTensor(dstTensor);
                    // graph->AddInitializedTensor(dstTensor);
                    addedInitializer = true;
                }
                else
                    graph->AddInitializedTensor(dstTensor);
            }
        }

        onnxruntime::NodeArg *adjusted = nullptr;
        if ((isOutputOfStepFunction && isInSubGraph) || isScanInputInSubgraph)
        {
            inputName = MakeScanInputOutputNodeArgName(inputName);
            
            // In the case of broadcast, we want the input name unchanged.
            // The inserted Reshape op is treated as being inside the scan subgraph.
            adjusted = GetInputAdjustmentForBroadcast(graph, src, input, inputIndex, inputArgType, inputName);
        }
        else
        {
            adjusted = GetInputAdjustmentForBroadcast(graph, src, input, inputIndex, inputArgType);
        }

        onnxruntime::NodeArg &inputArg = adjusted == nullptr ? graph->GetOrCreateNodeArg(inputName, &inputArgType) : *adjusted;
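        // If the initializer was added to the outer (main) graph while building a scan
        // subgraph, the subgraph must declare the name as an outer-scope NodeArg so the
        // name resolves across the graph boundary.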
        if (addedInitializer)
        {
            graph->AddOuterScopeNodeArg(inputArg.Name());
        }

        inputs.push_back(&inputArg);

        if (cntkOpName == "Reshape")
        {
            // Since ONNX 1.2, the Reshape node takes the shape as an input instead of an attribute.

            // We can construct the shape input for ONNX in two ways: 1. from the CNTK node's output shape, or 2. from the CNTK node attribute "newShape".
            // If the attribute "newShape" is missing, or "beginAxis"/"endAxis" restrict the reshape to a sub-range of axes,
            // we use the CNTK node's output shape, so that we don't need to duplicate the shape inference logic here.
            // Otherwise we use the CNTK node attribute "newShape".
            bool useOutputShape = [&]() {
                if (!src->Attributes().Contains(L"newShape") || ((NDShape)src->Attributes()[L"newShape"].Value<NDShape>()).Rank() == 0)
                    return true;
                if (src->Attributes().Contains(L"beginAxis") && ((Axis)src->Attributes()[L"beginAxis"].Value<Axis>()).StaticAxisIndex() != 0)
                    return true;
                if (src->Attributes().Contains(L"endAxis") && ((Axis)src->Attributes()[L"endAxis"].Value<Axis>()).StaticAxisIndex() != src->Inputs()[0].Shape().Rank())
                    return true;
                return false;
            }();
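            // For example (illustrative): a Reshape applied through beginAxis/endAxis rewrites
            // only a sub-range of axes, so the "newShape" attribute alone does not describe
            // the full output shape and the inferred output shape is used instead.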
            const NDShape shape = useOutputShape ? src->Output().Shape() : (NDShape)src->Attributes()[L"newShape"].Value<NDShape>();
            const NDShape inputShape = src->Inputs()[0].Shape();
            std::vector<int64_t> newShapeVec;
            size_t numInferredDimensions(0);
            // If the output has a batch axis, then create an output shape (which goes in as the
            // shape input to the ONNX node) with an additional axis for the batch axis (1).
            // ONNX dimensions are left-aligned.
            if (src->Output().HasSequenceAxis() && !isInSubGraph)
                newShapeVec.push_back(NDShape::FreeDimension);
            if (src->Output().HasBatchAxis())
                newShapeVec.push_back(BatchSizeProcessor::FreeBatchSize());
            for (int i = 0; i < static_cast<int>(shape.Rank()); i++)
            {
                int indexToOutputShape = shape.Rank() - i - 1;
                int indexToInputShape = inputShape.Rank() - i - 1;
                const auto& axisSize = shape.Dimensions()[indexToOutputShape];
                if (axisSize == NDShape::InferredDimension)
                {
                    numInferredDimensions++;
                    if (numInferredDimensions > 1)
                        LogicError("Reshape: Multiple InferredDimension not supported by ONNX.");
                    else
                        newShapeVec.push_back(ReshapeInferredDim);
                }
                else if (axisSize == NDShape::FreeDimension &&
                    indexToInputShape >= 0 && inputShape[indexToInputShape] != NDShape::FreeDimension)
                {
                    numInferredDimensions++;
                    if (numInferredDimensions > 1)
                        LogicError("Reshape: Multiple InferredDimension not supported by ONNX.");
                    newShapeVec.push_back(ReshapeInferredDim);
                }
                else // REVIEW SPTIWARI: Should we fill 0 for FreeDimension here?
                    newShapeVec.push_back(static_cast<int64_t>(axisSize));
            }
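
            // For example (illustrative, not from the source): reshaping to newShape (300, 2)
            // on an input with a batch axis yields newShapeVec = { FreeBatchSize, 2, 300 },
            // since CNTK (column-major) dimensions are reversed into ONNX (row-major) order
            // after the dynamic axes.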

            onnx::TypeProto shapeInputArgType = ToTypeProto(std::vector<int64_t>({ (int64_t)newShapeVec.size() }));
            shapeInputArgType.mutable_tensor_type()->set_elem_type(onnx::TensorProto_DataType_INT64);

            onnxruntime::NodeArg &shapeInputArg = graph->GetOrCreateNodeArg(ToLegacyString(ToUTF8(src->Output().Uid())) + "_shape", &shapeInputArgType);
            inputs.push_back(&shapeInputArg);
            AddShapeInitializer(shapeInputArg.Name(), newShapeVec, graph);
        }
    }
}
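
A minimal sketch of the calling pattern (illustrative only, not the verbatim caller; the
comments above reference AddNode(), which consumes the collected inputs):

    // Hypothetical outline: ProcessInputs gathers the input NodeArgs for src, recursing
    // into upstream nodes as needed; the caller then creates the output NodeArgs and
    // adds the ONNX node for src using the collected inputs.
    std::vector<onnxruntime::NodeArg*> inputs;
    ProcessInputs(src, graph, functionNodes, variableNodes, inputs, scanLoops, createLoopIndex);
    // ... build output NodeArgs for src->Outputs(), then add the node (cf. AddNode) ...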