void CNTKToONNXHelper::CopyAttributes()

in Source/CNTKv2LibraryDll/proto/onnx/CNTKToONNX.cpp [6615:7162]


void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, onnxruntime::Node* node)
{
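    // Translate the attributes of the CNTK function 'src' into attributes on the ONNX node created
    // for it, converting attribute names, axis indices and value encodings per op as needed.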
    auto lookup = Operators::CntkToONNXLookup();
    assert(lookup.count(src->OpName()) != 0);

    std::string opName = ToLegacyString(ToUTF8(src->OpName()));
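    // Ops with a single, one-to-one entry in the lookup table get a direct attribute translation below.
    // Unpooling is excluded here because it shares the Pooling handling in the one-to-many else branch.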
    if (lookup.count(src->OpName()) == 1 && src->OpName() != L"Unpooling")
    {
        auto attributesMap = lookup.find(src->OpName())->second.map;
        opName = attributesMap[src->OpName()];

        if (src->OpName() == L"Clip")
        {
            if (src->Inputs().size() != 3)
            {
                LogicError("Clip should have 3 inputs.");
            }
            float minValue = src->Inputs()[1].Value()->AsScalar<float>();
            float maxValue = src->Inputs()[2].Value()->AsScalar<float>();
            node->AddAttribute("min", minValue);
            node->AddAttribute("max", maxValue);
        }
        else if (src->OpName() == L"Cast")
        {
            DataType newDataType = static_cast<DataType>(src->Attributes()[L"newDataType"].Value<int>());
            int64_t to = static_cast<int64_t>(ConvertDataTypeCNTKToTensorProto(newDataType));
            node->AddAttribute(attributesMap[L"newDataType"], to);
        }
        else if (src->OpName() == L"BatchNormalization")
        {
            auto spatial = (int64_t)((bool)src->Attributes()[L"spatial"].Value<bool>() ? 1 : 0);
            if (spatial != 1)
            {
                LogicError("BatchNormalization spatial should be true.");
            }
            auto normalizationTimeConstant = (float)src->Attributes()[L"normalizationTimeConstant"].Value<double>();
            auto epsilon = (float)src->Attributes()[L"epsilon"].Value<double>();

            //
            // onnx: running_mean = running_mean * momentum + mean * (1 - momentum)
            // cntk: expAvgFactor * MB stats + (1-expAvgFactor) * prev running stats
            //
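            // Note: 1 + expm1(x) == exp(x), so for a finite, positive time constant T this sets
            // momentum = exp(-48 / T), the weight ONNX places on the previous running statistics.
            // The constant 48 looks like an assumed number of samples per minibatch for this
            // conversion; it is not prescribed by the ONNX spec.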
            auto momentum = 0.0f;
            if (!isfinite(normalizationTimeConstant))
                momentum = 1.0f;
            else if (normalizationTimeConstant > 0)
                momentum = 1.0f + expm1(-48.0f / normalizationTimeConstant);

            node->AddAttribute(attributesMap[L"epsilon"], epsilon);
            node->AddAttribute("momentum", momentum);
        }
        else if (src->OpName() == L"LocalResponseNormalization")
        {
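            // ONNX LRN 'size' is the full width of the channel window to sum over, whereas CNTK's
            // depthRadius is the half-width, hence size = 2 * depthRadius + 1 below
            // (e.g. depthRadius = 2 exports as size = 5).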
            auto depthRadius = (int64_t)src->Attributes()[L"depthRadius"].Value<size_t>();
            auto bias = (float)src->Attributes()[L"bias"].Value<double>();
            auto alpha = (float)src->Attributes()[L"alpha"].Value<double>();
            auto beta = (float)src->Attributes()[L"beta"].Value<double>();

            node->AddAttribute(attributesMap[L"size"], 2*depthRadius + 1);
            node->AddAttribute(attributesMap[L"bias"], bias);
            node->AddAttribute(attributesMap[L"alpha"], alpha);
            node->AddAttribute(attributesMap[L"beta"], beta);
        }
        else if (src->OpName() == L"ELU")
        {
            float alpha = 1.0f;
            if (src->Attributes().Contains(L"alpha"))
                alpha = (float)src->Attributes()[L"alpha"].Value<double>();
            node->AddAttribute("alpha", alpha);
        }
        else if (src->OpName() == L"LeakyReLU")
        {
            auto alpha = 0.01f;
            if (src->Attributes().Contains(L"alpha"))
                alpha = (float)src->Attributes()[L"alpha"].Value<double>();
            node->AddAttribute("alpha", alpha);
        }
        else if (src->OpName() == L"SELU")
        {
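            // The defaults match the self-normalizing constants from the SELU paper
            // (alpha ~ 1.6733, scale/gamma ~ 1.0507), truncated to four decimals here.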
            auto alpha = 1.6732f;
            if (src->Attributes().Contains(L"alpha"))
                alpha = (float)src->Attributes()[L"alpha"].Value<double>();

            auto gamma = 1.0507f;
            if (src->Attributes().Contains(L"gamma"))
                gamma = (float)src->Attributes()[L"gamma"].Value<double>();

            node->AddAttribute("alpha", alpha);
            node->AddAttribute("gamma", gamma);
        }
        else if (src->OpName() == L"Dropout")
        {
            auto dropoutRate = (float)src->Attributes()[L"dropoutRate"].Value<double>();
            node->AddAttribute(attributesMap[L"dropoutRate"], dropoutRate);
        }
        else if ((src->OpName() == L"RandomDistribution") ||
                 (src->OpName() == L"UniformRandom") || (src->OpName() == L"NormalRandom") ||
                 (src->OpName() == L"UniformRandomLike") || (src->OpName() == L"NormalRandomLike"))
        {
            auto randomArgs = AsVector<double>(src->Attributes()[L"randomDistributionArgs"].Value<std::vector<DictionaryValue>>());
            auto seed = (int64_t)src->Attributes()[L"rngSeed"].Value<int>();

            if ((src->OpName() == L"UniformRandom") || (src->OpName() == L"UniformRandomLike"))
            {
                node->AddAttribute("low", (float)randomArgs[0]);
                node->AddAttribute("high", (float)randomArgs[1]);
            }
            else
            {
                node->AddAttribute("mean", (float)randomArgs[0]);
                node->AddAttribute("scale", (float)randomArgs[1]);
            }

            node->AddAttribute(attributesMap[L"rngSeed"], seed);
            if ((src->OpName() == L"UniformRandom") || (src->OpName() == L"NormalRandom"))
            {
                auto shape = (NDShape)src->Attributes()[L"newShape"].Value<NDShape>();
                node->AddAttribute(attributesMap[L"newShape"], ToINTS(shape));
            }
        }
        else if (src->OpName() == L"ReduceL1" || src->OpName() == L"ReduceL2" || src->OpName() == L"ReduceSumSquare")
        {
            SetReduceElementsAttributes(src, node, false);
        }
        else if (src->OpName() == L"TransposeAxes")
        {
            if (src->Attributes().Contains(L"axisVec"))
            {
                std::vector<Axis> permutation = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
                // The CNTK permutation attribute is stored argsorted; argsort it again to undo that and recover the original Python/ONNX perm attribute.
                std::vector<int64_t> perm = ConvertPermutationCNTKToONNX(permutation, src->Inputs()[0].HasBatchAxis());
                node->AddAttribute(attributesMap[L"axisVec"], perm);
            }
            else if (src->Attributes().Contains(L"axis1") && src->Attributes().Contains(L"axis2"))
            {
                // swapaxis: permutation is between two axes
                int rank = src->Output().Shape().Rank();
                std::vector<int64_t> perm;
                bool hasBatchAxis = src->Inputs()[0].HasBatchAxis();
                // TODO: handle hasSequenceAxis cases
                for (int index = 0; index < (hasBatchAxis ? (rank + 1) : rank); index++)
                {
                    perm.push_back(index);
                }

                Axis axis1 = (Axis)(src->Attributes()[L"axis1"].Value<Axis>()).StaticAxisIndex();
                Axis axis2 = (Axis)(src->Attributes()[L"axis2"].Value<Axis>()).StaticAxisIndex();
                // It is safe here to assume that the axis is a static axis.
                int64_t axisIndex1 = ConvertAxisToOnnx(axis1, src->Inputs()[0]);
                int64_t axisIndex2 = ConvertAxisToOnnx(axis2, src->Inputs()[0]);
                const NodeArg* inputNodeArg = node->InputDefs()[0];
                const NodeArg* outputNodeArg = node->OutputDefs()[0];
                if (inputNodeArg->Shape()->dim_size() <= (size_t)axisIndex1 ||
                    inputNodeArg->Shape()->dim_size() <= (size_t)axisIndex2 ||
                    outputNodeArg->Shape()->dim_size() <= (size_t)axisIndex1 ||
                    outputNodeArg->Shape()->dim_size() <= (size_t)axisIndex2)
                    LogicError("transpose axis out of range");

                if (((inputNodeArg->Shape()->dim((int)axisIndex1).dim_param() == FreeSequenceDimParam &&
                      axisIndex1 == 0 && axisIndex2 != 1) ||
                     (inputNodeArg->Shape()->dim((int)axisIndex2).dim_param() == FreeSequenceDimParam &&
                      axisIndex2 == 0 && axisIndex1 != 1)) &&
                    inputNodeArg->Shape()->dim(1).dim_value() == BatchSizeProcessor::FreeBatchSize())
                {
                    // Permutation involves the sequence axis, but the sequence axis is already swapped with the batch axis,
                    // so swap the batch axis (at position 1) back first and then swap position 1 with the other axis.
                    // TODO: more testing is needed to cover general cases where batch and sequence axes are involved
                    // in a Transpose.
                    // following example with axisIndex1, axisIndex2 = 2, 0
                    //  (ConvertAxisToOnnx return 0 for FreeDimension sequence axis)
                    // this is what shall happen
                    // CNTK                 ONNX
                    // [#][*, d]            [*, #, d]
                    // [#][d, *]            [#, d, *]
                    // this is what would happen if we do not treat it as a special case
                    // [#][*, d]            [*, #, d]
                    // [#][d, *]            [d, #, *]

                    // move batch axis to 0 position
                    perm[0] = 1;
                    // move sequence and the other axis
                    if (axisIndex1 != 0)
                    {
                        perm[1] = axisIndex1;
                        perm[axisIndex1] = 0;
                    }
                    else
                    {
                        perm[1] = axisIndex2;
                        perm[axisIndex2] = 0;
                    }
                }
                else
                {
                    std::swap(perm[axisIndex1], perm[axisIndex2]);
                }
                node->AddAttribute(attributesMap[L"axisVec"], perm);
            }
        }
        else if (src->OpName() == L"Slice")
        {
            std::vector<int> beginIndex;
            std::vector<int> endIndex;

            if (src->Attributes().Contains(L"axisVec"))
            {
                std::vector<Axis> sliceAxes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
                node->AddAttribute(attributesMap[L"axes"], ConvertAxesToOnnx(sliceAxes, src->Inputs()[0]));

                beginIndex = AsVector<int>(src->Attributes()[L"beginIndexVec"].Value<std::vector<DictionaryValue>>());
                endIndex = AsVector<int>(src->Attributes()[L"endIndexVec"].Value<std::vector<DictionaryValue>>());
            }
            else if (src->Attributes().Contains(L"axis"))
            {
                Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
                // CNTK Slice only supports slicing along a single axis.
                // It is safe to assume that the axis is a static axis.
                int64_t axisIndex = ConvertAxisToOnnx(axis, src->Inputs()[0]);
                std::vector<int64_t> sliceAxes;
                sliceAxes.push_back(axisIndex);
                node->AddAttribute(attributesMap[L"axes"], sliceAxes);

                beginIndex.push_back((int)(src->Attributes()[L"beginIndex"].Value<int>()));
                endIndex.push_back((int)(src->Attributes()[L"endIndex"].Value<int>()));
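                // CNTK uses beginIndex = -1 with endIndex = 0 to mean "from the last element to the
                // end"; ONNX Slice has no such sentinel, so the open end is exported as INT_MAX.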
                if (*beginIndex.rbegin() == -1 && *endIndex.rbegin() == 0)
                    *endIndex.rbegin() = std::numeric_limits<int>::max();
            }

            std::vector<int64_t> beginIndex64 = Cast<int, int64_t>(beginIndex);
            std::vector<int64_t> endIndex64 = Cast<int, int64_t>(endIndex);

            node->AddAttribute(attributesMap[L"beginIndexVec"], beginIndex64);
            node->AddAttribute(attributesMap[L"endIndexVec"], endIndex64);
        }
        else if (src->OpName() == L"Pad")
        {
            auto value = (float)src->Attributes()[L"paddingConstantValue"].Value<double>();
            auto mode = (size_t)src->Attributes()[L"paddingMode"].Value<size_t>();
            auto head = ToINTS(AsVector<size_t>(src->Attributes()[L"paddingHead"].Value<std::vector<DictionaryValue>>()));
            auto foot = ToINTS(AsVector<size_t>(src->Attributes()[L"paddingFoot"].Value<std::vector<DictionaryValue>>()));
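            // ONNX 'pads' covers every axis in [x1_begin, x2_begin, ..., x1_end, x2_end, ...] order,
            // so a zero pad is prepended for the batch axis when present and the head (begin) and
            // foot (end) paddings are concatenated into a single attribute below.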
            if (OpInputsHasBatchAxis(src))
            {
                head.insert(head.begin(), 0);
                foot.insert(foot.begin(), 0);
            }

            head.insert(head.end(), foot.begin(), foot.end());
            string modeStr;
            if (mode == 0)
                modeStr = "constant";
            else if (mode == 1)
                modeStr = "reflect";
            else if (mode == 2)
                NOT_IMPLEMENTED
            else
                LogicError("Invalid 'mode' value encountered in CNTK Pad node.");

            node->AddAttribute("mode", modeStr);
            node->AddAttribute("pads", head);
            if (mode == 0)
                node->AddAttribute("value", value);
        }
        else if (src->OpName() == L"DepthToSpace" || src->OpName() == L"SpaceToDepth")
        {
            size_t blockSize = src->Attributes()[L"blockSize"].Value<size_t>();
            node->AddAttribute("blocksize", static_cast<int64_t>(blockSize));
        }
        else if (src->OpName() == L"Hardmax")
        {
            int numDims = src->Inputs()[0].Shape().Rank();
            if (numDims == 0)
            {
                LogicError("Zero-rank input is not supported for ONNX export.");
            }
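            // CNTK Hardmax operates over the last static axis; in the exported ONNX tensor the
            // dynamic (batch/sequence) axes become leading dimensions, so the ONNX axis index is
            // offset by the number of dynamic axes.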
            int64_t axisIndex = numDims - 1 + src->Inputs()[0].DynamicAxes().size();
            node->AddAttribute(attributesMap[L"axis"], axisIndex);
        }
        else if (src->OpName() == L"Softmax_onnx" || src->OpName() == L"LogSoftmax_onnx" || src->OpName() == L"Hardmax_onnx")
        {
            Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
            int64_t axisIndex = ConvertAxisToOnnx(axis, src->Inputs()[0]);
            node->AddAttribute(attributesMap[L"axis"], axisIndex);
        }
        else if (src->OpName() == L"ROIPooling")
        {
            auto roiOutputShape = (NDShape)src->Attributes()[L"roiOutputShape"].Value<NDShape>();
            auto pooled_shape = ToINTS(roiOutputShape, false);

            auto spatialScale = (float)src->Attributes()[L"spatialScale"].Value<double>();

            node->AddAttribute("pooled_shape", pooled_shape);
            node->AddAttribute("spatial_scale", spatialScale);
        }
        else if (src->OpName() == L"HardSigmoid")
        {
            float alpha = (float)src->Attributes()[L"alpha"].Value<float>();
            float beta = (float)src->Attributes()[L"beta"].Value<float>();
            node->AddAttribute("alpha", alpha);
            node->AddAttribute("beta", beta);
        }
        else if (src->OpName() == L"Flatten")
        {
            Axis axis(0);
            if (src->Attributes().Contains(L"axis"))
            {
                axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
            }
            // Flatten op takes a single axis. It is safe here to assume that the axis is a static axis.
            // The converted axis index needs the additional +1 here.
            int64_t ax = ConvertAxisToOnnx(axis, src->Inputs()[0]) + 1;
            // Flatten op in ONNX doesn't count batch axis.
            if (src->Inputs()[0].HasBatchAxis())
                ax--;
            node->AddAttribute(attributesMap[L"axis"], ax);
        }
        else if (src->OpName() == L"Squeeze")
        {
            std::vector<Axis> axes;
            if (src->Attributes().Contains(L"axisVec"))
            {
                axes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
            }
            else if (src->Attributes().Contains(L"axis"))
            {
                axes.push_back((Axis)(src->Attributes()[L"axis"].Value<Axis>()));
            }
            if (axes.size() > 0)
            {
                node->AddAttribute("axes", ConvertAxesToOnnx(axes, src->Inputs()[0]));
            }
        }
        else if (src->OpName() == L"Gather")
        {
            if (src->Attributes().Contains(L"axis"))
            {
                Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
                // Gather op takes a single axis. It is safe here to assume that the axis is a static axis.
                // The axis applies to the reference input, i.e. the second input.
                int64_t ax = ConvertAxisToOnnx(axis, src->Inputs()[1]);
                node->AddAttribute(attributesMap[L"axis"], ax);
            }
        }
        else if (src->OpName() == L"ImageScaler")
        {
            float scale = (float)(src->Attributes()[L"Scaler"].Value<float>());
            std::vector<float> biases = AsVector<float>(src->Attributes()[L"Biases"].Value<std::vector<DictionaryValue>>());

            node->AddAttribute("scale", scale);
            node->AddAttribute("bias", biases);
        }
        else if (src->OpName() == L"MeanVarianceNormalization")
        {
            auto useStatsAcrossChannels = src->Attributes()[L"useStatsAcrossChannels"].Value<bool>();
            auto doVarianceScaling = src->Attributes()[L"doVarianceScaling"].Value<bool>();
            if (src->Attributes().Contains(L"epsilon"))
            {
                fprintf(stderr, "Warning: epsilon in MeanVarianceNormalization is not supported for ONNX export; a default value of 1e-9 will be used.");
            }
            // REVIEW: MeanVarianceNormalization attribute 'epsilon' is not exported to ONNX because
            // ONNX MeanVarianceNormalization does not have a corresponding attribute. This should be
            // added if and when the attribute is added to MeanVarianceNormalization node's ONNX spec.

            if (!doVarianceScaling)
            {
                LogicError("MeanVarianceNormalization: doVarianceScaling = False is not supported for ONNX export.");
            }
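            // Build the ONNX MeanVarianceNormalization 'axes' attribute: normalize over every axis of
            // the batch-augmented input (hence Rank() + 1), but skip axis 1 (the channel axis) when
            // statistics are computed per channel rather than across channels.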

            std::vector<int64_t> axes;
            for (size_t i = 0; i < src->Inputs()[1].Shape().Rank() + 1; ++i)
            {
                if (!useStatsAcrossChannels && i == 1) continue;
                axes.push_back(static_cast<int64_t>(i));
            }
            node->AddAttribute("axes", axes);
        }
        else if (src->OpName() == L"Gemm")
        {
            float alpha = static_cast<float>(src->Attributes()[L"alpha"].Value<float>());
            float beta = static_cast<float>(src->Attributes()[L"beta"].Value<float>());
            int64_t transA = static_cast<int64_t>(src->Attributes()[L"transA"].Value<bool>());
            int64_t transB = static_cast<int64_t>(src->Attributes()[L"transB"].Value<bool>());

            node->AddAttribute("alpha", alpha);
            node->AddAttribute("beta", beta);
            // Swap transpose attribute to match the swapped inputs in ONNX order.
            node->AddAttribute("transA", transB);
            node->AddAttribute("transB", transA);
        }
        else if (src->OpName() == L"Unsqueeze")
        {
            std::vector<Axis> axes = AsVector<Axis>(src->Attributes()[L"axisVec"].Value<std::vector<DictionaryValue>>());
            // Pass in output operand, such that Unsqueeze axes can be converted based on output rank.
            std::vector<int64_t> ax = ConvertAxesToOnnx(axes, src->Outputs()[0]);

            node->AddAttribute("axes", ax);
        }
        else if (src->OpName() == L"TopK")
        {
            Axis axis = (Axis)(src->Attributes()[L"axis"].Value<Axis>());
            // TopK op takes a single axis. It is safe here to assume that the axis is a static axis.
            int64_t ax = ConvertAxisToOnnx(axis, src->Inputs()[0]);
            node->AddAttribute(attributesMap[L"axis"], ax);

            size_t k = src->Attributes()[L"numItems"].Value<size_t>();
            node->AddAttribute(attributesMap[L"numItems"], static_cast<int64_t>(k));
        }
        else if (src->OpName() == L"EyeLikeOp")
        {
            bool isOutputSparse = src->Attributes().Contains(L"OutputSparse") ? (bool)src->Attributes()[L"OutputSparse"].Value<bool>() : false;
            if(isOutputSparse)
                LogicError("Node '%S': 'OutputSparse' is True. Sparse format export not supported.", src->AsString().c_str());
        }
        else if (src->OpName() == L"Crop")
        {
            const NDShape& inputShape = src->Inputs()[0].Shape();
            const NDShape& targetShape = src->Inputs()[1].Shape();

            // ONNX Crop supports only an input tensor of shape [N,C,H,W]. Excluding the batch axis, both input and referent must have rank 3.
            if (inputShape.Rank() != 3 || targetShape.Rank() != 3)
                RuntimeError("ONNX Crop supports only input tensor of shape [N,C,H,W]. Including batch axis, input has rank %zu, referent has rank %zu. ",
                             inputShape.Rank() + 1, targetShape.Rank() + 1);

            size_t xOffset = inputShape[0] - targetShape[0];
            size_t yOffset = inputShape[1] - targetShape[1];

            if (src->Attributes().Contains(L"offset"))
            {
                // crop_manual
                std::vector<size_t> offsets = AsVector<size_t>(src->Attributes()[L"offset"].Value<std::vector<DictionaryValue>>());
                offsets.push_back(xOffset - offsets[0]);
                offsets.push_back(yOffset - offsets[1]);
                std::reverse(offsets.begin(), offsets.end());
                node->AddAttribute(attributesMap[L"offset"], ToINTS(offsets));
            }
            else
            {
                // TODO : crop_automatic
                RuntimeError("Exporting crop_automatic to ONNX is not supported yet.");
            }
        }
    }
    else
    {
        // Some nodes map one to many.
        if (src->OpName() == L"Convolution")
        {
            AssignConvAttributes(src, node);
        }
        else if (src->OpName() == L"Pooling" || src->OpName() == L"Unpooling")
        {
            bool isPooling = src->OpName() == L"Pooling";
            auto kernelShape = (NDShape)src->Attributes()[isPooling ? L"poolingWindowShape" : L"unpoolingWindowShape"].Value<NDShape>();
            auto strides = (NDShape)src->Attributes()[L"strides"].Value<NDShape>();
            bool ceilOutDim = src->Attributes().Contains(L"ceilOutDim") ? (bool)src->Attributes()[L"ceilOutDim"].Value<bool>() : false;
            auto autoPadding = AsVector<bool>(src->Attributes()[L"autoPadding"].Value<std::vector<DictionaryValue>>());
            const NDShape& inputShape = src->Inputs()[0].Shape();

            if (strides.Rank() < kernelShape.Rank())
            {
                // TODO: Try removing this branch. May not be needed after batch dimension fix.
                strides = strides.AppendShape(NDShape(std::vector<size_t>(kernelShape.Rank() - strides.Rank(), 1)));
            }
            if ((strides.Rank() - kernelShape.Rank()) == 1)
            {
                // This can happen, for example, because a CNTK node includes strides for the channel axis as well.
                strides = strides.SubShape(0, strides.Rank() - 1);
            }
            else if ((strides.Rank() - kernelShape.Rank()) > 1)
            {
                // This means that the lengths of kernel shape and strides differ by two or more, which should not happen.
                LogicError("Node '%S': kernel shape and strides dimensionality does not match.", src->AsString().c_str());
            }

            // This is a workaround allowing CNTK V1 pretrained models to continue running after removal of the sequence axis from the input.
            if (src->Inputs()[0].Shape().Rank() - 1 != kernelShape.Rank() && kernelShape.Dimensions()[kernelShape.Rank() - 1] == 1)
                kernelShape = kernelShape.SubShape(0, kernelShape.Rank() - 1);

            if (src->Inputs()[0].Shape().Rank() - 1 != strides.Rank() && strides.Dimensions()[strides.Rank() - 1] == 1)
                strides = strides.SubShape(0, strides.Rank() - 1);

            node->AddAttribute("kernel_shape", ToINTS(kernelShape));
            node->AddAttribute("strides", ToINTS(strides));

            auto lowerPad = ToINTS(src->Attributes()[L"lowerPad"].Value<NDShape>());
            auto upperPad = ToINTS(src->Attributes()[L"upperPad"].Value<NDShape>());

            // lowerPad and upperPad have incorrect dimensions when the op has both batch and sequence axes.
            if (IsPadValueValid(lowerPad, upperPad, autoPadding, ceilOutDim) && !(src->Inputs()[0].HasBatchAxis() && src->Inputs()[0].HasSequenceAxis()))
            {
                if (ceilOutDim)
                    ValidatePadValueForCeilOutDim(lowerPad, upperPad, autoPadding, kernelShape, inputShape, strides,
                                                  /*dilation=*/std::vector<size_t>(kernelShape.Rank(), 1), /*transpose=*/!isPooling);
                lowerPad.insert(lowerPad.end(), upperPad.cbegin(), upperPad.cend());
                node->AddAttribute("pads", lowerPad);
            }
            else
            {
                if (src->Inputs()[0].HasBatchAxis() && src->Inputs()[0].HasSequenceAxis())
                {
                    if (!std::all_of(lowerPad.begin(), lowerPad.end(), [](int64_t pad) {return pad == 0; }) ||
                        !std::all_of(upperPad.begin(), upperPad.end(), [](int64_t pad) {return pad == 0; }))
                    {
                        fprintf(stderr, "Warning: Cannot set upperPad and lowerPad with pooling ops. Padding values will be computed according to kernel and input shapes.");
                    }
                }
                if (isPooling)
                    PutPadAttrInNode(node, autoPadding, kernelShape, inputShape, strides, /*dilation=*/std::vector<size_t>(kernelShape.Rank(), 1),
                                     ceilOutDim, /*transpose=*/!isPooling);
                else
                    PutPadAttrInNode(node, autoPadding, kernelShape, inputShape, strides, /*dilation=*/std::vector<size_t>(kernelShape.Rank(), 1),
                                     /*outputShape=*/src->Inputs()[1].Shape(), ceilOutDim, /*transpose=*/!isPooling);
            }
        }
        else if (src->OpName() == L"ReduceElements")
        {
            SetReduceElementsAttributes(src, node, false);
        }
        else if ((src->OpName() == L"RandomDistribution") ||
                 (src->OpName() == L"UniformRandom") || (src->OpName() == L"NormalRandom") ||
                 (src->OpName() == L"UniformRandomLike") || (src->OpName() == L"NormalRandomLike"))
        {
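            // One-to-many path: the ONNX op type was already chosen when the node was created, so the
            // attribute names are selected from node->OpType() rather than from the CNTK op name.
            // ONNX Random* ops declare 'seed' as a float attribute, hence the float cast below.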
            std::string onnxOp = node->OpType();
            auto randomArgs = AsVector<double>(src->Attributes()[L"randomDistributionArgs"].Value<std::vector<DictionaryValue>>());
            auto seed = (int64_t)src->Attributes()[L"rngSeed"].Value<size_t>();

            if ((onnxOp == "RandomNormal") || (onnxOp == "RandomNormalLike"))
            {
                node->AddAttribute("mean", (float)randomArgs[0]);
                node->AddAttribute("scale", (float)randomArgs[1]);
            }
            else
            {
                node->AddAttribute("low", (float)randomArgs[0]);
                node->AddAttribute("high", (float)randomArgs[1]);
            }

            node->AddAttribute("seed", (float)seed);
            if ((onnxOp == "RandomUniform") || (onnxOp == "RandomNormal"))
            {
                auto shape = (NDShape)src->Attributes()[L"newShape"].Value<NDShape>();
                node->AddAttribute("shape", ToINTS(shape));

                DataType dataType = (DataType)src->Attributes()[L"newDataType"].Value<int>();
                node->AddAttribute("dtype", (int64_t)ConvertDataTypeCNTKToTensorProto(dataType));
            }
        }
    }
}