void EmitReceptiveFieldToColumns()

in libraries/nodes/include/ReceptiveFieldMatrixNode.h [202:401]


        // Emits code (through `function`) that rearranges the receptive fields of a 3-D input volume into the
        // rows/columns of `outputMatrix` (an "im2col"-style reshaping), so that each column of the output
        // holds the filterWidth x filterWidth x depth window that feeds one output pixel.
        //
        // Parameters:
        //   function     - the function emitter that all generated operations are appended to
        //   inputVolume  - the input image data to read from
        //   inputLayout  - memory layout of the input; logical dimensions 0, 1, 2 are read as height, width, depth
        //   filterWidth  - side length k of the (square) k x k receptive field
        //   stride       - step, in input pixels, between consecutive output pixels (applied to rows and columns)
        //   convPadding  - amount of padding to assume around the image
        //   dataOrder    - dimension order of the input: {2, 0, 1} is channel, row, column; {0, 1, 2} is row, column, channel
        //   outputWidth  - width of the output image
        //   outputHeight - height of the output image
        //   outputMatrix - destination buffer, treated as (k * k * depth) rows of (outputHeight * outputWidth) entries
        //
        // Two strategies are used:
        //   * channel-major input with unit stride: whole-image block copies, followed by zeroing of the padding entries
        //   * everything else: one element at a time, fetched via GetValueFromPaddedVolume
        //
        // Note: plain C++ `for` loops and `if` statements below run at emit time (they unroll into the generated
        // code); `function.For(...)` calls are the ones that produce loops in the generated code.
        void EmitReceptiveFieldToColumns(emitters::IRFunctionEmitter& function,
                                         emitters::LLVMValue inputVolume,
                                         const model::PortMemoryLayout& inputLayout,
                                         int filterWidth,
                                         int stride,
                                         int convPadding, // amount of padding to assume around the image -- determines output size
                                         std::array<int, 3> dataOrder,
                                         int outputWidth,
                                         int outputHeight,
                                         emitters::LLVMValue outputMatrix)
        {
            // Model parameters
            const auto inputHeight = inputLayout.GetLogicalDimensionActiveSize(0);
            const auto inputWidth = inputLayout.GetLogicalDimensionActiveSize(1);
            const auto inputDepth = inputLayout.GetLogicalDimensionActiveSize(2);
            // Number of rows of the output matrix: one per (filter row, filter column, channel) triple
            const auto fieldVolumeSize = filterWidth * filterWidth * inputDepth;
            // Number of columns of the output matrix: one per output pixel
            const auto numOutputColumns = static_cast<int>(outputWidth * outputHeight);

            // Input (I): d x h x w (planar)
            // Output (S): (d * k * k) x (outputHeight * outputWidth) ==  fieldVolumeSize x outputImageSize

            // Example
            // k = 3, d = 2
            //
            //      A B C D    a b c d
            // I =  E F G H    e f g h
            //      I J K L    i j k l
            //      M N O P    m n o p
            //
            //      . . . .  . A B C  D E F G  H I J K
            //      . . . .  . a b c  d e f g  h i j k
            //      . . . .  A B C D  E F G H  I J K L
            //      . . . .  a b c d  e f g h  i j k l
            //      . . . .  B C D E  F G H I  J K L M
            //      . . . .  b c d e  f g h i  j k l m
            //
            //      . A B C  D E F G  H I J K  L M N O
            //      . a b c  d e f g  h i j k  l m n o
            // S =  A B C D  E F G H  I J K L  M N O P
            //      a b c d  e f g h  i j k l  m n o p
            //      B C D E  F G H I  J K L M  N O P .
            //      b c d e  f g h i  j k l m  n o p .
            //
            //      D E F G  H I J K  L M N O  . . . .
            //      d e f g  h i j k  l m n o  . . . .
            //      E F G H  I J K L  M N O P  . . . .
            //      e f g h  i j k l  m n o p  . . . .
            //      F G H I  J K L M  N O P .  . . . .
            //      f g h i  j k l m  n o p .  . . . .
            //
            // Note that the middle d=2 rows of S are the entire image, linearized:
            // A B C D E F G H I J K L M N O P a b c d e f g h i j k l m n o p

            // The input is currently assumed to carry no padding of its own, so all of `convPadding` has to be synthesized here.
            // const int extraPadding = (int)convPadding - (int)inputPadding; // extraPadding is the amount of extra padding we need to do, on top of what's in the input data
            const int extraPadding = convPadding;
            // The block-copy strategy is only valid when the whole image is contiguous per channel and output
            // pixels advance one input pixel at a time.
            const bool useContiguousReshape = (dataOrder == std::array<int, 3>({ { 2, 0, 1 } })) && (stride == 1); // channel, row, column order, unit stride
            if (useContiguousReshape)
            {
                // NOTE(review): this branch uses `inputWidth` as the row pitch when copying and when zeroing row
                // edges, but `outputWidth` / `numOutputColumns` when zeroing whole rows. That looks like it assumes
                // outputWidth == inputWidth and outputHeight == inputHeight ("same"-sized convolution) -- confirm
                // against the callers.

                // assert(inputPadding == 0 && "Input data must not be padded");
                // Points to the beginning of the input volume
                emitters::LLVMValue inputPtr = function.PointerOffset(inputVolume, 0);

                // Points to the beginning of the outputMatrix
                emitters::LLVMValue outputPtr = function.PointerOffset(outputMatrix, 0);

                // Unroll outer loops
                // (fy, fx) is the position inside the k x k receptive field; each position produces `inputDepth` output rows.
                for (int fy = 0; fy < filterWidth; ++fy)
                {
                    for (int fx = 0; fx < filterWidth; ++fx)
                    {
                        // `outputRow` is the row of the output matrix to start writing to. Multiplied by `inputDepth`, because
                        // we're going to memcpy `inputDepth` rows at once
                        int outputRow = (fy * filterWidth + fx) * inputDepth;

                        // The copy for this (fy, fx) is the linearized image shifted by (extraPadding - fy) rows and
                        // (extraPadding - fx) columns. A positive shift delays the write position; a negative shift is
                        // converted below into skipping that many elements at the start of the input instead.
                        int outputOffset1 = inputWidth * (extraPadding - fy); // where to start writing this row in the output
                        int outputOffset2 = (extraPadding - fx); // where to start writing this row in the output
                        int inputOffset = 0; // where to start reading from for this row
                        if (outputOffset1 < 0)
                        {
                            inputOffset -= outputOffset1;
                            outputOffset1 = 0;
                        }
                        if (outputOffset2 < 0)
                        {
                            inputOffset -= outputOffset2;
                            outputOffset2 = 0;
                        }
                        int outputOffset = outputOffset1 + outputOffset2;
                        // Copy everything that remains after trimming the skipped input prefix and the delayed output start
                        int count = (inputWidth * inputHeight * inputDepth) - inputOffset - outputOffset;
                        // Make the output offset absolute: move to the first of this (fy, fx)'s output rows
                        outputOffset += outputRow * numOutputColumns;

                        // For this output row, copy what we need from the input image
                        function.MemoryCopy<ValueType>(inputPtr, inputOffset, outputPtr, outputOffset, count);
                        // Offset of the first output row for this (fy, fx), used as the base for the zeroing below
                        const int outputRowOffset = outputRow * numOutputColumns;

                        // Zero out the padding areas
                        // The shifted copy above wraps image data into entries that should hold padding, so those entries are
                        // overwritten here, one channel (i.e., one output row) per iteration of the emitted loop.
                        // BUG: explicit capture-by-ref entries are here to work around a GCC bug
                        function.For(inputDepth, [=, &fx, &fy, &extraPadding, &inputWidth, &inputHeight, &outputWidth, &numOutputColumns](emitters::IRFunctionEmitter& function, emitters::LLVMValue channelValue) {
                            auto channel = function.LocalScalar(channelValue);
                            auto outputDepthOffset = channel * numOutputColumns;

                            // Points to the beginning of the current channel in the outputMatrix
                            auto outputChannelPtr = function.PointerOffset(outputMatrix, outputDepthOffset);

                            // NOTE(review): the fill value is passed as a uint8_t literal -- presumably MemorySet is a
                            // memset-style byte fill, which yields zero for any ValueType; confirm.
                            uint8_t paddingValue = 0;
                            // Vertical padding: decided at emit time from the filter row
                            if (fy < extraPadding)
                            {
                                // zero out full image rows at beginning of image
                                int count = (extraPadding - fy) * outputWidth;
                                int begin = 0;
                                function.MemorySet<ValueType>(outputChannelPtr, outputRowOffset + begin, function.Literal<uint8_t>(paddingValue), count);
                            }
                            else if (fy > extraPadding)
                            {
                                // zero out full image rows at end of image
                                int count = (fy - extraPadding) * outputWidth;
                                int begin = numOutputColumns - count;
                                assert(begin >= 0);
                                function.MemorySet<ValueType>(outputChannelPtr, outputRowOffset + begin, function.Literal<uint8_t>(paddingValue), count);
                            }

                            // Horizontal padding: decided at emit time from the filter column
                            if (fx < extraPadding)
                            {
                                // zero out elements at beginning of each row
                                int count = extraPadding - fx;
                                // BUG: explicit capture-by-ref entries are here to work around a GCC bug
                                function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) {
                                    auto index = function.LocalScalar(indexValue);
                                    // Start of image row `index` within this output row
                                    auto begin = index * inputWidth;
                                    auto offset = begin + outputRowOffset;
                                    function.MemorySet<ValueType>(outputChannelPtr, offset, function.Literal<uint8_t>(paddingValue), count);
                                });
                            }
                            else if (fx > extraPadding)
                            {
                                // zero out elements at end of each row
                                int count = fx - extraPadding;
                                // BUG: explicit capture-by-ref entries are here to work around a GCC bug
                                function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) {
                                    auto index = function.LocalScalar(indexValue);
                                    // The last `count` entries of image row `index` within this output row
                                    auto begin = ((index + 1) * inputWidth) - count;
                                    auto offset = begin + outputRowOffset;
                                    function.MemorySet<ValueType>(outputChannelPtr, offset, function.Literal<uint8_t>(paddingValue), count);
                                });
                            }
                        });
                    }
                }
            }
            else // Normal, single value-at-a-time method
            {
                // The outer loop iterates over all d * k * k entries in the receptive field
                function.For(fieldVolumeSize, [=](emitters::IRFunctionEmitter& function, emitters::LLVMValue fValue) {
                    auto f = function.LocalScalar(fValue);
                    auto fieldChannel = function.LocalScalar();
                    auto fieldColumn = function.LocalScalar();
                    auto fieldRow = function.LocalScalar();

                    // Decompose the flat receptive-field index `f` into (row, column, channel). Which coordinate varies
                    // fastest depends on the data order; the choice between the two decodings is made at emit time.
                    // TODO: use the entries of dataOrder to compute the indices
                    if (dataOrder == std::array<int, 3>({ { 0, 1, 2 } })) // row, column, channel order
                    {
                        fieldChannel = f % inputDepth;
                        auto fDivDepth = f / inputDepth;
                        fieldColumn = fDivDepth % filterWidth;
                        fieldRow = fDivDepth / filterWidth;
                    }
                    // NOTE(review): every data order other than {0, 1, 2} is decoded as channel-major here -- confirm that
                    // callers only ever pass {0, 1, 2} or {2, 0, 1}.
                    else // channel, row, column order
                    {
                        fieldColumn = f % filterWidth;
                        auto fDivColumns = f / filterWidth;
                        fieldRow = fDivColumns % filterWidth;
                        fieldChannel = fDivColumns / filterWidth;
                    }

                    // Now for each receptive field entry, iterate over all h * w locations in the output image
                    function.For(outputHeight, [=, &fieldRow, &fieldColumn](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageRowValue) {
                        auto outputImageRow = function.LocalScalar(outputImageRowValue);
                        // Top row of the receptive field for this output row (before adding the position within the field)
                        auto inputRow = outputImageRow * stride;
                        function.For(outputWidth, [=, &fieldRow, &fieldColumn, &inputRow](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageColumnValue) {
                            auto outputImageColumn = function.LocalScalar(outputImageColumnValue);
                            // Leftmost column of the receptive field for this output column
                            auto inputColumn = outputImageColumn * stride;

                            // outRowOffset is the offset to the f'th row in the output S matrix
                            auto outRowOffset = f * (outputHeight * outputWidth);

                            // outColRowOffset is the offset to the column of the S matrix where `outputImageRow` begins
                            auto outColRowOffset = outputImageRow * outputWidth;
                            // outputIndex is the index of the entry in S to write to
                            auto outputIndex = outRowOffset + (outColRowOffset + outputImageColumn);

                            // input row and column in the input image
                            // NOTE(review): these coordinates are not adjusted for padding here; presumably
                            // GetValueFromPaddedVolume applies `extraPadding` and supplies the padding value for
                            // out-of-bounds locations -- confirm against that helper.
                            auto entryRow = inputRow + fieldRow;
                            auto entryColumn = inputColumn + fieldColumn;
                            auto volumeValue = GetValueFromPaddedVolume<ValueType>(function, inputVolume, inputLayout, extraPadding, dataOrder, entryRow, entryColumn, fieldChannel);
                            function.SetValueAt(outputMatrix, outputIndex, volumeValue);
                        });
                    });
                });
            }
        }