source/backend/cpu/CPUPool.cpp
//
// CPUPool.cpp
// MNN
//
// Created by MNN on 2018/07/15.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cpu/CPUBackend.hpp"
#include "core/Concurrency.h"
#include "backend/cpu/CPUPool.hpp"
#include "compute/CommonOptFunction.h"
#include "math/Vec.hpp"
#include "core/TensorUtils.hpp"
using Vec4 = MNN::Math::Vec<float, 4>;
using Vec16 = MNN::Math::Vec<int8_t, 16>;
namespace MNN {
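// Generic CPU pooling execution; the concrete per-plane pooling kernel is injected by the creator below.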
class CPUPool : public Execution {
public:
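// func is a type-erased pointer to the selected pooling kernel; it is cast back to the
// signature that matches whether a redice (max-index) output is produced.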
CPUPool(Backend *b, const Pool *parameter, void* func, int bytes, bool returnRedice) : MNN::Execution(b), mParameter(parameter) {
if (returnRedice) {
mComputeRedice = (decltype(mComputeRedice))func;
} else {
mCompute = (decltype(mCompute))func;
}
mBytes = bytes;
}
virtual ~CPUPool() = default;
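// onResize resolves the effective kernel, stride and padding values, then captures a lambda
// that pools one (batch, channel-pack) plane; onExecute only replays that lambda per thread.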
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
auto layer = mParameter;
int strideWidth = layer->strideX();
int strideHeight = layer->strideY();
int padWidth = layer->padX();
int padHeight = layer->padY();
auto core = static_cast<CPUBackend*>(backend())->functions();
MNN_ASSERT(DataType_DT_INT8 != TensorUtils::getDescribe(inputs[0])->type);
// Overwrite kernel size, stride and padding below when the pooling is global.
auto input = inputs[0];
auto output = outputs[0];
int kernelWidth = layer->kernelX();
int kernelHeight = layer->kernelY();
if (layer->isGlobal()) {
kernelWidth = input->width();
kernelHeight = input->height();
strideWidth = input->width();
strideHeight = input->height();
padWidth = 0;
padHeight = 0;
}
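// SAME padding: pad so the output covers the whole input, using half of the needed padding
// on the leading side; VALID padding uses no padding at all.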
if (layer->padType() == PoolPadType_SAME) {
int padNeededWidth = (output->width() - 1) * strideWidth + kernelWidth - input->width();
int padNeededHeight = (output->height() - 1) * strideHeight + kernelHeight - input->height();
padWidth = padNeededWidth > 0 ? padNeededWidth / 2 : 0;
padHeight = padNeededHeight > 0 ? padNeededHeight / 2 : 0;
} else if (layer->padType() == PoolPadType_VALID) {
padWidth = padHeight = 0;
}
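// Channels are stored in packs of core->pack elements; each (batch, channel-pack) plane is an
// independent pooling job, distributed across threads in a strided fashion.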
auto totalDepth = input->batch() * UP_DIV(input->channel(), core->pack);
auto inputPlaneStride = core->pack * input->width() * input->height();
auto outputPlaneStride = core->pack * output->width() * output->height();
int threadNumber = static_cast<CPUBackend*>(backend())->threadNumber();
auto padType = layer->padType();
auto countType = layer->countType();
if (layer->pads() != nullptr && padType == PoolPadType_CAFFE) {
padType = PoolPadType_VALID;
}
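// Two output tensors mean max pooling that also returns the index (redice) of each maximum.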
if (outputs.size() == 2) {
mFunction = std::make_pair(threadNumber, [=](int tId) {
for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
auto inputData = input->host<uint8_t>();
auto outputData = output->host<uint8_t>();
auto rediceData = outputs[1]->host<uint8_t>();
// Pool one packed channel plane and write both the pooled values and the redice output.
mComputeRedice(inputData + channel * inputPlaneStride * mBytes, input->width(), input->height(),
outputData + outputPlaneStride * channel * mBytes, output->width(), output->height(), kernelWidth,
kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType, countType, rediceData + outputPlaneStride * channel * mBytes);
}
});
} else {
mFunction = std::make_pair(threadNumber, [=](int tId) {
for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
auto inputData = input->host<uint8_t>();
auto outputData = output->host<uint8_t>();
// Pool one packed channel plane.
mCompute(inputData + channel * inputPlaneStride * mBytes, input->width(), input->height(),
outputData + outputPlaneStride * channel * mBytes, output->width(), output->height(), kernelWidth,
kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType, countType);
}
});
}
return NO_ERROR;
}
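// Replay the job prepared in onResize on mFunction.first threads; each thread handles a
// strided subset of the channel planes.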
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
MNN_CONCURRENCY_BEGIN(tId, mFunction.first) {
mFunction.second((int)tId);
}
MNN_CONCURRENCY_END();
return NO_ERROR;
}
private:
const Pool *mParameter;
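// Per-plane pooling kernels; exactly one of the two is set by the constructor, depending on
// whether a redice output is required.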
void(*mCompute)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight, int padType, int countType) = nullptr;
void(*mComputeRedice)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight, int padType, int countType, void *rediceOutput) = nullptr;
std::pair<int, std::function<void(int)> > mFunction;
int mBytes;
};
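// Creator: selects the pooling kernel from the input data type, the pool type (average / max),
// and whether max indices (redice) are requested.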
class CPUPoolCreator : public CPUBackend::Creator {
public:
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
const MNN::Op *op, Backend *backend) const override {
void* func = nullptr;
bool returnRedice = false;
if (inputs[0]->getType() == halide_type_of<int8_t>()) {
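// Quantized int8 input: use the templated int8 kernels with 16-lane vectors; element size is 1 byte.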
if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
func = (void*)(poolingAvg<int8_t, Vec16, 4>);
} else {
func = (void*)(poolingMax<int8_t, Vec16, 4, -128>);
}
return new CPUPool(backend, op->main_as_Pool(), func, 1, returnRedice);
}
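// Float path: use the kernels from the backend's core function table; core->bytes is the
// element size of its float representation.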
auto core = static_cast<CPUBackend*>(backend)->functions();
if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
func = (void*)(core->MNNPoolingAvg);
} else {
func = (void*)(core->MNNPoolingMax);
if (outputs.size() == 2) {
func = (void*)(core->MNNPoolingMaxWithRedice);
returnRedice = true;
}
}
return new CPUPool(backend, op->main_as_Pool(), func, core->bytes, returnRedice);
}
};
REGISTER_CPU_OP_CREATOR(CPUPoolCreator, OpType_Pooling);
} // namespace MNN