//
// CPUBackend.hpp
// MNN
//
// Created by MNN on 2018/07/06.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUBackend_hpp
#define CPUBackend_hpp
#include <map>
#include <memory>
#include <MNN/AutoTime.hpp>
#include "core/Backend.hpp"
#include "core/Execution.hpp"
#include "core/BufferAllocator.hpp"
#include "MNN_generated.h"
#ifdef MNN_KLEIDIAI_ENABLED
#include "arm/mnn_kleidiai.h"
#endif
namespace MNN {
class WorkerThread;
class CPURuntime : public Runtime {
public:
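// Holds the dynamic buffer allocators a backend can switch between: a primary
// allocator, an optional backup, and a pointer to whichever one is currently
// selected (see CPUBackend::onSelectDynamicAllocator).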
struct DynamicAllocator {
std::shared_ptr<BufferAllocator> mDynamicAllocator;
std::shared_ptr<BufferAllocator> mDynamicAllocatorBackup;
BufferAllocator* mCurrentDynamicAllocator = nullptr;
};
friend class CPUBackend;
CPURuntime(const Backend::Info& info);
virtual ~CPURuntime();
int onGetRuntimeStatus(RuntimeStatus statusEnum) const override;
virtual Backend* onCreate(const BackendConfig* config, Backend* origin) const override;
virtual void onReset(int numberThread, const BackendConfig* config, bool full) override;
virtual void onGabageCollect(int level) override;
virtual float onGetMemoryInMB() override;
virtual CompilerType onGetCompilerType() const override {
return Compiler_Loop;
}
void onConcurrencyBegin() const;
void onConcurrencyEnd() const;
virtual bool onCheckInfo(Backend::Info& info) const override;
#ifdef MNN_USE_THREAD_POOL
inline bool multiThreadValid() const {
return mThreadOpen;
}
#endif
SingleBufferWithAllocator* buffer(int index) const;
BufferAllocator* createDynamicBufferAlloctor(int index) const;
private:
void _bindCPUCore() const;
void _resetThreadPool();
mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocator;
int mThreadNumber;
#ifdef MNN_USE_THREAD_POOL
mutable int mTaskIndex = -1;
mutable bool mThreadOpen = false;
#endif
BackendConfig::MemoryMode mMemory;
BackendConfig::PowerMode mPower;
BackendConfig::PrecisionMode mPrecision;
// Backend features
// CPU features
static Backend*(*gExtraCreate)(const Runtime* runtime);
size_t mFlags = 0;
mutable int mCurrentTID = 0;
mutable std::vector<SingleBufferWithAllocator> mDynamic;
mutable std::vector<SingleBufferWithAllocator> mDynamicMmap;
mutable std::shared_ptr<DynamicAllocator> mSharedDmaInfo;
mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocatorCache;
};
struct CoreFunctions;
struct CoreInt8Functions;
class CPUResizeCache;
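// RAII wrapper for a memory chunk obtained from a BufferAllocator: the chunk
// is returned to the allocator when the object is destroyed.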
class CPUMemObj : public Backend::MemObj {
public:
CPUMemObj(BufferAllocator* allocator, MemChunk chunk, int size) : mAllocator(allocator), mChunk(chunk), mSize(size) {}
virtual ~CPUMemObj() {
if (mAllocator) {
mAllocator->free(mChunk);
}
}
virtual MemChunk chunk() {
return mChunk;
}
inline int getSize() const {
return mSize;
}
private:
BufferAllocator* mAllocator;
MemChunk mChunk;
int mSize;
};
class CPUBackend : public Backend {
public:
CPUBackend(const CPURuntime* runtime, BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, MNNForwardType type = MNN_FORWARD_CPU, size_t flags = 0, int initThreadNumber = 0);
virtual ~CPUBackend();
// Return {sizeDivide, scheduleNumber}: the aligned size of each schedule unit and the number of units `size` is divided into
std::pair<int, int> multiThreadDivide(int size) const;
virtual bool onSelectDynamicAllocator(int index, int maxIndex) override;
// dst (the divided sizes) should have length of at least threadNumber
void computeDivideSizes(int size, int* dst, float computeI = 0.f) const;
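// Usage sketch (illustrative, not part of this header): a typical op splits
// its work with multiThreadDivide and runs the slices under MNN's concurrency
// macros (MNN_CONCURRENCY_BEGIN/END):
//   auto divide = backend->multiThreadDivide(totalSize);
//   int sizeDivide = divide.first;       // aligned size of each schedule unit
//   int scheduleNumber = divide.second;  // number of units to schedule
//   MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
//       int start    = sizeDivide * (int)tId;
//       int realSize = std::min(sizeDivide, totalSize - start);
//       if (realSize > 0) {
//           // process elements [start, start + realSize)
//       }
//   }
//   MNN_CONCURRENCY_END();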
public:
virtual MemObj* onAcquire(const Tensor* nativeTensor, StorageType storageType) override;
virtual bool onClearBuffer() override;
virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const override;
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op) override;
virtual void onExecuteBegin() const override;
virtual void onExecuteEnd() const override;
virtual void* onMapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* srcTensor) override;
virtual bool onUnmapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* dstTensor, void* mapPtr) override;
virtual void onResizeBegin() override;
virtual ErrorCode onResizeEnd() override;
const CoreFunctions* functions() const {
return mCoreFunctions;
}
// Return element size for Tensor, considering pack
size_t getTensorSize(const Tensor* tensor, bool multiBytes = false) const;
const CoreInt8Functions* int8Functions() const {
return mInt8CoreFunctions;
}
void _resetDynamicMemory() const;
public:
class Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const = 0;
};
static bool addCreator(OpType t, Creator* c);
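// Registration sketch (hypothetical op "Foo"): implement a Creator and
// register it, normally via the REGISTER_CPU_OP_CREATOR macro defined at the
// bottom of this file:
//   class CPUFooCreator : public CPUBackend::Creator {
//   public:
//       virtual Execution* onCreate(const std::vector<Tensor*>& inputs,
//                                   const std::vector<Tensor*>& outputs,
//                                   const MNN::Op* op, Backend* backend) const override {
//           return new CPUFoo(backend); // CPUFoo: a hypothetical Execution subclass
//       }
//   };
//   REGISTER_CPU_OP_CREATOR(CPUFooCreator, OpType_Foo);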
inline int threadNumber() const {
return mThreadNumber;
}
#ifdef MNN_USE_THREAD_POOL
inline bool threadOpen() const {
return mRuntime->mThreadOpen;
}
#endif
BufferAllocator* getBufferAllocator(bool defer_allocator = true) const {
return mDmaInfo->mCurrentDynamicAllocator;
}
BackendConfig::MemoryMode memoryMode() const {
return mMemory;
}
BackendConfig::PrecisionMode precisionMode() const {
return mPrecisionMode;
}
CPUResizeCache* getCache() const {
return mCache;
}
virtual const Runtime* getRuntime() override;
#ifdef MNN_USE_THREAD_POOL
inline int taskIndex() const { return mRuntime->mTaskIndex; }
#endif
static void initCreatorMap();
static int getBytes(const Backend* backend, const Tensor* output);
static DataType getDataType(const Tensor* tensor);
friend class CPURuntime;
void enqueueTask(std::function<int()>&& task);
protected:
MemObj* allocBuffer(size_t size, Tensor* dest, StorageType storageType);
CoreFunctions* mCoreFunctions;
CoreInt8Functions* mInt8CoreFunctions;
private:
mutable std::shared_ptr<WorkerThread> mInitWorkQueue;
int mThreadNumber;
std::vector<std::pair<float, int>> mGroupWithComputeRate;
float mComputeI = 0.f;
std::shared_ptr<CPURuntime::DynamicAllocator> mDmaInfo;
std::shared_ptr<EagerBufferAllocator> mStaticAllocator;
CPURuntime* mRuntime;
BackendConfig::PrecisionMode mPrecisionMode;
BackendConfig::MemoryMode mMemory;
static std::map<OpType, CPUBackend::Creator*>* gCreator;
CPUResizeCache* mCache;
std::vector<std::shared_ptr<CPUResizeCache>> mCacheGroup;
};
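// Creation sketch (illustrative): a CPUBackend is normally obtained from a
// CPURuntime rather than constructed directly:
//   Backend::Info info;
//   info.type      = MNN_FORWARD_CPU;
//   info.numThread = 4;
//   CPURuntime runtime(info);
//   BackendConfig config;
//   config.precision = BackendConfig::Precision_Normal;
//   std::unique_ptr<Backend> backend(runtime.onCreate(&config, nullptr));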
/** Execution cast wrapper: inserts tensor casts dynamically. */
class CastWrapExecution : public Execution {
public:
CastWrapExecution(Backend* backend, DataType runT)
: Execution(backend), mRunType(runT) {}
virtual ErrorCode onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) override;
private:
DataType mRunType;
};
#define REGISTER_CPU_OP_CREATOR(name, opType) \
void ___##name##__##opType##__() { \
static name _temp;\
CPUBackend::addCreator(opType, &_temp); \
}
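// Note: the macro above only defines the registration function; it still has
// to be called once at startup. In MNN this is done from a generated registry
// (e.g. CPUOPRegister.cpp). Sketch, with hypothetical names:
//   extern void ___CPUFooCreator__OpType_Foo__();
//   void registerCPUOps() {
//       ___CPUFooCreator__OpType_Foo__();
//   }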
#ifdef MNN_SUPPORT_DEPRECATED_OP
#define REGISTER_CPU_OP_CREATOR_OLD(name, opType) \
void ___##name##__##opType##__() { \
static name _temp;\
CPUBackend::addCreator(opType, &_temp); \
}
#else
#define REGISTER_CPU_OP_CREATOR_OLD(name, opType) \
void ___##name##__##opType##__() { \
}
#endif
#define REGISTER_CPU_OP_CREATOR_RENDER(name, opType) \
void ___##name##__##opType##__() { \
static name _temp;\
CPUBackend::addCreator(opType, &_temp); \
}
#define REGISTER_CPU_OP_CREATOR_TRANSFORMER(name, opType) \
void ___##name##__##opType##__() { \
static name _temp;\
CPUBackend::addCreator(opType, &_temp); \
}
} // namespace MNN
#endif /* CPUBackend_hpp */