source/core/Schedule.cpp

//
//  Schedule.cpp
//  MNN
//
//  Created by MNN on 2018/07/30.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "core/Schedule.hpp"
#include <algorithm>
#include <array>
#include <iterator>
#include <set>
#include <vector>
#include <unordered_map>
#include "core/Macro.h"
#include "core/RuntimeFactory.hpp"
#include "core/TensorUtils.hpp"
#include "core/FileLoader.hpp"
#ifndef MNN_BUILD_MINI
#include "shape/SizeComputer.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#endif
#include "utils/InitNet.hpp"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>

using namespace std;
//#define MNN_AUTO_CHECK_COST
namespace MNN {

void Schedule::OpResizeCache::close(bool pass) {
    mCanCache = false;
    mInputInfos.clear();
    mPass = pass;
}

void Schedule::OpResizeCache::addContentIndex(int index) {
    mNeedCompareContent.emplace_back(index);
}

bool Schedule::OpResizeCache::match(const std::vector<Tensor*>& inputs, bool& compared) {
    if (!mCanCache) {
        return mPass;
    }
    if (!mComputed) {
        return false;
    }
    if (mInputInfos.size() != inputs.size()) {
        return false;
    }
    compared = true;
    for (int u=0; u<mInputInfos.size(); ++u) {
        auto des = TensorUtils::getDescribe(inputs[u]);
        if (mInputInfos[u].order != des->dimensionFormat) {
            return false;
        }
        if (mInputInfos[u].type.code != inputs[u]->getType().code || mInputInfos[u].type.bits != inputs[u]->getType().bits) {
            return false;
        }
        if (mInputInfos[u].dim.size() != inputs[u]->dimensions()) {
            return false;
        }
        for (int v=0; v<mInputInfos[u].dim.size(); ++v) {
            if (mInputInfos[u].dim[v] != inputs[u]->length(v)) {
                return false;
            }
        }
        if (des->memoryType == Tensor::InsideDescribe::MEMORY_VIRTUAL && (des->stageMask & Tensor::InsideDescribe::COMPUTE_SHAPE_STAGE)) {
            return false;
        }
    }
    for (auto dim : mNeedCompareContent) {
        auto t = inputs[dim];
        auto& s = mInputInfos[dim];
        if (0 != ::memcmp(s.buffer.data(), t->host<void>(), s.buffer.size())) {
            return false;
        }
    }
    return true;
}

void Schedule::OpResizeCache::open() {
    mCanCache = true;
}

void Schedule::OpResizeCache::copyImmutable(const OpResizeCache& cache) {
    mNeedCompareContent = cache.mNeedCompareContent;
}

void Schedule::OpResizeCache::insert(const std::vector<Tensor*>& inputs) {
    if (!mCanCache) {
        return;
    }
    mComputed = true;
    mInputInfos.resize(inputs.size());
    for (int u=0; u<inputs.size(); ++u) {
        mInputInfos[u].dim = inputs[u]->shape();
        mInputInfos[u].order = TensorUtils::getDescribe(inputs[u])->dimensionFormat;
        mInputInfos[u].type = inputs[u]->getType();
    }
    for (auto dim : mNeedCompareContent) {
        // Only cache small host buffers; give up caching for larger inputs
        const int limit = 10000;
        auto t = inputs[dim];
        auto& s = mInputInfos[dim];
        auto size = t->usize();
        if (size > limit) {
            close();
            return;
        }
        s.buffer.resize(size);
        ::memcpy(s.buffer.data(), t->host<void>(), size);
    }
}
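
// Resolve the backend type for a ScheduleConfig: MNN_FORWARD_AUTO is expanded through the
// priority list below, and the type falls back to config.backupType when no runtime creator
// is registered for it, or when an OpenCL runtime cannot honor Power_Low.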
MNNForwardType Schedule::getAppropriateType(const ScheduleConfig& config) {
    MNNForwardType type = config.type;
    // FIXME: Support Auto determine
    if (MNN_FORWARD_AUTO == config.type) {
        // Define Auto choose priority
        std::array<MNNForwardType, 8> priorityList {
            MNN_FORWARD_USER_0, // HIAI
            MNN_FORWARD_NN,     // CoreML
            MNN_FORWARD_USER_1, // TensorRT
            MNN_FORWARD_CUDA,   // CUDA
            MNN_FORWARD_OPENCL, // OpenCL
            MNN_FORWARD_METAL,  // Metal
            MNN_FORWARD_VULKAN, // Vulkan
            MNN_FORWARD_CPU,    // CPU
        };
        for (auto bn : priorityList) {
            if (MNNGetExtraRuntimeCreator(bn) != nullptr) {
                type = (MNNForwardType)bn;
                break;
            }
        }
    }
    auto creator = MNNGetExtraRuntimeCreator(type);
    if (nullptr == creator) {
        MNN_PRINT("Can't Find type=%d backend, use %d instead\n", type, config.backupType);
        type = config.backupType;
    } else {
        // TODO: Not limited to OpenCL
        if (type == MNN_FORWARD_OPENCL && config.backendConfig != nullptr) {
            if (config.backendConfig->power == BackendConfig::Power_Low) {
                Backend::Info info;
                info.type = type;
                std::shared_ptr<Runtime> bn(creator->onCreate(info));
                if (nullptr != bn.get()) {
                    bool isSupportLowPower = bn->onGetRuntimeStatus(RuntimeStatus::STATUS_SUPPORT_POWER_LOW);
                    if (!isSupportLowPower) {
                        MNN_PRINT("type=%d backend don't Support Low Power, use %d instead\n", type, config.backupType);
                        type = config.backupType;
                    }
                } else {
                    type = config.backupType;
                }
            }
        }
    }
    return type;
}

static void generateScheduleGraph(vector<const Op*>& ops, const Net* net, const ScheduleConfig& configs,
                                  const vector<shared_ptr<Tensor>>& allTensors) {
    // for (int i = 0; i < net->oplists()->size(); ++i) {
    //     auto op = net->oplists()->Get(i);
    //     MNN_PRINT("generateScheduleGraph, op type:%s, op name:%s\n", EnumNameOpType(op->type()), op->name()->c_str());
    // }
    if (configs.path.inputs.empty() && configs.path.outputs.empty()) {
        // Use Default Linear schedule
        ops.clear();
        ops.reserve(net->oplists()->size());
        for (int i = 0; i < net->oplists()->size(); ++i) {
            auto op = net->oplists()->GetAs<Op>(i);
            ops.emplace_back(op);
        }
        return;
    }
    // 0: not set, 1: output, 2: input
    std::vector<int> tensorMask(net->tensorName()->size());
    ::memset(tensorMask.data(), 0, tensorMask.size() * sizeof(int));

    // 0: use, 1: no use
    std::vector<int> opMask(net->oplists()->size());
    ::memset(opMask.data(), 0, opMask.size() * sizeof(int));

    // Set Initial Status
    std::set<std::string> inputNames;
    std::set<std::string> outputNames;
    for (auto& n : configs.path.inputs) {
        inputNames.insert(n);
    }
    for (auto& n : configs.path.outputs) {
        outputNames.insert(n);
    }
    if (configs.path.mode == ScheduleConfig::Path::Mode::Tensor) {
        for (int i=0; i<tensorMask.size(); ++i) {
            auto name = net->tensorName()->GetAsString(i)->c_str();
            if (outputNames.find(name) != outputNames.end()) {
                tensorMask[i] = 1;
            }
            // If both input/output, set as input
            if (inputNames.find(name) != inputNames.end()) {
                tensorMask[i] = 2;
            }
        }
    } else {
        // Op Mode
        for (int i=0; i<opMask.size(); ++i) {
            auto op = net->oplists()->GetAs<Op>(i);
            if (nullptr == op->name()) {
                continue;
            }
            auto name = op->name()->c_str();
            if (outputNames.find(name) != outputNames.end()) {
                opMask[i] = 1;
                if (nullptr != op->outputIndexes()) {
                    for (int j=0; j<op->outputIndexes()->size(); ++j) {
                        auto index = op->outputIndexes()->data()[j];
                        if (tensorMask[index] != 2) {
                            tensorMask[index] = 1;
                        }
                    }
                }
                if (nullptr != op->inputIndexes()) {
                    for (int j=0; j<op->inputIndexes()->size(); ++j) {
                        auto index = op->inputIndexes()->data()[j];
                        if (tensorMask[index] != 2) {
                            tensorMask[index] = 1;
                        }
                    }
                }
            }
            if (inputNames.find(name) != inputNames.end()) {
                opMask[i] = 1;
                if (nullptr != op->outputIndexes()) {
                    for (int j=0; j<op->outputIndexes()->size(); ++j) {
                        auto index = op->outputIndexes()->data()[j];
                        tensorMask[index] = 2;
                    }
                }
            }
        }
    }

    bool change = false;
    do {
        change = false;
        for (int i=0; i<opMask.size(); ++i) {
            if (opMask[i] > 0) {
                continue;
            }
            auto op = net->oplists()->GetAs<Op>(i);
            if (nullptr != op->outputIndexes()) {
                for (int j=0; j<op->outputIndexes()->size(); ++j) {
                    auto index = op->outputIndexes()->data()[j];
                    if (tensorMask[index] == 1) {
                        opMask[i] = 1;
                        change = true;
                    }
                }
            }
            if (nullptr != op->inputIndexes() && opMask[i]) {
                for (int j=0; j<op->inputIndexes()->size(); ++j) {
                    auto index = op->inputIndexes()->data()[j];
                    if (tensorMask[index] != 2) {
                        tensorMask[index] = 1;
                    }
                }
            }
        }
    } while (change);

    for (int i=0; i<opMask.size(); ++i) {
        if (opMask[i] > 0) {
            ops.emplace_back(net->oplists()->GetAs<Op>(i));
        }
    }
}
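
// Build the op cache list for one ScheduleConfig: select the ops to run via generateScheduleGraph,
// then wrap them into OpCacheInfo entries for the pipeline.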
static vector<Schedule::OpCacheInfo> _scheduleUnit(const Net* net, const ScheduleConfig& configs,
                                                   const vector<shared_ptr<Tensor>>& allTensors) {
    vector<Schedule::OpCacheInfo> oplists;
    vector<const Op*> ops;
    generateScheduleGraph(ops, net, configs, allTensors);
    initPipelineInfosFromOps(oplists, ops, allTensors);
    return oplists;
}
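
// Schedule a net into pipelines: initialize const and normal tensors, build one pipeline per
// ScheduleConfig, mark user-requested and model-declared outputs, collect the session's
// input/output tensor maps, and, unless MNN_BUILD_MINI is defined, run
// GeometryComputerUtils::buildConstantTensors on each pipeline that still needs geometry compute.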
bool Schedule::schedule(ScheduleInfo& scheduleInfo, const Net* net, const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtimeInfo) {
    if (nullptr == net->oplists()) {
        MNN_PRINT("Empty net for schedule\n");
        return false;
    }
    if (scheduleInfo.defaultBackend.get() == nullptr && scheduleInfo.allTensors.empty()) {
        // Const not init, init it
        BackendConfig defaultConfig;
        defaultConfig.flags = 4;
        scheduleInfo.defaultBackend.reset(runtimeInfo.second->onCreate(&defaultConfig));
        ErrorCode code = NO_ERROR;
        FileLoader loader(scheduleInfo.externalWeightPath.c_str());
        initConstTensors(scheduleInfo.allTensors, net, scheduleInfo.defaultBackend.get(), code, &loader);
        if (NO_ERROR != code) {
            MNN_ERROR("Schedule Const init errorcode = %d\n", code);
            return false;
        }
    }
    bool valid = initTensors(scheduleInfo.allTensors, net);
    scheduleInfo.validForResize = valid;
    std::vector<std::shared_ptr<Tensor>>& allTensors = scheduleInfo.allTensors;
    std::vector<std::pair<Schedule::BackendCache, std::vector<Schedule::OpCacheInfo>>> result;

    for (auto& config : configs) {
        Backend::Info compute;
        compute.type = getAppropriateType(config);
        compute.numThread = config.numThread;
        if (config.type == MNN_FORWARD_AUTO) {
            if (compute.type == MNN_FORWARD_OPENCL || compute.type == MNN_FORWARD_METAL) {
                // AUTO set default gpu-mode MNN_GPU_TUNING_FAST
                compute.numThread = 16;
            }
        }
        compute.user = config.backendConfig;
        auto oplists = _scheduleUnit(net, config, allTensors);
        Schedule::BackendCache cache;
        cache.info = std::move(compute);
        result.emplace_back(std::make_pair(cache, std::move(oplists)));
    }
    scheduleInfo.pipelineInfo = std::move(result);

    // Get all used ops' outputs and drop unused ops without changing op order; always insert all input ops
    std::vector<const Op*> oplists;
    {
        for (std::pair<Schedule::BackendCache, vector<Schedule::OpCacheInfo>>& pipeline : scheduleInfo.pipelineInfo) {
            for (auto& info : pipeline.second) {
                oplists.push_back(info.op);
            }
        }
    }
    // set tensors' input/output usage by oplists info
    setInputOutputForOps(allTensors, oplists, net->usage() == Usage_INFERENCE_STATIC);

    // add output index by config info and outputName
    std::unordered_map<std::string, int> tensorNameIndexMap;
    for (int i = 0; i < net->tensorName()->size(); ++i) {
        tensorNameIndexMap[net->tensorName()->Get(i)->str()] = i;
    }
    bool userSetOutput = false;
    for (auto& config : configs) {
        userSetOutput = userSetOutput || (!config.saveTensors.empty());
        for (const auto& name : config.saveTensors) {
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                }
                scheduleInfo.outputTensor.insert(
                    std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
            } else {
                MNN_PRINT("Bad outputname: %s\n", name.c_str());
            }
        }
    }
    if (net->outputName()) {
        userSetOutput = userSetOutput || net->outputName()->size() >= 1;
        for (int i = 0; i < net->outputName()->size(); ++i) {
            std::string name = net->outputName()->Get(i)->str();
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                }
                scheduleInfo.outputTensor.insert(
                    std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
            }
        }
    }
    if (scheduleInfo.outputTensor.empty()) {
        userSetOutput = false;
    }
    // add input/output tensor to schedule's input/output
    for (int index = 0; index < allTensors.size(); index++) {
        auto t = allTensors[index].get();
        auto usage = TensorUtils::getDescribe(t)->usage;
        if (usage == Tensor::InsideDescribe::INPUT) {
            scheduleInfo.inputTensors.insert(std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
        if (usage == Tensor::InsideDescribe::OUTPUT && (!userSetOutput)) {
            scheduleInfo.outputTensor.insert(
                std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
    }
    if (net->usage() == Usage_INFERENCE_STATIC) {
        for (auto& pipInfo : scheduleInfo.pipelineInfo) {
            pipInfo.first.needComputeGeometry = false;
            pipInfo.first.needComputeShape = false;
        }
    }
#ifndef MNN_BUILD_MINI
    for (auto iter = scheduleInfo.pipelineInfo.begin(); iter != scheduleInfo.pipelineInfo.end();) {
        if (!iter->first.needComputeGeometry) {
            // For static model, no need to check const
            iter++;
            continue;
        }
        auto breakIndex = GeometryComputerUtils::buildConstantTensors(iter->second);
        if (breakIndex >= 0) {
            scheduleInfo.needInputContentForShape = true;
        }
#ifdef MNN_SEPERTE_SIZE
        if (breakIndex >= 0 && (breakIndex + 1) < iter->second.size()) {
            // Split oplist
            std::vector<Schedule::PipelineInfo> fuse;
            std::vector<Schedule::PipelineInfo> separate;
            fuse.insert(fuse.begin(), iter->second.begin(), iter->second.begin() + breakIndex + 1);
            separate.insert(separate.begin(), iter->second.begin() + breakIndex + 1, iter->second.end());
            oplists.clear();
            iter->second = std::move(separate);
            iter = scheduleInfo.pipelineInfo.insert(iter, std::make_pair(iter->first, fuse));
            iter++;
            iter++;
        } else {
            iter++;
        }
#else
        iter++;
#endif
    }
#endif
    return true;
}

} // namespace MNN