tools/converter/source/optimizer/PostConverter.cpp

//
//  PostConverter.cpp
//  MNNConverter
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <unordered_set>
#include <MNN/expr/Optimizer.hpp>
#include <set>
#include <MNN/expr/ExecutorScope.hpp>
#include "PostConverter.hpp"
#include "PostTreatUtils.hpp"
#include "Program.hpp"
#include "SubGraphComplete.hpp"
#include "GenerateSubGraph.hpp"
#include "TemplateMerge.hpp"
#include "core/Backend.hpp"
#include "RuntimeAttr.hpp"
//#define MNN_POST_CONVERTER_DEBUG

namespace MNN {
namespace Express {

static std::vector<int> NetInputIndices(const MNN::NetT* net) {
    std::vector<int> input_indices;
    for (const auto& op : net->oplists) {
        if (op->type == MNN::OpType_Input) {
            const auto& indices = op->outputIndexes;
            input_indices.insert(input_indices.end(), indices.begin(), indices.end());
        }
    }
    return std::move(input_indices);
}

SubGraphProtoT* FindSubGraphByName(const std::vector<SubGraphProtoT*>& subgraphs,
                                   const std::string& subgraph_name) {
    for (SubGraphProtoT* subgraph : subgraphs) {
        if (subgraph->name == subgraph_name) {
            return subgraph;
        }
    }
    return nullptr;
}

bool CompleteSubGraph(const std::unordered_map<std::string, VARP>& inputs, const SubGraphProtoT* subgraph) {
    auto* ctx   = Global<OptimizeContext>::Get();
    auto config = Global<modelConfig>::Get();
    MNN_ASSERT(ctx != nullptr);

    // Disable verbose for subgraph.
    bool verbose = ctx->verbose;
    ctx->verbose = false;
    std::vector<std::string> outputNames;
    for (auto o : subgraph->outputs) {
        outputNames.emplace_back(subgraph->tensors[o]);
    }
    std::vector<std::string> inputNames;
    for (auto index : subgraph->inputs) {
        inputNames.emplace_back(subgraph->tensors[index]);
    }
    SubGraphProtoT* mutable_subgraph = // NOLINT
        FindSubGraphByName(ctx->subgraphs, subgraph->name);
    MNN_ASSERT(mutable_subgraph == subgraph);
    std::unique_ptr<MNN::NetT> subnet(new MNN::NetT);
    subnet->oplists    = std::move(mutable_subgraph->nodes);
    subnet->tensorName = mutable_subgraph->tensors;
    subnet->sourceType = ctx->source;
    subnet->outputName = outputNames;
    bool gDebug = false;
    if (gDebug) {
        flatbuffers::FlatBufferBuilder builder;
        builder.Finish(MNN::Net::Pack(builder, subnet.get()));
        std::ofstream output("temp.before_opt.mnn", std::ofstream::binary);
        output.write((const char*)builder.GetBufferPointer(), builder.GetSize());
    }
    config->inSubGraph = true;
    std::unique_ptr<MNN::NetT> new_subnet = ctx->RunOptimize(subnet, inputs);
    config->inSubGraph = false;
    if (gDebug) {
        flatbuffers::FlatBufferBuilder builder;
        builder.Finish(MNN::Net::Pack(builder, new_subnet.get()));
        std::ofstream output("temp.after_opt.mnn", std::ofstream::binary);
        output.write((const char*)builder.GetBufferPointer(), builder.GetSize());
    }
    mutable_subgraph->nodes = std::move(subnet->oplists);

    MNN::SubGraphProtoT* new_subgraph(new MNN::SubGraphProtoT);
    new_subgraph->name = mutable_subgraph->name;
    if (ctx->source != NetSource_ONNX) {
        new_subgraph->inputs = NetInputIndices(new_subnet.get());
    } else {
        new_subgraph->inputs.resize(inputNames.size());
        for (int i = 0; i < inputNames.size(); ++i) {
            for (int j = 0; j < new_subnet->tensorName.size(); ++j) {
                if (new_subnet->tensorName[j] == inputNames[i]) {
                    new_subgraph->inputs[i] = j;
                    break;
                }
            }
        }
    }
    new_subgraph->outputs.clear();
    outputNames = new_subnet->outputName;
    for (auto& output : outputNames) {
        bool find = false;
        for (int i = 0; i < new_subnet->tensorName.size(); ++i) {
            if (new_subnet->tensorName[i] == output) {
                find = true;
                new_subgraph->outputs.emplace_back(i);
                break;
            }
        }
        if (!find) {
            MNN_ERROR("Can't find output for %s\n", output.c_str());
        }
    }
    MNN_ASSERT(new_subgraph->outputs.size() == outputNames.size());
    new_subgraph->nodes   = std::move(new_subnet->oplists);
    new_subgraph->tensors = new_subnet->tensorName;
    MNN_ASSERT(!FindSubGraphByName(ctx->completed_subgraphs, new_subgraph->name));
    ctx->completed_subgraphs.push_back(new_subgraph);

    // Recover verbose.
    ctx->verbose = verbose;
    return true;
}
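
// Run the given post-convert passes (looked up through PostConverter::get) on the net in place.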
for %s\n", output.c_str()); } } MNN_ASSERT(new_subgraph->outputs.size() == outputNames.size()); new_subgraph->nodes = std::move(new_subnet->oplists); new_subgraph->tensors = new_subnet->tensorName; MNN_ASSERT(!FindSubGraphByName(ctx->completed_subgraphs, new_subgraph->name)); ctx->completed_subgraphs.push_back(new_subgraph); // Recovery verbose. ctx->verbose = verbose; return true; } void RunNetPass(const std::vector<std::string>& passes, std::unique_ptr<MNN::NetT>& originNet) { for (auto pass : passes) { auto convert = PostConverter::get(pass); if (nullptr == convert) { LOG(INFO) << "Can't find pass of " << pass << "\n"; continue; } bool valid = convert->onExecute(originNet); if (!valid) { LOG(INFO) << "Run " << pass << "Error\n"; } } } std::unique_ptr<MNN::NetT> RunExtraPass(std::unique_ptr<MNN::NetT>& originNet, const std::unordered_map<std::string, VARP>& inputs) { auto program = MNN::Express::Program::create(originNet.get(), true, true); program->input(inputs, true); std::string pass = "TFExtra"; switch (originNet->sourceType) { case MNN::NetSource_TFLITE: pass = "TFliteExtra"; break; case MNN::NetSource_TENSORFLOW: pass = "TFExtra"; break; case MNN::NetSource_CAFFE: pass = "CaffeExtra"; break; case MNN::NetSource_ONNX: pass = "OnnxExtra"; break; case MNN::NetSource_TORCH: pass = "TorchExtra"; break; default: break; } auto& merge = MNN::Express::TemplateMerge::getInstance(pass); merge.onExecute(program->outputs()); originNet->oplists.clear(); originNet->tensorName.clear(); std::unique_ptr<MNN::NetT> newNet(new MNN::NetT); newNet->sourceType = originNet->sourceType; newNet->bizCode = originNet->bizCode; newNet->outputName = originNet->outputName; program->save(newNet.get()); return std::move(newNet); } std::unique_ptr<MNN::NetT> RunMergePass(std::unique_ptr<MNN::NetT>& originNet, const std::unordered_map<std::string, VARP>& inputs, PassPriority priority) { auto program = MNN::Express::Program::create(originNet.get(), true, true); auto boundary = program->input(inputs, true); std::string pass = "Merge"; auto& merge = MNN::Express::TemplateMerge::getInstance(pass); std::map<std::string, VARP> updateVars; merge.onExecute(program->outputs(), priority, updateVars, boundary); auto Update = [&](std::shared_ptr<Program> program, const std::vector<std::string>& tensorName) { program->updateVars(updateVars, tensorName); }; Update(program, originNet->tensorName); originNet->oplists.clear(); originNet->tensorName.clear(); std::unique_ptr<MNN::NetT> newNet(new MNN::NetT); newNet->sourceType = originNet->sourceType; newNet->bizCode = originNet->bizCode; newNet->outputName = originNet->outputName; program->save(newNet.get()); RunNetPass({"RemoveUnusefulOp"}, newNet); return std::move(newNet); } std::unique_ptr<MNN::NetT> optimizeNetImpl(std::unique_ptr<MNN::NetT>& originNet, const std::unordered_map<std::string, VARP>& inputs) { auto current = ExecutorScope::Current(); current->lazyEval = true; current->setLazyComputeMode(Executor::LAZY_FULL); current->getAttr()->externalFile = ".__convert_external_data.bin"; auto* ctx = Global<OptimizeContext>::Get(); MNN_ASSERT(ctx != nullptr); if (ctx->is_training) { LOG(INFO) << "convert model for training, reserve BatchNorm and Dropout"; } if (originNet->oplists.size() <= 0) { return nullptr; } std::vector<std::string> postConvertPass; postConvertPass = { // Separate Tensor for inplace op "RemoveInplace", // Remove Unuseful Op such as NoOp, Identity, Seq2Out, "RemoveUnusefulOp", // Remove Dropout, if `forTraining` flag is set, Dropout will be reserved 
"RemoveDropout", // Remove Dup op "FuseDupOp", // Remove Invalid Cast "RemoveInvalidCast", // Turn InnerProduct from Caffe / Onnx to Convolution "TransformInnerProduct", // Turn Im2Seq from Caffe to Reshape "TransformIm2Seq", // Turn Caffe's ShuffleChannel to compose op "TransformShuffleChannel", "MoveUnaryOpBeforeReshape", }; if (ctx->is_training) { std::vector<std::string>::iterator iter = postConvertPass.begin(); while (iter != postConvertPass.end()) { if (*iter == "RemoveDropout") { iter = postConvertPass.erase(iter); } else { iter++; } } } RunNetPass(postConvertPass, originNet); std::vector<std::string> midOptPass = { // Remove Dup op "FuseDupOp", // Remove Invalid Cast "RemoveInvalidCast" }; std::vector<std::unique_ptr<TensorDescribeT>> tensorDescribe; if (originNet->extraTensorDescribe.size() > 0) { tensorDescribe = std::move(originNet->extraTensorDescribe); } std::unique_ptr<MNN::NetT> newNet; newNet = std::move(RunExtraPass(originNet, inputs)); RunNetPass(midOptPass, newNet); newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_FRONT)); newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_HIGH)); std::vector<std::string> afterProgramConvert = { // Turn BatchNormal to Scale When inference, if `forTraining` flag is set, BN will be reserved "TransformBatchNormal", // expand ShapeN to N Shapes "ResolveTfShapeN", // WARNNING: should merge BN and Scale before Relu and Relu6 // Merge BN info Convolution, if `forTraining` flag is set, BN will be reserved "MergeBNToConvolution", // Merge Scale info Convolution "MergeScaleToConvolution", // Merge Relu Convolution "MergeReluToConvolution", // Merge Relu6 Convolution "MergeRelu6ToConvolution", // Merge Relu BinaryOp "MergeReluToBinaryOp", }; if (ctx->is_training) { std::vector<std::string>::iterator iter = afterProgramConvert.begin(); while (iter != afterProgramConvert.end()) { if (*iter == "TransformBatchNormal" || *iter == "MergeBNToConvolution") { iter = afterProgramConvert.erase(iter); } else { iter++; } } } RunNetPass(afterProgramConvert, newNet); newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_MIDDLE)); afterProgramConvert = { "RemoveCopy", // Add tensor dimension format convert for NC4HW4 - NHWC / NC4HW4 - NCHW "AddTensorFormatConverter", // Turn group convolution to Slice - Convolution - Concat "TransformGroupConvolution", "TransformGroupConvolution3D", "FuseDupOp", // Remove output tensor convert "RemoveOutputTensorConvert", }; RunNetPass(afterProgramConvert, newNet); // Maybe eliminate the redundant quantize and dequantize ops, then remove // the unuseful `Identity`. newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_LOW)); // Maybe eliminate the redundant tensor format ops, then remove the unuseful // `Identity`. 
bool fuseConstIntoSubgraph(MNN::NetT* net, const std::vector<MNN::SubGraphProtoT*>& subgraphs) {
    if (subgraphs.empty()) {
        return false;
    }
    // Create a map for the subgraphs.
    // Key: name, value: (proto, refcount)
    std::map<std::string, std::pair<MNN::SubGraphProtoT*, int>> subGraphMaps;
    std::set<MNN::SubGraphProtoT*> modifiedSubGraph;
    for (auto s : subgraphs) {
        subGraphMaps.insert(std::make_pair(s->name, std::make_pair(s, 0)));
    }
    for (int i = 0; i < net->oplists.size(); ++i) {
        auto& op = net->oplists[i];
        if (op->type == MNN::OpType_While) {
            auto param = op->main.AsWhileParam();
            subGraphMaps[param->body_graph].second++;
            subGraphMaps[param->cond_graph].second++;
            continue;
        }
        if (op->type == MNN::OpType_If) {
            auto param = op->main.AsIfParam();
            subGraphMaps[param->else_graph].second++;
            subGraphMaps[param->then_graph].second++;
            continue;
        }
    }
    // Try to merge Const ops into subgraphs.
    // Search all Const ops.
    std::vector<int> constOpIndexes(net->tensorName.size(), -1);
    for (int i = 0; i < net->oplists.size(); ++i) {
        auto& op = net->oplists[i];
        if (op->type == MNN::OpType_Const) {
            constOpIndexes[op->outputIndexes[0]] = i;
        }
    }
    // Try to merge for While ops
    std::set<int> removeConstOpIndexes;
    for (int opIndex = 0; opIndex < net->oplists.size(); ++opIndex) {
        auto& op = net->oplists[opIndex];
        if (op->type != MNN::OpType_While) {
            continue;
        }
        auto param = op->main.AsWhileParam();
        if (param->cond_graph.empty()) {
            // If cond_graph is empty, it comes from ONNX's Loop
            // TODO: Support Loop from ONNX
            continue;
        }
        auto body = subGraphMaps[param->body_graph];
        auto cond = subGraphMaps[param->cond_graph];
        // Don't support optimizing shared subgraphs
        if (body.second > 1 || cond.second > 1) {
            continue;
        }
        MNN_ASSERT(op->inputIndexes.size() == param->aliases_inputs.size());
        // Merge into subgraph
        std::set<int> removeInputs;
        std::set<int> bodyInputRemove;
        std::set<int> condInputRemove;
        auto mergeToSubGraph = [](MNN::SubGraphProtoT* subGraph, std::set<int>& inputRemove, const MNN::OpT* constOp,
                                  const std::string& inputName) {
            // Merge the Const into the subgraph's corresponding Input op
            for (auto& inputIndex : subGraph->inputs) {
                if (subGraph->tensors[inputIndex] == inputName) {
                    inputRemove.insert(inputIndex);
                    for (int v = 0; v < subGraph->nodes.size(); ++v) {
                        auto& subOp = subGraph->nodes[v];
                        if (subOp->type != MNN::OpType_Input) {
                            continue;
                        }
                        if (subOp->outputIndexes[0] == inputIndex) {
                            auto src              = constOp->main.AsBlob();
                            subOp->type           = MNN::OpType_Const;
                            subOp->main.type      = MNN::OpParameter_Blob;
                            subOp->main.value     = new MNN::BlobT;
                            *subOp->main.AsBlob() = *src;
                            break;
                        }
                    }
                    break;
                }
            }
            return true;
        };
        for (int subI = 0; subI < op->inputIndexes.size(); ++subI) {
            auto index      = op->inputIndexes[subI];
            auto constIndex = constOpIndexes[index];
            if (constIndex < 0) {
                continue;
            }
            // Don't support shared graph inputs
            if (param->aliases_inputs[subI]->data.size() != 1) {
                continue;
            }
            auto inputName = param->aliases_inputs[subI]->data[0];
            // Don't support a const that is an initial value and gets updated by the loop
            bool isUpdate = false;
            for (auto& update : param->aliases_updates) {
                for (auto updateName : update->data) {
                    if (updateName == inputName) {
                        isUpdate = true;
                        break;
                    }
                }
                if (isUpdate) {
                    break;
                }
            }
            if (isUpdate) {
                continue;
            }
            // Count the refcount of the const tensor
            int refCount = 0;
            for (int sub = constIndex + 1; sub < net->oplists.size(); ++sub) {
                auto& subOp = net->oplists[sub];
                for (auto subIndex : subOp->inputIndexes) {
                    if (subIndex == index) {
                        refCount++;
                        break;
                    }
                }
            }
            if (refCount > 1) {
                // The const input is shared with other ops
                continue;
            }
            auto& constOp = net->oplists[constIndex];
            //FUNC_PRINT_ALL(constOp->name.c_str(), s);
            MNN_ASSERT(constOp->main.type == MNN::OpParameter_Blob);
            removeConstOpIndexes.insert(constIndex);

            mergeToSubGraph(body.first, bodyInputRemove, constOp.get(), inputName);
            mergeToSubGraph(cond.first, condInputRemove, constOp.get(), inputName);
            removeInputs.insert(subI);

            modifiedSubGraph.insert(body.first);
            modifiedSubGraph.insert(cond.first);

            // Release the Const memory, which is no longer needed
            constOp->main.Reset();
        }
        auto removeSubGraphInputs = [](MNN::SubGraphProtoT* subGraph, const std::set<int>& inputRemove) {
            auto originInput = std::move(subGraph->inputs);
            subGraph->inputs.clear();
            for (auto index : originInput) {
                if (inputRemove.find(index) == inputRemove.end()) {
                    subGraph->inputs.emplace_back(index);
                }
            }
        };
        removeSubGraphInputs(body.first, bodyInputRemove);
        removeSubGraphInputs(cond.first, condInputRemove);

        // Remove the now-unused inputs of the While op
        auto originIndexes = std::move(op->inputIndexes);
        auto aliInputs     = std::move(param->aliases_inputs);
        for (int subI = 0; subI < originIndexes.size(); ++subI) {
            if (removeInputs.find(subI) == removeInputs.end()) {
                op->inputIndexes.emplace_back(originIndexes[subI]);
                param->aliases_inputs.emplace_back(std::move(aliInputs[subI]));
            }
        }
    }
    if (removeConstOpIndexes.empty()) {
        return false;
    }
    auto originOpLists = std::move(net->oplists);
    for (int i = 0; i < originOpLists.size(); ++i) {
        if (removeConstOpIndexes.find(i) == removeConstOpIndexes.end()) {
            net->oplists.emplace_back(std::move(originOpLists[i]));
        }
    }

    // Try to optimize the modified subgraphs again so that more const ops can be folded
    auto* ctx = Global<OptimizeContext>::Get();
    std::unordered_map<std::string, VARP> empty;
    for (auto mutable_subgraph : modifiedSubGraph) {
        std::unique_ptr<MNN::NetT> subnet(new MNN::NetT);
        subnet->oplists    = std::move(mutable_subgraph->nodes);
        subnet->tensorName = std::move(mutable_subgraph->tensors);
        subnet->sourceType = ctx->source;

        std::vector<std::string> inputNames;
        std::vector<std::string> outputNames;
        for (auto v : mutable_subgraph->inputs) {
            inputNames.emplace_back(subnet->tensorName[v]);
        }
        for (auto v : mutable_subgraph->outputs) {
            outputNames.emplace_back(subnet->tensorName[v]);
        }
#ifdef MNN_POST_CONVERTER_DEBUG
        for (auto& v : outputNames) {
            FUNC_PRINT_ALL(v.c_str(), s);
        }
        FUNC_PRINT_ALL(mutable_subgraph->name.c_str(), s);
#endif
        subnet->outputName = outputNames;

        std::unique_ptr<MNN::NetT> new_subnet = optimizeNetImpl(subnet, empty);
        mutable_subgraph->nodes               = std::move(subnet->oplists);
        MNN::SubGraphProtoT* new_subgraph     = mutable_subgraph;
        for (int i = 0; i < inputNames.size(); ++i) {
            auto& name = inputNames[i];
            for (int v = 0; v < new_subnet->tensorName.size(); ++v) {
                if (new_subnet->tensorName[v] == name) {
                    mutable_subgraph->inputs[i] = v;
                    break;
                }
            }
        }
        for (int i = 0; i < outputNames.size(); ++i) {
            auto& name = outputNames[i];
            for (int v = 0; v < new_subnet->tensorName.size(); ++v) {
                if (new_subnet->tensorName[v] == name) {
                    mutable_subgraph->outputs[i] = v;
                    break;
                }
            }
        }
        mutable_subgraph->nodes   = std::move(new_subnet->oplists);
        mutable_subgraph->tensors = std::move(new_subnet->tensorName);
    }
    return true;
}

} // namespace Express
} // namespace MNN

using namespace MNN;
using namespace MNN::Express;
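
// Public entry point: optimize the root net and all of its subgraphs.
// If expectPasses is non-empty, only those passes are run on the net.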
std::unique_ptr<MNN::NetT> optimizeNet(std::unique_ptr<MNN::NetT>& originNet, bool forTraining, modelConfig& config,
                                       const std::vector<std::string>& expectPasses) {
    Global<modelConfig>::Reset(&config);
    if (!expectPasses.empty()) {
        RunNetPass(expectPasses, originNet);
        return std::move(originNet);
    }
    std::unique_ptr<std::ofstream, void (*)(std::ofstream*)> externalFile(
        new std::ofstream(".__convert_external_data.bin", std::ios::binary), [](std::ofstream* fs) {
            fs->close();
            delete fs;
        });
    if (externalFile.get() && externalFile->is_open() && externalFile->good()) {
        config.externalFile = externalFile.get();
    } else {
        config.externalFile = nullptr;
    }
    if (originNet->sourceType == NetSource_TENSORFLOW) {
        GenerateSubGraph(originNet);
    }
    std::vector<MNN::SubGraphProtoT*> subgraphs;
    for (auto& subgraph : originNet->subgraphs) {
        subgraphs.push_back(subgraph.get());
    }
    OptimizeContext ctx;
    ctx.subgraphs           = subgraphs;
    ctx.is_training         = forTraining;
    ctx.verbose             = true;
    ctx.source              = originNet->sourceType;
    ctx.completed_subgraphs = {};
    ctx.RunOptimize         = optimizeNetImpl;
    Global<OptimizeContext>::Reset(&ctx);

    std::unordered_map<std::string, VARP> inputs, empty;
    // A subgraph may depend on vars of an outer subgraph or of the root net, and getting those vars needs
    // Program::create. But a program created from an unoptimized net may contain OpType_Extra ops, so its vars
    // can't do getInfo/readMap correctly, and subgraphs depending on them may convert to nullptr or to a wrong shape.
    // RunOptimize doesn't use the subgraphs, so it is safe to run it before the subgraphs are optimized.
    std::unique_ptr<MNN::NetT> net = ctx.RunOptimize(originNet, empty);
    auto program = Program::create(net.get(), true, true);
    auto addVars = [&](std::shared_ptr<Program> program, const std::vector<std::string>& tensorName) {
        for (const auto& iter : program->vars()) {
            if (iter.first < tensorName.size() && iter.first >= 0) {
                auto name = tensorName[iter.first];
                if (inputs.find(name) == inputs.end()) {
                    inputs[name] = iter.second;
                }
            }
        }
    };
    addVars(program, net->tensorName);

    // Reverse the subgraphs so we iterate them in topological order (like a tree traversal),
    // so every var used by a subgraph has already been prepared.
    std::reverse(ctx.subgraphs.begin(), ctx.subgraphs.end());
    for (int idx = 0; idx < ctx.subgraphs.size(); ++idx) {
        // Complete it first so OpType_Extra ops are removed
        CompleteSubGraph(inputs, ctx.subgraphs[idx]);
        auto new_graph  = ctx.completed_subgraphs[idx];
        auto subProgram = Program::create(new_graph, true, true);
        subProgram->input(inputs, true);
        // Add the vars of this subgraph so inner subgraphs can use them
        addVars(subProgram, new_graph->tensors);
    }

    ctx.first_run = false;
    ctx.subgraphs = std::move(ctx.completed_subgraphs);
    // From inner to outer, so optimizations inside a subgraph are visible to the outer graph and the root net
    std::reverse(ctx.subgraphs.begin(), ctx.subgraphs.end());
    for (auto subgraph : ctx.subgraphs) {
        CompleteSubGraph(inputs, subgraph);
    }

    net = ctx.RunOptimize(net, empty);
    fuseConstIntoSubgraph(net.get(), ctx.completed_subgraphs);
    for (auto* subgraph : ctx.completed_subgraphs) {
        net->subgraphs.emplace_back(subgraph);
    }
    return std::move(net);
}