ErrorCode Pipeline::encode()

in source/core/Pipeline.cpp [244:473]


ErrorCode Pipeline::encode(bool supportDebug, bool permitCodegen) {
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    // Static model: just copy the op info into the command buffer
    if (!mInfo.first.needComputeGeometry) {
        for (int i=0; i<mInfo.second.size(); ++i) {
            auto& info = mInfo.second[i];
            std::shared_ptr<Command> cmd(new Command);
            cmd->op      = info.op;
            if (cmd->op->type() == OpType_Raster) {
                // Compatibility with the original static model
                cmd->outputs  = info.outputs;
                if (TensorUtils::getDescribe(info.outputs[0])->regions.empty() && info.inputs.size() > 0 && TensorUtils::getDescribe(info.inputs[0])->regions.size() > 0) {
                    TensorUtils::getDescribe(info.outputs[0])->regions = std::move(TensorUtils::getDescribe(info.inputs[0])->regions);
                    TensorUtils::setRasterInputs(cmd.get());
                } else {
                    cmd->inputs  = info.inputs;
                }
            } else {
                cmd->inputs  = info.inputs;
                cmd->outputs = info.outputs;
            }
            info.executeBuffer.command = {cmd};
        }
    } else {
#ifndef MNN_BUILD_MINI
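        // Dynamic model: reset backend buffers, then recompute shapes, fold constants and lower ops through the geometry transform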
        mBackend->onClearBuffer();
        mBackupBackend->onClearBuffer();
        mContext.clear();
        mContext.mNeedRelease = mGeometryNeedRelease;
        FileLoader l(mExternalFile.c_str());
        /** Shape computation and constant computation begin */
        auto res = GeometryComputerUtils::shapeComputeAndGeometryTransform(mCpuRuntime, &l, mInfo.second, mContext, mInfo.first.cache.second, mUseGeometry, false, permitCodegen);
        if (res != NO_ERROR) {
            return res;
        }
#endif
    }
    // Propagate quantization scales and insert cast commands where needed
    if (mIsQuantModel && (mBackend->type() == MNN_FORWARD_CPU || mBackend->type() == MNN_FORWARD_CPU_EXTENSION || mBackend->type() == MNN_FORWARD_CUDA || mBackend->type() == MNN_FORWARD_NN || mBackend->type() == MNN_FORWARD_OPENCL)) {
        // Build the propagation maps: forwardMap (input -> output) and backwardMap (output -> input)
        using PropagateMap = std::map<const MNN::Tensor*, std::set<const MNN::Tensor*>>;
        PropagateMap forwardMap, backwardMap;
        auto insertPropagateMap = [](PropagateMap& propagateMap, const Tensor* s, const Tensor* t) {
            if (propagateMap.find(s) == propagateMap.end()) {
                propagateMap[s] = std::set<const Tensor*>({t});
            } else {
                propagateMap[s].insert(t);
            }
        };
        std::set<OpType> propagateOpTypes = { OpType_Raster, OpType_ReLU, OpType_ReLU6, OpType_Pooling,
                                              OpType_Interp, OpType_CropAndResize, OpType_ROIPooling, OpType_Gather,
                                              OpType_GatherV2, OpType_ScatterNd};
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            for (const auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                const auto type = cmd.op->type();
                const auto output = cmd.outputs[0];
                if (propagateOpTypes.find(type) != propagateOpTypes.end()) {
                    for (auto t : cmd.inputs) {
                        insertPropagateMap(forwardMap, t, output);
                        insertPropagateMap(backwardMap, output, t);
                    }
                }
            }
        }
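        // Start tensors are the roots of the propagation graph: map keys that never appear as a mapped value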
        auto getStart = [&forwardMap, &backwardMap](bool forward) {
            auto& propagateMap = forward ? forwardMap : backwardMap;
            auto& antiMap = forward ? backwardMap : forwardMap;
            // Drop N->1 mappings: if a tensor maps to multiple tensors in the opposite direction, remove those tensors from the propagation map
            for (const auto& iter : antiMap) {
                if (iter.second.size() > 1) {
                    for (auto t : iter.second) {
                        auto res = propagateMap.find(t);
                        if (res != propagateMap.end()) {
                            propagateMap.erase(res);
                        }
                    }
                }
            }
            std::set<const Tensor*> root, leaf, start;
            for (const auto& iter : propagateMap) {
                root.insert(iter.first);
                for (auto t : iter.second) {
                    leaf.insert(t);
                }
            }
            std::set_difference(root.begin(), root.end(), leaf.begin(), leaf.end(), std::inserter(start, start.begin()));
            return start;
        };
        auto forwardStart = getStart(true);
        auto backwardStart = getStart(false);
        // propagate scale
        auto propagateScale = [](PropagateMap& propagateMap, std::set<const Tensor*>& start) {
            std::function<bool(const Tensor*)> scalePropagate = [&propagateMap, &scalePropagate](const Tensor* t) {
                if (TensorUtils::getDescribe(t)->quantAttr.get() == nullptr) {
                    return false;
                }
                if (propagateMap.find(t) == propagateMap.end()) {
                    return false;
                }
                bool change = false;
                for (auto x : propagateMap[t]) {
                    if (TensorUtils::getDescribe(x)->quantAttr != TensorUtils::getDescribe(t)->quantAttr) {
                        TensorUtils::getDescribe(x)->quantAttr = TensorUtils::getDescribe(t)->quantAttr;
                        change = true;
                    }
                    change |= scalePropagate(x);
                }
                return change;
            };
            bool change = false;
            for (auto t : start) {
                change |= scalePropagate(t);
            }
            return change;
        };
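        // Run forward and backward propagation until a fixed point, capped at 3 passes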
        for (int i = 0; i < 3 && (propagateScale(forwardMap, forwardStart) || propagateScale(backwardMap, backwardStart)); i++);
        
        // Insert cast
        std::map<const Tensor*, Tensor*> cachedCastTensor;
        for (auto& info : mInfo.second) {
            auto bufferCommand = std::move(info.executeBuffer.command);
            bool hasConvert = false;
            for (auto cmdP : bufferCommand) {
                auto& cmd = *cmdP;
                auto& outputs = cmd.outputs;
                auto& inputs = cmd.inputs;
                auto opType = cmd.op->type();
                // Check whether the quantized (int8) op path should be used
                DataType runType = DataType_DT_FLOAT;
                bool useQuant = false;
                if (outputs.size() == 1) {
                    // Quantize only when the output and all inputs have quantAttr and the op supports int8
                    if (TensorUtils::getDescribe(outputs[0])->quantAttr != nullptr) {
                        useQuant = _supportQuant(cmd.op, inputs, outputs, mBackend->type());
                    }
                    if (useQuant) {
                        for (auto t : inputs) {
                            if (TensorUtils::getDescribe(t)->quantAttr == nullptr) {
                                useQuant = false;
                                break;
                            }
                        }
                    }
                }
                if (useQuant) {
                    runType = DataType_DT_INT8;
                }
                
                for (auto o : outputs) {
                    auto quan = TensorUtils::getDescribe(o)->quantAttr;
                    if (nullptr != quan) {
                        TensorUtils::getDescribe(o)->type = runType;
                    }
                }
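                // Build (or fetch from cache) a cast command that converts `input` to `runType`, returning the wrapped tensor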
                auto makeCommand = [&cachedCastTensor, &info](CommandBuffer& cmdBuffer, Tensor* input, DataType runType) {
                    if (cachedCastTensor.find(input) != cachedCastTensor.end()) {
                        return cachedCastTensor[input];
                    }
                    std::shared_ptr<Tensor> wrapTensor(new Tensor);
                    TensorUtils::copyShape(input, wrapTensor.get(), true);
                    TensorUtils::setLinearLayout(wrapTensor.get());
                    auto des = TensorUtils::getDescribe(wrapTensor.get());
                    auto originDes = TensorUtils::getDescribe(input);
                    if (originDes->quantAttr != nullptr) {
                        des->quantAttr.reset(new QuantAttr);
                        *des->quantAttr = *originDes->quantAttr;
                        des->type = runType;
                    }
                    cmdBuffer.extras.emplace_back(wrapTensor);
                    std::shared_ptr<Command> command(new Command);
                    command->inputs = {input};
                    command->outputs = {wrapTensor.get()};
                    info.cacheBuffer.hasWrap = true;
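                    // Serialize a minimal FloatToInt8 / Int8ToFloat op; the command owns the FlatBuffer storage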
                    flatbuffers::FlatBufferBuilder builder;
                    OpBuilder opB(builder);
                    if (runType == DataType_DT_INT8) {
                        opB.add_type(OpType_FloatToInt8);
                    } else {
                        opB.add_type(OpType_Int8ToFloat);
                    }
                    builder.Finish(opB.Finish());
                    command->buffer.reset(new BufferStorage);
                    command->buffer->storage = builder.ReleaseRaw(command->buffer->allocated_size, command->buffer->offset);
                    command->op = flatbuffers::GetRoot<Op>(command->buffer->buffer());
                    info.executeBuffer.command.emplace_back(std::move(command));
                    return wrapTensor.get();
                };
                // Decide whether each input needs a cast wrap
                if (OpType_Raster == opType) {
                    for (int v=0; v<cmd.inputs.size(); ++v) {
                        auto input = cmd.inputs[v];
                        bool needCast = CPUBackend::getDataType(input) != runType;
                        if (needCast) {
                            cmd.inputs[v] = makeCommand(info.executeBuffer, input, runType);
                        }
                    }
                } else {
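                    // For non-raster ops, only cast inputs whose content is actually read and that are not integer tensors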
                    for (int i = 0; i < cmd.inputs.size(); i++) {
                        if (OpCommonUtils::opNeedContent(cmd.op, i) && inputs[i]->getType() != halide_type_of<int>()) {
                            bool needCast = CPUBackend::getDataType(inputs[i]) != runType;
                            if (needCast) {
                                cmd.inputs[i] = makeCommand(info.executeBuffer, inputs[i], runType);
                            }
                        }
                    }
                }
                info.executeBuffer.command.emplace_back(cmdP);
            }
        }
    }
    /** Prepare DebugInfo */
    if (supportDebug) {
        mFlops = 0.0f;
        int totalIndex = 0;
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            int index = 0;
            for (auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                cmd.info.reset(new UnitInfo);
                static_cast<UnitInfo*>(cmd.info.get())->setUp(cmd, index++, info.op, totalIndex++);
                mFlops += cmd.info->flops();
            }
        }
    }
    return NO_ERROR;
}
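
The scale-propagation step above is essentially a fixed-point iteration over adjacency maps. Below is a minimal, self-contained sketch of the same pattern using toy integer ids and attributes instead of MNN's Tensor and QuantAttr types; it illustrates the control flow only and is not MNN code.

#include <cstdio>
#include <functional>
#include <map>
#include <set>

int main() {
    // Toy propagation graph: tensor id -> ids that should inherit its attribute.
    std::map<int, std::set<int>> forward = {{0, {1}}, {1, {2, 3}}};
    // Only the root has an attribute to start with.
    std::map<int, int> attr = {{0, 7}};

    // Recursive propagation, mirroring scalePropagate in encode().
    std::function<bool(int)> propagate = [&](int t) {
        if (attr.find(t) == attr.end() || forward.find(t) == forward.end()) {
            return false;
        }
        bool change = false;
        for (int next : forward[t]) {
            if (attr.find(next) == attr.end() || attr[next] != attr[t]) {
                attr[next] = attr[t];
                change = true;
            }
            change |= propagate(next);
        }
        return change;
    };

    // Iterate to a fixed point, capped at 3 passes like the pipeline does.
    for (int pass = 0; pass < 3 && propagate(0); ++pass) {
    }
    for (const auto& kv : attr) {
        std::printf("tensor %d -> attr %d\n", kv.first, kv.second);
    }
    return 0;
}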