in source/core/Pipeline.cpp [244:473]
/**
 * Encode the scheduled ops into per-unit executable command buffers.
 *
 * - Static models (needComputeGeometry == false): op info is copied straight
 *   into each unit's command buffer, with a compatibility path for origin
 *   static models whose Raster regions live on the first input.
 * - Dynamic models: runs shape compute + geometry transform to produce the
 *   commands (unavailable under MNN_BUILD_MINI).
 * - Quantized models on supported backends: quant scales are propagated
 *   through the ops in propagateOpTypes, then FloatToInt8 / Int8ToFloat cast
 *   commands are inserted wherever a consumer's run type differs from the
 *   tensor's current data type.
 *
 * @param supportDebug  when true, attach a UnitInfo to every command (for
 *                      debug callbacks) and accumulate mFlops.
 * @param permitCodegen forwarded to the geometry-transform stage.
 * @return NO_ERROR on success, or the error from shape/geometry compute.
 */
ErrorCode Pipeline::encode(bool supportDebug, bool permitCodegen) {
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    // Static Model just copy info to command buffer
    if (!mInfo.first.needComputeGeometry) {
        for (int i=0; i<mInfo.second.size(); ++i) {
            auto& info = mInfo.second[i];
            std::shared_ptr<Command> cmd(new Command);
            cmd->op = info.op;
            if (cmd->op->type() == OpType_Raster) {
                // Compatibility for origin static model: old models store the
                // raster regions on the first input instead of the output.
                cmd->outputs = info.outputs;
                if (TensorUtils::getDescribe(info.outputs[0])->regions.empty() && info.inputs.size() > 0 && TensorUtils::getDescribe(info.inputs[0])->regions.size() > 0) {
                    TensorUtils::getDescribe(info.outputs[0])->regions = std::move(TensorUtils::getDescribe(info.inputs[0])->regions);
                    TensorUtils::setRasterInputs(cmd.get());
                } else {
                    cmd->inputs = info.inputs;
                }
            } else {
                cmd->inputs = info.inputs;
                cmd->outputs = info.outputs;
            }
            info.executeBuffer.command = {cmd};
        }
    } else {
#ifndef MNN_BUILD_MINI
        mBackend->onClearBuffer();
        mBackupBackend->onClearBuffer();
        mContext.clear();
        mContext.mNeedRelease = mGeometryNeedRelease;
        FileLoader l(mExternalFile.c_str());
        /** Size Compute and compute Const Begin */
        auto res = GeometryComputerUtils::shapeComputeAndGeometryTransform(mCpuRuntime, &l, mInfo.second, mContext, mBackupBackend, mUseGeometry, false, permitCodegen);
        if (res != NO_ERROR) {
            return res;
        }
#endif
    }
    // Propagate Scale and insert new command
    if (mIsQuantModel && (mBackend->type() == MNN_FORWARD_CPU || mBackend->type() == MNN_FORWARD_CPU_EXTENSION || mBackend->type() == MNN_FORWARD_CUDA || mBackend->type() == MNN_FORWARD_NN || mBackend->type() == MNN_FORWARD_OPENCL)) {
        // Build tensor->tensor propagation edges: forwardMap follows data flow
        // (input -> output); backwardMap is the reverse direction.
        using PropagateMap = std::map<const MNN::Tensor*, std::set<const MNN::Tensor*>>;
        PropagateMap forwardMap, backwardMap;
        auto insertPropagateMap = [](PropagateMap& propagateMap, const Tensor* s, const Tensor* t) {
            if (propagateMap.find(s) == propagateMap.end()) {
                propagateMap[s] = std::set<const Tensor*>({t});
            } else {
                propagateMap[s].insert(t);
            }
        };
        // Ops through which a quant scale may be shared between input and
        // output. NOTE(review): the original list contained OpType_GatherV2
        // twice; the duplicate is dropped (a std::set ignored it anyway) —
        // confirm whether OpType_GatherND was intended instead.
        std::set<OpType> propagateOpTypes = { OpType_Raster, OpType_ReLU, OpType_ReLU6, OpType_Pooling,
                                              OpType_Interp, OpType_CropAndResize, OpType_ROIPooling, OpType_Gather,
                                              OpType_GatherV2, OpType_ScatterNd};
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            for (const auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                const auto type = cmd.op->type();
                const auto output = cmd.outputs[0];
                if (propagateOpTypes.find(type) != propagateOpTypes.end()) {
                    for (auto t : cmd.inputs) {
                        insertPropagateMap(forwardMap, t, output);
                        insertPropagateMap(backwardMap, output, t);
                    }
                }
            }
        }
        // Start tensors of a propagation direction: tensors that appear as a
        // source but never as a target, after pruning fan-in nodes whose
        // scale would be ambiguous.
        auto getStart = [&forwardMap, &backwardMap](bool forward) {
            auto& propagateMap = forward ? forwardMap : backwardMap;
            auto& antiMap = forward ? backwardMap : forwardMap;
            // delete N->1 Map of Op
            for (const auto& iter : antiMap) {
                if (iter.second.size() > 1) {
                    for (auto t : iter.second) {
                        auto res = propagateMap.find(t);
                        if (res != propagateMap.end()) {
                            propagateMap.erase(res);
                        }
                    }
                }
            }
            std::set<const Tensor*> root, leaf, start;
            for (const auto& iter : propagateMap) {
                root.insert(iter.first);
                for (auto t : iter.second) {
                    leaf.insert(t);
                }
            }
            // start = root \ leaf (both sets are ordered, as required)
            std::set_difference(root.begin(), root.end(), leaf.begin(), leaf.end(), std::inserter(start, start.begin()));
            return start;
        };
        auto forwardStart = getStart(true);
        auto backwardStart = getStart(false);
        // propagate scale: depth-first sharing of quantAttr from each start
        // tensor to every reachable neighbour; returns true if anything changed.
        auto propagateScale = [](PropagateMap& propagateMap, std::set<const Tensor*>& start) {
            std::function<bool(const Tensor*)> scalePropagate = [&propagateMap, &scalePropagate](const Tensor* t) {
                if (TensorUtils::getDescribe(t)->quantAttr.get() == nullptr) {
                    return false;
                }
                if (propagateMap.find(t) == propagateMap.end()) {
                    return false;
                }
                bool change = false;
                for (auto x : propagateMap[t]) {
                    if (TensorUtils::getDescribe(x)->quantAttr != TensorUtils::getDescribe(t)->quantAttr) {
                        TensorUtils::getDescribe(x)->quantAttr = TensorUtils::getDescribe(t)->quantAttr;
                        change = true;
                    }
                    change |= scalePropagate(x);
                }
                return change;
            };
            bool change = false;
            for (auto t : start) {
                change |= scalePropagate(t);
            }
            return change;
        };
        // Alternate forward/backward propagation until a fixpoint, capped at
        // three rounds.
        for (int i = 0; i < 3 && (propagateScale(forwardMap, forwardStart) || propagateScale(backwardMap, backwardStart)); i++);
        // Insert cast commands where a consumer's run type differs from the
        // tensor's current type.
        for (auto& info : mInfo.second) {
            // Cache of already-inserted casts for this unit, keyed by
            // (source tensor, target type): the same tensor may legitimately
            // need both an INT8 and a FLOAT cast for different consumers.
            std::map<std::pair<const Tensor*, DataType>, Tensor*> cachedCastTensor;
            auto bufferCommand = std::move(info.executeBuffer.command);
            for (auto cmdP : bufferCommand) {
                auto& cmd = *cmdP;
                auto& outputs = cmd.outputs;
                auto& inputs = cmd.inputs;
                auto opType = cmd.op->type();
                // Check if need use quant op
                DataType runType = DataType_DT_FLOAT;
                bool useQuant = false;
                if (outputs.size() == 1) {
                    // Quant: output and all input has quantAttr and op support
                    if (TensorUtils::getDescribe(outputs[0])->quantAttr != nullptr) {
                        useQuant = _supportQuant(cmd.op, inputs, outputs, mBackend->type());
                    }
                    if (useQuant) {
                        for (auto t : inputs) {
                            if (TensorUtils::getDescribe(t)->quantAttr == nullptr) {
                                useQuant = false;
                                break;
                            }
                        }
                    }
                }
                if (useQuant) {
                    runType = DataType_DT_INT8;
                }
                // Stamp the run type onto every quantizable output.
                for (auto o : outputs) {
                    auto quan = TensorUtils::getDescribe(o)->quantAttr;
                    if (nullptr != quan) {
                        TensorUtils::getDescribe(o)->type = runType;
                    }
                }
                // Create (or reuse) a cast command converting `input` to
                // `runType`; returns the converted tensor.
                auto makeCommand = [&cachedCastTensor, &info](CommandBuffer& cmdBuffer, Tensor* input, DataType runType) {
                    std::pair<const Tensor*, DataType> cacheKey(input, runType);
                    auto cached = cachedCastTensor.find(cacheKey);
                    if (cached != cachedCastTensor.end()) {
                        return cached->second;
                    }
                    std::shared_ptr<Tensor> wrapTensor(new Tensor);
                    TensorUtils::copyShape(input, wrapTensor.get(), true);
                    TensorUtils::setLinearLayout(wrapTensor.get());
                    auto des = TensorUtils::getDescribe(wrapTensor.get());
                    auto originDes = TensorUtils::getDescribe(input);
                    if (originDes->quantAttr != nullptr) {
                        des->quantAttr.reset(new QuantAttr);
                        *des->quantAttr = *originDes->quantAttr;
                        des->type = runType;
                    }
                    cmdBuffer.extras.emplace_back(wrapTensor);
                    std::shared_ptr<Command> command(new Command);
                    command->inputs = {input};
                    command->outputs = {wrapTensor.get()};
                    info.cacheBuffer.hasWrap = true;
                    // Build a minimal flatbuffer Op holding only the cast type.
                    flatbuffers::FlatBufferBuilder builder;
                    OpBuilder opB(builder);
                    if (runType == DataType_DT_INT8) {
                        opB.add_type(OpType_FloatToInt8);
                    } else {
                        opB.add_type(OpType_Int8ToFloat);
                    }
                    builder.Finish(opB.Finish());
                    command->buffer.reset(new BufferStorage);
                    command->buffer->storage = builder.ReleaseRaw(command->buffer->allocated_size, command->buffer->offset);
                    command->op = flatbuffers::GetRoot<Op>(command->buffer->buffer());
                    info.executeBuffer.command.emplace_back(std::move(command));
                    // FIX: remember the cast so later consumers in this unit
                    // reuse it; the original never populated this cache.
                    cachedCastTensor.emplace(cacheKey, wrapTensor.get());
                    return wrapTensor.get();
                };
                // judge is it need CastWrap
                if (OpType_Raster == opType) {
                    for (int v=0; v<cmd.inputs.size(); ++v) {
                        auto input = cmd.inputs[v];
                        bool needCast = CPUBackend::getDataType(input) != runType;
                        if (needCast) {
                            cmd.inputs[v] = makeCommand(info.executeBuffer, input, runType);
                        }
                    }
                } else {
                    for (int i = 0; i < cmd.inputs.size(); i++) {
                        // int tensors (indices etc.) are never cast
                        if (OpCommonUtils::opNeedContent(cmd.op, i) && inputs[i]->getType() != halide_type_of<int>()) {
                            bool needCast = CPUBackend::getDataType(inputs[i]) != runType;
                            if (needCast) {
                                cmd.inputs[i] = makeCommand(info.executeBuffer, inputs[i], runType);
                            }
                        }
                    }
                }
                // Re-append the (possibly rewired) original command after any
                // cast commands it depends on.
                info.executeBuffer.command.emplace_back(cmdP);
            }
        }
    }
    /** Prepare DebugInfo*/
    if (supportDebug) {
        mFlops = 0.0f;
        int totalIndex = 0;
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            int index = 0;
            for (auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                cmd.info.reset(new UnitInfo);
                static_cast<UnitInfo*>(cmd.info.get())->setUp(cmd, index++, info.op, totalIndex++);
                mFlops += cmd.info->flops();
            }
        }
    }
    return NO_ERROR;
}