tools/quantization/calibration.cpp:

//
//  calibration.cpp
//  MNN
//
//  Created by MNN on 2019/04/23.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "calibration.hpp"
#include <cmath>
#include <cstring> // for memcpy
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <set>
#include <algorithm>
#include <MNN/ImageProcess.hpp>
#include "flatbuffers/util.h"
#include "logkit.h"
#include "quantizeWeight.hpp"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/prettywriter.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include "Helper.hpp"
#include "core/TensorUtils.hpp"
#include "core/IDSTEncoder.hpp"
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/Module.hpp>
#include "train/source/nn/NN.hpp"
#include "train/source/datasets/ImageNoLabelDataset.hpp"
#include "train/source/datasets/ImageDataset.hpp"
#include "train/source/optimizer/SGD.hpp"
#include "train/source/transformer/Transformer.hpp"
#include "cpp/ConvertToFullQuant.hpp"
#include "core/ConvolutionCommon.hpp"
#include <MNN/expr/Expr.hpp>

#define DUMP_NUM_DATA(type)                          \
    auto data = tensor->host<type>();                \
    for (int z = 0; z < outside; ++z) {              \
        for (int x = 0; x < width; ++x) {            \
            outputOs << data[x + z * width] << "\t"; \
        }                                            \
        outputOs << "\n";                            \
    }

#define DUMP_CHAR_DATA(type)                                           \
    auto data = tensor->host<type>();                                  \
    for (int z = 0; z < outside; ++z) {                                \
        for (int x = 0; x < width; ++x) {                              \
            outputOs << static_cast<int>(data[x + z * width]) << "\t"; \
        }                                                              \
        outputOs << "\n";                                              \
    }

static void dumpTensor2File(const Tensor* tensor, const char* file) {
    std::ofstream outputOs(file);
    auto type = tensor->getType();

    int dimension = tensor->buffer().dimensions;
    int width     = 1;
    if (dimension > 1) {
        width = tensor->length(dimension - 1);
    }
    const int outside = tensor->elementSize() / width;

    const auto dataType  = type.code;
    const auto dataBytes = type.bytes();

    if (dataType == halide_type_float) {
        DUMP_NUM_DATA(float);
    }
    if (dataType == halide_type_int && dataBytes == 4) {
        DUMP_NUM_DATA(int32_t);
    }
    if (dataType == halide_type_uint && dataBytes == 1) {
        DUMP_CHAR_DATA(uint8_t);
    }
    if (dataType == halide_type_int && dataBytes == 1) {
#ifdef MNN_USE_SSE
        auto data = tensor->host<uint8_t>();
        for (int z = 0; z < outside; ++z) {
            for (int x = 0; x < width; ++x) {
                outputOs << (static_cast<int>(data[x + z * width]) - 128) << "\t";
            }
            outputOs << "\n";
        }
#else
        DUMP_CHAR_DATA(int8_t);
#endif
    }
}

using namespace MNN::CV;
using namespace MNN::Train;
using namespace MNN::Express;
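// getModuleInputs loads one calibration sample for "sequence" inputs: each
// input tensor is read from "<file>/<inputName>.txt", and per-input shapes can
// be overridden by an optional "<file>/input.json" (parsed by LOAD_DATA_SHAPE).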
static std::vector<VARP> getModuleInputs(std::string file, const Module::Info* netInfo, std::vector<std::string> inputNames, std::vector<VARP> varInputs = {}) {
#define LOAD_DATA(TYPE)\
    std::ostringstream fileNameOs;\
    fileNameOs << file << "/" << inputName << ".txt";\
    auto fileName = fileNameOs.str();\
    std::ifstream inputOs(fileName.c_str());\
    if (inputOs.fail()) {\
        MNN_ERROR("TESTERROR Can't open %s\n", fileName.c_str());\
        continue;\
    }\
    for (int i = 0; i < info->size; ++i) {\
        double tempValue;\
        inputOs >> tempValue;\
        ptr[i] = tempValue;\
    }

#define LOAD_DATA_SHAPE()\
    std::ostringstream file_name_os;\
    file_name_os << file << "/input.json";\
    auto file_name = file_name_os.str();\
    std::ifstream input_os(file_name.c_str());\
    if (input_os.fail()) {\
        MNN_ERROR("Error: input.json does not exist in %s, use default shape.\n", file.c_str());\
    } else {\
        rapidjson::Document document;\
        std::ostringstream json_os;\
        json_os << input_os.rdbuf();\
        document.Parse(json_os.str().c_str());\
        if (document.HasParseError()) {\
            MNN_ERROR("Invalid json: %s\n", file_name.c_str());\
        }\
        auto cfgObj = document.GetObject();\
        if (cfgObj.HasMember("inputs")) {\
            auto inputsInfo = document["inputs"].GetArray();\
            for (auto iter = inputsInfo.begin(); iter != inputsInfo.end(); iter++) {\
                auto obj = iter->GetObject();\
                std::string name = obj["name"].GetString();\
                if (obj.HasMember("shape")) {\
                    auto dims = obj["shape"].GetArray();\
                    std::vector<int> shapes;\
                    for (auto iter = dims.begin(); iter != dims.end(); iter++) {\
                        shapes.emplace_back(iter->GetInt());\
                    }\
                    dyInputShape.insert(std::make_pair(name, shapes));\
                }\
            }\
        } else {\
            MNN_ERROR("input.json must have inputs information!\n");\
        }\
    }

    MNN_ASSERT(netInfo != nullptr || varInputs.size() > 0);
    auto sizeInput = 1;
    Dimensionformat dimFormat = NCHW;
    std::vector<Variable::Info> inputsInfo;
    if (netInfo) {
        sizeInput  = netInfo->inputs.size();
        inputsInfo = netInfo->inputs;
        dimFormat  = netInfo->defaultFormat;
    } else {
        sizeInput = varInputs.size();
        for (int i = 0; i < sizeInput; ++i) {
            inputsInfo.emplace_back(*(varInputs[i]->getInfo()));
            inputNames[i] = varInputs[i]->name();
        }
    }
    std::vector<VARP> inputs;
    inputs.resize(sizeInput);
    // Load inputs
    std::map<std::string, std::vector<int>> dyInputShape;
    LOAD_DATA_SHAPE()
    for (int i = 0; i < inputs.size(); ++i) {
        auto inputName = inputNames[i];
        // Resize
        auto shapeIter = dyInputShape.find(inputName);
        if (shapeIter != dyInputShape.end()) {
            auto s = shapeIter->second;
            inputs[i] = _Input(s, dimFormat, inputsInfo[i].type);
        } else {
            // Fall back to the model's own shape when input.json provides none;
            // otherwise inputs[i] stays null and getInfo() below would crash.
            inputs[i] = _Input(inputsInfo[i].dim, inputsInfo[i].order, inputsInfo[i].type);
        }
        auto info = inputs[i]->getInfo();
        if (info->type == halide_type_of<float>()) {
            auto ptr = inputs[i]->writeMap<float>();
            LOAD_DATA(float)
        } else {
            auto floatVar = _Input(info->dim, info->order, halide_type_of<float>());
            auto ptr = floatVar->writeMap<float>();
            LOAD_DATA(float)
            auto temp = _Cast(floatVar, info->type);
            inputs[i]->input(temp);
        }
        inputs[i] = _Convert(inputs[i], inputsInfo[i].order);
    }
#undef LOAD_DATA
#undef LOAD_DATA_SHAPE
    return inputs;
}

static int _getBpp(CV::ImageFormat format) {
    switch (format) {
        case CV::RGB:
        case CV::BGR:
        case CV::YCrCb:
        case CV::YUV:
        case CV::HSV:
        case CV::XYZ:
            return 3;
        case CV::RGBA:
        case CV::BGRA:
            return 4;
        case CV::GRAY:
            return 1;
        case CV::BGR555:
        case CV::BGR565:
            return 2;
        default:
            break;
    }
    return 0;
}
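/*
 Illustrative preTreatConfig.json (values are examples only; the key set
 mirrors what the constructor below actually parses):
 {
     "format": "RGB",
     "mean": [127.5, 127.5, 127.5],
     "normal": [0.00784314, 0.00784314, 0.00784314],
     "width": 224,
     "height": 224,
     "path": "path/to/calibration/images/",
     "used_image_num": 100,
     "feature_quantize_method": "KL",
     "weight_quantize_method": "MAX_ABS",
     "quant_bits": 8,
     "batch_size": 32,
     "debug": false
 }
 Optional keys also recognized: "center_crop_h", "center_crop_w",
 "used_sample_num", "feature_clamp_value", "weight_clamp_value",
 "winogradOpt", "skip_quant_op_names", "input_type", "inputs".
*/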
Calibration::Calibration(MNN::NetT* model, const uint8_t* modelBuffer, const int bufferSize, const std::string& configPath,
                         std::string originalModelFile, std::string destModelFile)
    : _originalModel(model), _originalModelFile(originalModelFile), _destModelFile(destModelFile) {
    // when the format of the input image is RGB/BGR, channels equal 3; GRAY is 1
    _channels = 3;
    rapidjson::Document document;
    {
        std::ifstream fileNames(configPath.c_str());
        std::ostringstream output;
        output << fileNames.rdbuf();
        auto outputStr = output.str();
        document.Parse(outputStr.c_str());
        if (document.HasParseError()) {
            MNN_ERROR("Invalid json\n");
            mValid = false;
            return;
        }
    }
    auto picObj = document.GetObject();
    _imageProcessConfig.filterType = CV::BILINEAR;
    _imageProcessConfig.destFormat = BGR;
    {
        if (picObj.HasMember("format")) {
            auto format = picObj["format"].GetString();
            static std::map<std::string, ImageFormat> formatMap{{"BGR", BGR}, {"RGB", RGB}, {"GRAY", GRAY}, {"RGBA", RGBA}, {"BGRA", BGRA}};
            if (formatMap.find(format) != formatMap.end()) {
                _imageProcessConfig.destFormat = formatMap.find(format)->second;
            }
        }
    }
    switch (_imageProcessConfig.destFormat) {
        case GRAY:
            _channels = 1;
            break;
        case RGB:
        case BGR:
            _channels = 3;
            break;
        case RGBA:
        case BGRA:
            _channels = 4;
            break;
        default:
            break;
    }
    _imageProcessConfig.sourceFormat = RGBA;
    _calibrationFileNum = 0;
    if (picObj.HasMember("mean")) {
        auto mean = picObj["mean"].GetArray();
        int cur = 0;
        for (auto iter = mean.begin(); iter != mean.end(); iter++) {
            _imageProcessConfig.mean[cur++] = iter->GetFloat();
        }
    }
    if (picObj.HasMember("normal")) {
        auto normal = picObj["normal"].GetArray();
        int cur = 0;
        for (auto iter = normal.begin(); iter != normal.end(); iter++) {
            _imageProcessConfig.normal[cur++] = iter->GetFloat();
        }
    }
    if (picObj.HasMember("center_crop_h")) {
        _preprocessConfig.centerCropHeight = picObj["center_crop_h"].GetFloat();
    }
    if (picObj.HasMember("center_crop_w")) {
        _preprocessConfig.centerCropWidth = picObj["center_crop_w"].GetFloat();
    }
    if (picObj.HasMember("width")) {
        _width = picObj["width"].GetInt();
        _preprocessConfig.targetWidth = _width;
    }
    if (picObj.HasMember("height")) {
        _height = picObj["height"].GetInt();
        _preprocessConfig.targetHeight = _height;
    }
    if (picObj.HasMember("batch_size")) {
        _batch = picObj["batch_size"].GetInt();
    }
    if (picObj.HasMember("quant_bits")) {
        _quant_bits = picObj["quant_bits"].GetInt();
    }
    if (!picObj.HasMember("path")) {
        MNN_ERROR("calibration data path not set in .json config file\n");
        return;
    }
    _calibrationFilePath = picObj["path"].GetString();
    if (picObj.HasMember("used_image_num")) {
        _calibrationFileNum = picObj["used_image_num"].GetInt();
    }
    if (picObj.HasMember("used_sample_num")) {
        _calibrationFileNum = picObj["used_sample_num"].GetInt();
    }
    if (picObj.HasMember("feature_quantize_method")) {
        std::string method = picObj["feature_quantize_method"].GetString();
        if (Helper::featureQuantizeMethod.find(method) != Helper::featureQuantizeMethod.end()) {
            _featureQuantizeMethod = method;
        } else {
            MNN_ERROR("unsupported feature quantization method: %s\n", method.c_str());
            return;
        }
    }
    if (picObj.HasMember("weight_quantize_method")) {
        std::string method = picObj["weight_quantize_method"].GetString();
        if (Helper::weightQuantizeMethod.find(method) != Helper::weightQuantizeMethod.end()) {
            _weightQuantizeMethod = method;
        } else {
            MNN_ERROR("unsupported weight quantization method: %s\n", method.c_str());
            return;
        }
    }
    DLOG(INFO) << "Use feature quantization method: " << _featureQuantizeMethod;
    DLOG(INFO) << "Use weight quantization method: " << _weightQuantizeMethod;
    if (picObj.HasMember("feature_clamp_value")) {
        float value = (int)picObj["feature_clamp_value"].GetFloat();
        if (value < 0.0f || value > 127.0f) {
            MNN_ERROR("feature_clamp_value should be in (0, 127], got: %f\n", value);
            return;
        }
        _featureClampValue = value;
    }
    if (picObj.HasMember("weight_clamp_value")) {
        float value = (int)picObj["weight_clamp_value"].GetFloat();
        if (value < 0.0f || value > 127.0f) {
            MNN_ERROR("weight_clamp_value should be in (0, 127], got: %f\n", value);
            return;
        }
        _weightClampValue = value;
        if (_quant_bits < 8) {
            _weightClampValue = (float)(1 << (_quant_bits - 1)) - 1.0f;
        }
    }
    DLOG(INFO) << "feature_clamp_value: " << _featureClampValue;
    DLOG(INFO) << "weight_clamp_value: " << _weightClampValue;
    if (_featureQuantizeMethod == "EMA") {
        _winogradOpt = true;
    } else {
        DLOG(INFO) << "winogradOpt is only available under EMA";
    }
    if (picObj.HasMember("winogradOpt") && picObj["winogradOpt"].GetBool() == false) {
        DLOG(INFO) << "Disable winogradOpt because it is set to false";
        _winogradOpt = false;
    }
    if (picObj.HasMember("skip_quant_op_names")) {
        auto skip_quant_op_names = picObj["skip_quant_op_names"].GetArray();
        for (auto iter = skip_quant_op_names.begin(); iter != skip_quant_op_names.end(); iter++) {
            std::string skip_quant_op_name = iter->GetString();
            _skip_quant_ops.emplace_back(skip_quant_op_name);
            DLOG(INFO) << "skip quant op name: " << skip_quant_op_name;
        }
    }
    if (picObj.HasMember("debug")) {
        _debug = picObj["debug"].GetBool();
    }
    _inputType = Helper::InputType::IMAGE;
    if (picObj.HasMember("input_type")) {
        std::string type = picObj["input_type"].GetString();
        if (type == "sequence") {
            _inputType = Helper::InputType::SEQUENCE;
        }
    }
    _module.reset(Module::load({}, {}, originalModelFile.c_str()));
    auto moduleInfo = _module->getInfo();
    for (int i = 0; i < moduleInfo->inputNames.size(); ++i) {
        mInputNames.emplace_back(moduleInfo->inputNames[i]);
    }
    for (int i = 0; i < moduleInfo->outputNames.size(); ++i) {
        mOutputNames.emplace_back(moduleInfo->outputNames[i]);
    }
    bool checkOutput = false;
    if (picObj.HasMember("inputs")) {
        auto inputsInfo = document["inputs"].GetArray();
        for (auto iter = inputsInfo.begin(); iter != inputsInfo.end(); iter++) {
            auto obj = iter->GetObject();
            std::string name = obj["name"].GetString();
            MNN_PRINT("input: %s\n", name.c_str());
            if (obj.HasMember("value")) {
                float value = obj["value"].GetFloat();
                mInputInfo.insert(std::make_pair(name, value));
            }
            if (obj.HasMember("shape")) {
                auto dims = obj["shape"].GetArray();
                std::vector<int> shapes;
                for (auto iter = dims.begin(); iter != dims.end(); iter++) {
                    shapes.emplace_back(iter->GetInt());
                }
                mInputShape.insert(std::make_pair(name, shapes));
            }
        }
    }
    if (!picObj.HasMember("inputs")) {
        // The user did not provide input names and shapes; derive a default
        // image shape from the preprocessing config and the model's layout.
        std::string name = mInputNames[0];
        std::vector<int> shape(4);
        auto dimensionFormat = moduleInfo->inputs[0].order;
        shape[0] = 1;
        if (dimensionFormat == NCHW || dimensionFormat == NC4HW4) {
            shape[1] = _getBpp(_imageProcessConfig.destFormat);
            shape[2] = _preprocessConfig.targetHeight;
            shape[3] = _preprocessConfig.targetWidth;
        } else { // NHWC
            shape[3] = _getBpp(_imageProcessConfig.destFormat);
            shape[1] = _preprocessConfig.targetHeight;
            shape[2] = _preprocessConfig.targetWidth;
        }
        mInputShape.insert(std::make_pair(name, shape));
    }
    std::shared_ptr<ImageProcess> process(ImageProcess::create(_imageProcessConfig), ImageProcess::destroy);
    _process = process;

    // read calibration file names
    if (_calibrationFilePath.back() != '/') {
        _calibrationFilePath = _calibrationFilePath + "/";
    }
    Helper::readClibrationFiles(_calibrationFiles, _calibrationFilePath.c_str(), &_calibrationFileNum);
    std::sort(_calibrationFiles.begin(), _calibrationFiles.end());

    for (auto& op : _originalModel->oplists) {
        if (op->type == MNN::OpType_BatchNorm) {
            _featureQuantizeMethod = "EMA";
            DLOG(INFO) << "this model has BatchNorm, use EMA quantize method instead";
            break;
        }
    }
    for (auto& subgraph : _originalModel->subgraphs) {
        for (auto& op : subgraph->nodes) {
            if (op->type == MNN::OpType_BatchNorm) {
                _featureQuantizeMethod = "EMA";
                DLOG(INFO) << "this model has BatchNorm, use EMA quantize method instead";
                break;
            }
        }
    }

    MNN::ScheduleConfig config;
    config.backupType = MNN_FORWARD_CPU;
    config.numThread  = 1;
    std::shared_ptr<Executor::RuntimeManager> rtmgr(Executor::RuntimeManager::createRuntimeManager(config));
    if (_featureQuantizeMethod == "KL" || _featureQuantizeMethod == "ADMM") {
        rtmgr->setMode(Interpreter::Session_Debug);
    }
    _module.reset(Module::load(mInputNames, mOutputNames, originalModelFile.c_str(), rtmgr)); // rtmgr.mode->debug
    if (_debug) {
        _moduleOrigin.reset(Module::load(mInputNames, mOutputNames, originalModelFile.c_str(), rtmgr)); // rtmgr.mode->debug
    }
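    // For KL/ADMM, run one forward pass on the first calibration sample so the
    // tensor and statistic maps are populated before calibration starts.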
    if (_featureQuantizeMethod == "KL" || _featureQuantizeMethod == "ADMM") {
        _initMNNSession(modelBuffer, bufferSize);
        auto netInfo = _module->getInfo();
        if (_inputType == Helper::SEQUENCE) {
            mInputs = getModuleInputs(_calibrationFiles[0], _module->getInfo(), mInputNames);
        } else {
            mInputs.resize(1);
            mInputs[0] = _Input(mInputShape[mInputNames[0]], netInfo->inputs[0].order, netInfo->inputs[0].type);
            auto inputTensor = (MNN::Tensor*)mInputs[0]->getTensor();
            Helper::preprocessInput(_process.get(), _preprocessConfig, _calibrationFiles[0], inputTensor, _inputType, mInputs[0]);
        }
        _initMaps();
    }
}

void Calibration::_initMNNSession(const uint8_t* modelBuffer, const int bufferSize) {
    _fake_quant_weights();
}

void Calibration::_initMaps() {
    _featureInfo.clear();
    _featureInfoOrigin.clear();
    _tensorMap.clear();
    // run mnn once, initialize featureMap, opInfo map
    MNN::TensorCallBackWithInfo before = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        std::string opName = info->name();
        std::vector<std::string>::iterator iter = std::find(_skip_quant_ops.begin(), _skip_quant_ops.end(), opName);
        if (iter != _skip_quant_ops.end()) {
            return false;
        }
        for (auto t : nTensors) {
            auto des = TensorUtils::getDescribe(t);
            if (des->index >= 0) {
                _tensorMap[des->index] = t;
            }
        }
        if (Helper::gNotNeedFeatureOp.find(info->type()) == Helper::gNotNeedFeatureOp.end()) {
            int i = 0;
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) == _featureInfo.end() && MNN::TensorUtils::getDescribe(t)->memoryType != MNN::Tensor::InsideDescribe::MEMORY_VIRTUAL) {
                    _featureInfo[t] = std::shared_ptr<TensorStatistic>(
                        new TensorStatistic(t, _featureQuantizeMethod, opName + " input_tensor_" + flatbuffers::NumToString(i), _featureClampValue));
                }
                i++;
            }
        }
        return false;
    };
    MNN::TensorCallBackWithInfo after = [this](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        std::string opName = info->name();
        std::vector<std::string>::iterator iter = std::find(_skip_quant_ops.begin(), _skip_quant_ops.end(), opName);
        if (iter != _skip_quant_ops.end()) {
            return true;
        }
        for (auto t : nTensors) {
            auto des = TensorUtils::getDescribe(t);
            if (des->index >= 0) {
                _tensorMap[des->index] = t;
            }
        }
        if (Helper::gNotNeedFeatureOp.find(info->type()) == Helper::gNotNeedFeatureOp.end()) {
            int i = 0;
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) == _featureInfo.end()) {
                    _featureInfo[t] = std::shared_ptr<TensorStatistic>(
                        new TensorStatistic(t, _featureQuantizeMethod, opName + " output_tensor_" + flatbuffers::NumToString(i), _featureClampValue));
                }
                i++;
            }
        }
        return true;
    };
    Express::Executor::getGlobalExecutor()->setCallBack(std::move(before), std::move(after));
    auto outputs = _module->onForward(mInputs);
    for (auto& output_ : outputs) {
        output_->readMap<float>();
    }
    _featureInfoOrigin = _featureInfo;

    if (_featureQuantizeMethod == "KL") {
        // set the tensor-statistic method of the input tensor to THRESHOLD_MAX
        auto inputTensorStatistic = _featureInfo.find(mInputs[0]->getTensor());
        if (inputTensorStatistic != _featureInfo.end()) {
            inputTensorStatistic->second->setThresholdMethod(THRESHOLD_MAX);
        }
    }
}
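// KL feature calibration makes two passes over the calibration set:
// _computeFeatureMapsRange records each tensor's value range, then
// _collectFeatureMapsDistribution builds histograms within that range;
// TensorStatistic::finishAndCompute() finally derives each tensor's
// (scale, zero point), for the "KL" method by searching for the clipping
// threshold that minimizes the KL divergence between the float and the
// quantized distribution.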
void Calibration::_computeFeatureMapsRange() {
    // feed input data according to the calibration files
    int count = 0;
    auto netInfo = _module->getInfo();
    for (const auto& file : _calibrationFiles) {
        std::vector<VARP> inputs;
        for (auto& iter : _featureInfo) {
            iter.second->setVisited(false);
        }
        for (auto& iter : _featureInfo) {
            iter.second->resetUpdatedRangeFlags();
        }
        if (_inputType == Helper::SEQUENCE) {
            mInputs = getModuleInputs(file, netInfo, mInputNames);
        } else {
            auto inputTensor = (MNN::Tensor*)mInputs[0]->getTensor();
            Helper::preprocessInput(_process.get(), _preprocessConfig, file, inputTensor, _inputType, mInputs[0]);
        }
        MNN::TensorCallBackWithInfo before = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _featureInfo[t]->updateRange();
                    }
                }
            }
            return true;
        };
        MNN::TensorCallBackWithInfo after = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _featureInfo[t]->updateRange();
                    }
                }
            }
            return true;
        };
        Express::Executor::getGlobalExecutor()->setCallBack(std::move(before), std::move(after));
        auto outputs = _module->onForward(mInputs);
    }
}

void Calibration::_collectFeatureMapsDistribution() {
    for (auto& iter : _featureInfo) {
        iter.second->resetDistribution();
    }
    // feed input data according to the calibration files
    int count = 0;
    for (const auto& file : _calibrationFiles) {
        count++;
        for (auto& iter : _featureInfo) {
            iter.second->setVisited(false);
        }
        for (auto& iter : _featureInfo) {
            iter.second->resetUpdatedDistributionFlag();
        }
        auto netInfo = _module->getInfo();
        std::vector<VARP> inputs;
        // Load inputs
        if (_inputType == Helper::SEQUENCE) {
            mInputs = getModuleInputs(file, netInfo, mInputNames);
        } else {
            auto inputTensor = (MNN::Tensor*)mInputs[0]->getTensor();
            Helper::preprocessInput(_process.get(), _preprocessConfig, file, inputTensor, _inputType, mInputs[0]);
        }
        MNN::TensorCallBackWithInfo before = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _featureInfo[t]->updateDistribution();
                    }
                }
            }
            return true;
        };
        MNN::TensorCallBackWithInfo after = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
            for (auto t : nTensors) {
                if (TensorUtils::getDescribe(t)->index < 0) {
                    continue;
                }
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _featureInfo[t]->updateDistribution();
                    }
                }
            }
            return true;
        };
        Express::Executor::getGlobalExecutor()->setCallBack(std::move(before), std::move(after));
        auto outputs = _module->onForward(mInputs);

        MNN_PRINT("\rCollectFeatureDistribution: %.2lf %%", (float)count * 100.0f / (float)_calibrationFileNum);
        fflush(stdout);
    }
    MNN_PRINT("\n");
}

void Calibration::_computeFeatureScaleKL() {
    _computeFeatureMapsRange();
    _collectFeatureMapsDistribution();

    _scales.clear();
    for (auto& iter : _featureInfo) {
        AUTOTIME;
        _scales[iter.first] = iter.second->finishAndCompute();
    }
}
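// ADMM feature calibration packs the whole calibration set into one batched
// input (batch = _calibrationFileNum) and computes each tensor's scale with
// TensorStatistic::computeScaleADMM during a single instrumented forward pass.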
void Calibration::_computeFeatureScaleADMM() {
    // feed input data according to the calibration files
    int count = 0;
    auto netInfo = _module->getInfo();
    std::vector<VARP> inputs(mInputNames.size());
    std::vector<const MNN::Tensor*> inputTensors(mInputNames.size());
    if (_inputType == Helper::IMAGE) {
        for (int i = 0; i < inputs.size(); ++i) {
            auto shape = mInputShape[mInputNames[i]];
            shape[0] = _calibrationFileNum;
            inputs[i] = _Input(shape, netInfo->inputs[i].order, netInfo->inputs[i].type);
            inputTensors[i] = inputs[i]->getTensor();
        }
    }
    auto dimType = MNN::Tensor::CAFFE_C4;
    if (netInfo->inputs[0].order == NHWC) {
        dimType = MNN::Tensor::TENSORFLOW;
    }
    for (const auto& file : _calibrationFiles) {
        if (_inputType == Helper::SEQUENCE) {
            inputs = getModuleInputs(file, netInfo, mInputNames);
        } else {
            auto inputPtr = inputTensors[0]->host<float>() + count * inputTensors[0]->stride(0);
            auto name = mInputNames[0];
            std::shared_ptr<MNN::Tensor> tensor(MNN::Tensor::create(mInputShape[name], netInfo->inputs[0].type, inputPtr, dimType), MNN::Tensor::destroy);
            Helper::preprocessInput(_process.get(), _preprocessConfig, file, tensor.get(), _inputType, mInputs[0]);
        }
        count++;
        MNN_PRINT("\rProcessCalibrationFiles: %.2lf %%", (float)count * 100.0f / (float)_calibrationFileNum);
        fflush(stdout);
    }
    MNN_PRINT("\n");

    _scales.clear();
    const int totalLayers = static_cast<int32_t>(_featureInfo.size());
    count = 0;
    MNN::TensorCallBackWithInfo before = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        if (Helper::gNotNeedFeatureOp.find(info->type()) == Helper::gNotNeedFeatureOp.end()) {
            for (auto t : nTensors) {
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _scales[t] = _featureInfo[t]->computeScaleADMM();
                        count++;
                        MNN_PRINT("\rComputeADMM: %.2lf %%", (float)count * 100.0f / (float)totalLayers);
                        fflush(stdout);
                    }
                }
            }
        }
        return true;
    };
    MNN::TensorCallBackWithInfo after = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        if (Helper::gNotNeedFeatureOp.find(info->type()) == Helper::gNotNeedFeatureOp.end()) {
            for (auto t : nTensors) {
                if (_featureInfo.find(t) != _featureInfo.end()) {
                    if (_featureInfo[t]->visited() == false) {
                        _scales[t] = _featureInfo[t]->computeScaleADMM();
                        count++;
                        MNN_PRINT("\rComputeADMM: %.2lf %%", (float)count * 100.0f / (float)totalLayers);
                        fflush(stdout);
                    }
                }
            }
        }
        return true;
    };
    Express::Executor::getGlobalExecutor()->setCallBack(std::move(before), std::move(after));
    _module->onForward(inputs);
    MNN_PRINT("\n");
}

void Calibration::_fake_quant_weights() {
    auto findAbsMax = [&](const float* weights, const int size) {
        float absMax = 0;
        for (int i = 0; i < size; i++) {
            if (std::fabs(weights[i]) > absMax) {
                absMax = std::fabs(weights[i]);
            }
        }
        return absMax;
    };

    for (const auto& op : _originalModel->oplists) {
        std::vector<std::string>::iterator iter = std::find(_skip_quant_ops.begin(), _skip_quant_ops.end(), op->name);
        if (iter != _skip_quant_ops.end()) {
            continue;
        }
        const auto opType = op->type;
        if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise) {
            continue;
        }
        auto param = op->main.AsConvolution2D();
        const int kernelNum = param->common->outputCount;
        std::vector<float> weights = param->weight;
        const int weightSize = static_cast<int32_t>(weights.size());
        const int kernelSize = weightSize / kernelNum;
        // Per-output-channel symmetric fake quantization:
        // w -> scale * clamp(round(w / scale), -clampValue, clampValue)
        for (int i = 0; i < kernelNum; i++) {
            const int offset = i * kernelSize;
            float absMax = findAbsMax(weights.data() + offset, kernelSize);
            float scale = absMax / _weightClampValue;
            if (absMax < 1e-6f) {
                scale = absMax;
            }
            for (int j = 0; j < kernelSize; j++) {
                float value        = weights[offset + j];
                float quantValue   = std::round(value / scale);
                float clampedValue = std::max(std::min(quantValue, _weightClampValue), -_weightClampValue);
                float dequantValue = scale * clampedValue;
                param->weight[offset + j] = dequantValue;
            }
        }
    }
    DLOG(INFO) << "fake quant weights done.";
}
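// _insertScale writes the calibrated (scale, zero point) pairs into the model
// as extraTensorDescribe entries, then quantizes Conv/Deconv weights with the
// configured weight method and stores them via IDSTEncoder.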
void Calibration::_insertScale() {
    for (const auto iter : _scales) {
        std::unique_ptr<MNN::TensorDescribeT> describe(new MNN::TensorDescribeT);
        auto des = TensorUtils::getDescribe(iter.first);
        if (des->index < 0) {
            continue;
        }
        describe->index = des->index;
        describe->quantInfo.reset(new MNN::TensorQuantInfoT);
        describe->quantInfo->scale = iter.second.first;
        describe->quantInfo->zero  = iter.second.second;
        describe->quantInfo->type  = MNN::DataType_DT_INT8;
        describe->quantInfo->min   = -1 * _featureClampValue;
        describe->quantInfo->max   = 1 * _featureClampValue;
        _originalModel->extraTensorDescribe.emplace_back(std::move(describe));
    }
    for (const auto& op : _originalModel->oplists) {
        const auto opType = op->type;
        std::vector<std::string>::iterator iter = std::find(_skip_quant_ops.begin(), _skip_quant_ops.end(), op->name);
        if (iter != _skip_quant_ops.end()) {
            continue;
        }
        if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise && opType != MNN::OpType_Deconvolution) {
            continue;
        }
        if (op->inputIndexes.size() > 1) {
            continue;
        }
        auto inputTensor  = _tensorMap[op->inputIndexes[0]];
        auto outputTensor = _tensorMap[op->outputIndexes[0]];
        if (inputTensor == nullptr || outputTensor == nullptr) {
            continue;
        }
        // below is Conv/DepthwiseConv weight quant
        const float inputScale  = _scales[inputTensor].first;
        const float outputScale = _scales[outputTensor].first;
        const int inputChannel  = inputTensor->channel();
        const int outputChannel = outputTensor->channel();
        auto param = op->main.AsConvolution2D();
        param->common->inputCount = inputChannel;
        const int channels = param->common->outputCount;
        param->symmetricQuan.reset(new MNN::QuantizedFloatParamT);
        param->symmetricQuan->nbits = _quant_bits;

        const float* originWeight = param->weight.data();
        int originWeightSize = static_cast<int32_t>(param->weight.size());
        auto conv2d = param;
        std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon;
        std::unique_ptr<Tensor> externalWeightTensor, externalBiasTensor;
        if (nullptr != conv2d->quanParameter.get()) {
            flatbuffers::FlatBufferBuilder tempBuilder;
            /*
            tempBuilder.Finish(IDSTQuan::Pack(tempBuilder, conv2d->quanParameter.get()));
            tempBuilder.Finish(Convolution2D::Pack(tempBuilder, conv2d));
            auto conv2d = flatbuffers::GetRoot<Convolution2D>(tempBuilder.GetBufferPointer());
            */
            tempBuilder.Finish(Op::Pack(tempBuilder, op.get()));
            auto pack_op = flatbuffers::GetRoot<Op>(tempBuilder.GetBufferPointer());
            bool forceFloat = true;
            quanCommon = ConvolutionCommon::load(pack_op, nullptr, true, true);
            // Back to float
            originWeight     = quanCommon->weightFloat.get();
            originWeightSize = quanCommon->weightFloat.size();
        }
        const int weightSize = originWeightSize;
        std::vector<int8_t> quantizedWeight(weightSize);
        std::vector<float> quantizedWeightScale(outputChannel);

        if (_weightQuantizeMethod == "MAX_ABS") {
            SymmetricQuantizeWeight(originWeight, weightSize, quantizedWeight.data(), quantizedWeightScale.data(), outputChannel, _weightClampValue);
        } else if (_weightQuantizeMethod == "ADMM") {
            QuantizeWeightADMM(originWeight, weightSize, quantizedWeight.data(), quantizedWeightScale.data(), outputChannel, _weightClampValue);
        }
        param->quanParameter = IDSTEncoder::encode(originWeight, quantizedWeightScale, weightSize / channels, channels, false, quantizedWeight.data(), -_weightClampValue);
        param->quanParameter->scaleIn  = inputScale;
        param->quanParameter->scaleOut = outputScale;
        if (param->common->relu6) {
            param->common->relu  = true;
            param->common->relu6 = false;
        }
        param->weight.clear();
    }
}
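// Debug-only pass (config key "debug"): runs the fake-quantized module and the
// original float module side by side and reports, per tensor, the average
// cosine distance and the ratio of values overflowing the int8 range.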
void Calibration::_computeQuantError() {
    std::map<std::string, std::vector<float>> overflowRatiosMap;
    std::map<std::string, std::vector<float>> tensorCosDistanceMap;
    std::map<std::string, std::vector<float>> fakeQuantedFeatures;
    MNN::TensorCallBackWithInfo before = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        if (info->type() == "Raster") {
            return true;
        }
        for (auto t : nTensors) {
            if (_featureInfo.find(t) != _featureInfo.end()) {
                if (_featureInfo[t]->visited() == false) {
                    auto dequantFeatureAndOverflowRatio = _featureInfo[t]->fakeQuantFeature();
                    fakeQuantedFeatures[_featureInfo[t]->name()] = dequantFeatureAndOverflowRatio.first;
                    overflowRatiosMap[_featureInfo[t]->name()].emplace_back(dequantFeatureAndOverflowRatio.second);
                }
            }
        }
        return true;
    };
    MNN::TensorCallBackWithInfo after = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        for (auto t : nTensors) {
            if (_featureInfo.find(t) != _featureInfo.end()) {
                if (_featureInfo[t]->visited() == false) {
                    auto dequantFeatureAndOverflowRatio = _featureInfo[t]->fakeQuantFeature();
                    fakeQuantedFeatures[_featureInfo[t]->name()] = dequantFeatureAndOverflowRatio.first;
                    overflowRatiosMap[_featureInfo[t]->name()].emplace_back(dequantFeatureAndOverflowRatio.second);
                }
            }
        }
        return true;
    };
    MNN::TensorCallBackWithInfo beforeOrigin = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        if (info->type() == "Raster") {
            return true;
        }
        for (auto t : nTensors) {
            if (_featureInfoOrigin.find(t) != _featureInfoOrigin.end()) {
                if (_featureInfoOrigin[t]->visited() == false) {
                    auto name = _featureInfoOrigin[t]->name();
                    float cosDis = _featureInfoOrigin[t]->computeDistance(fakeQuantedFeatures[name]);
                    tensorCosDistanceMap[name].emplace_back(cosDis);
                }
            }
        }
        return true;
    };
    MNN::TensorCallBackWithInfo afterOrigin = [&](const std::vector<MNN::Tensor*>& nTensors, const MNN::OperatorInfo* info) {
        for (auto t : nTensors) {
            if (_featureInfoOrigin.find(t) != _featureInfoOrigin.end()) {
                if (_featureInfoOrigin[t]->visited() == false) {
                    auto name = _featureInfoOrigin[t]->name();
                    float cosDis = _featureInfoOrigin[t]->computeDistance(fakeQuantedFeatures[name]);
                    tensorCosDistanceMap[name].emplace_back(cosDis);
                }
            }
        }
        return true;
    };

    int count = 0;
    for (const auto& file : _calibrationFiles) {
        count++;
        for (auto& iter : _featureInfo) {
            iter.second->setVisited(false);
        }
        for (auto& iter : _featureInfoOrigin) {
            iter.second->setVisited(false);
        }
        std::vector<VARP> inputs(mInputNames.size());
        auto netInfo = _module->getInfo();
        if (_inputType == Helper::SEQUENCE) {
            inputs = getModuleInputs(file, netInfo, mInputNames);
        } else {
            inputs.resize(1);
            inputs[0] = _Input(mInputShape[mInputNames[0]], netInfo->inputs[0].order, netInfo->inputs[0].type);
            Helper::preprocessInput(_process.get(), _preprocessConfig, file, (MNN::Tensor*)inputs[0]->getTensor(), _inputType, inputs[0]);
        }
        Express::Executor::getGlobalExecutor()->setCallBack(std::move(before), std::move(after));
        _module->onForward(inputs);
        Express::Executor::getGlobalExecutor()->setCallBack(std::move(beforeOrigin), std::move(afterOrigin));
        _moduleOrigin->onForward(inputs);
        MNN_PRINT("\rcomputeDistance: %.2lf %%", (float)count * 100.0f / (float)_calibrationFileNum);
        fflush(stdout);
    }
    MNN_PRINT("\n\nDebug info:\n\n");
    for (auto& iter : tensorCosDistanceMap) {
        auto name = iter.first;
        float sumCos = 0.0f, sumOverflow = 0.0f;
        for (int i = 0; i < iter.second.size(); i++) {
            sumCos += iter.second[i];
            sumOverflow += overflowRatiosMap[name][i];
        }
        float avgCosDistance   = sumCos / _calibrationFiles.size();
        float avgOverflowRatio = sumOverflow / _calibrationFiles.size();
        MNN_PRINT("%s: cos distance: %f, overflow ratio: %f\n", name.c_str(), avgCosDistance, avgOverflowRatio);
    }
}
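// EMA ("moving average") quantization rebuilds the graph with fake-quant ops
// (NN::turnQuantize) and iterates over the calibration set in training mode;
// stepping the SGD solver on a zero loss leaves the weights essentially
// untouched while the moving-average activation ranges settle. The graph is
// then frozen back to an inference model and saved.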
void Calibration::_quantizeModelEMA() {
    auto varMap = Variable::loadMap(_originalModelFile.c_str());
    if (varMap.empty()) {
        MNN_ERROR("Can not load model %s\n", _originalModelFile.c_str());
        return;
    }
    auto inputOutputs = Variable::getInputAndOutput(varMap);
    auto varInputs  = Variable::mapToSequence(inputOutputs.first);
    auto varOutputs = Variable::mapToSequence(inputOutputs.second);
    auto originInfo = varInputs[0]->getInfo();
    auto originFormat = NC4HW4;
    auto originType = halide_type_of<float>();
    std::vector<int> originDims;
    if (nullptr != originInfo) {
        originFormat = originInfo->order;
        originDims   = originInfo->dim;
        originType   = originInfo->type;
    }
    _module.reset(NN::extract(varInputs, varOutputs, true), Module::destroy);
    NN::turnQuantize(_module.get(), _quant_bits, NN::PerTensor, NN::MovingAverage, _winogradOpt);

    auto exe = Executor::getGlobalExecutor();
    BackendConfig config;
    exe->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1);

    std::shared_ptr<SGD> solver(new SGD(_module));
    solver->setLearningRate(1e-5);
    solver->setMomentum(0.9f);
    solver->setWeightDecay(0.00004f);

    DLOG(INFO) << "batch size: " << _batch;
    DLOG(INFO) << "quant bits: " << _quant_bits;

    if (_calibrationFileNum < _batch) {
        MNN_ERROR("_calibrationFileNum %d < batch size %d, set batch size as %d\n", _calibrationFileNum, _batch, _calibrationFileNum);
        _batch = _calibrationFileNum;
        if (_inputType == Helper::SEQUENCE) {
            _batch = 1;
        }
    }
    const int trainIterations = _calibrationFileNum / _batch;

    _module->clearCache();
    exe->gc(Executor::FULL);
    _module->setIsTraining(true);

    for (int it = 0; it < trainIterations; it++) {
        std::vector<VARP> inputs(varInputs.size()); // inputs[i].dim = [batch, c, h, w]
        int indicesStart = it * _batch;
        int indicesEnd   = indicesStart + _batch;
        // Init batch-size inputs
        if (_inputType == Helper::IMAGE) {
            inputs[0] = _Input({_batch, originDims[1], originDims[2], originDims[3]}, originFormat, originType);
        }
        // Compose the batch input
        for (int k = indicesStart; k < indicesEnd; ++k) {
            const auto file = _calibrationFiles[k];
            if (_inputType == Helper::SEQUENCE) {
                inputs = getModuleInputs(file, nullptr, mInputNames, varInputs);
            } else {
                auto singleInput = _Input(originDims, originFormat, originType);
                auto inputTensor = (MNN::Tensor*)singleInput->getTensor();
                Helper::preprocessInput(_process.get(), _preprocessConfig, file, inputTensor, _inputType, inputs[0]);
                ::memcpy(inputs[0]->writeMap<float>() + (k - indicesStart) * inputTensor->elementSize(),
                         inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
            }
        }
        auto predicts = _module->onForward(inputs);
        for (auto& output : predicts) {
            auto ptr = output->readMap<float>();
        }
        MNN_PRINT("\rquantize with EMA: %.2lf %%", (it + 1) * 100.0f / trainIterations);
        fflush(stdout);
        solver->step(_Scalar<float>(0.0f));
    }
    MNN_PRINT("\n");
    _module->setIsTraining(false);
    exe->gc(Executor::PART);

    std::vector<VARP> inputsForward;
    inputsForward.resize(varInputs.size());
    for (int i = 0; i < inputsForward.size(); ++i) {
        auto name  = varInputs[i]->name();
        auto input = _Input(varInputs[i]->getInfo()->dim, varInputs[i]->getInfo()->order, varInputs[i]->getInfo()->type);
        input->setName(name);
        inputsForward[i] = input;
    }
    auto predicts = _module->onForward(inputsForward);
    Transformer::turnModelToInfer()->onExecute(predicts);
    for (int i = 0; i < predicts.size(); i++) {
        predicts[i]->setName(varOutputs[i]->name());
    }
    Variable::save(predicts, _destModelFile.c_str());
    ConvertToFullQuant::convert(_destModelFile);

    std::unique_ptr<MNN::NetT> netT;
    {
        std::ifstream input(_destModelFile, std::ifstream::in | std::ifstream::binary);
        std::ostringstream outputOs;
        outputOs << input.rdbuf();
        netT = MNN::UnPackNet(outputOs.str().c_str());
    }
    ComputeUnaryBuffer(netT.get());
    {
        flatbuffers::FlatBufferBuilder builderOutput(1024);
        builderOutput.ForceDefaults(true);
        auto len = MNN::Net::Pack(builderOutput, netT.get());
        builderOutput.Finish(len);
        std::ofstream output(_destModelFile, std::ofstream::binary);
        output.write((const char*)builderOutput.GetBufferPointer(), builderOutput.GetSize());
    }
}
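// Dispatch: EMA writes the quantized model itself; KL/ADMM first compute
// feature scales, optionally report quantization error, then embed scales and
// quantized weights into the model and serialize it.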
void Calibration::runQuantizeModel() {
    if (_featureQuantizeMethod == "EMA") {
        _quantizeModelEMA();
        return;
    }
    if (_featureQuantizeMethod == "KL") {
        _computeFeatureScaleKL();
    } else if (_featureQuantizeMethod == "ADMM") {
        _computeFeatureScaleADMM();
    }
    if (_debug) {
        _computeQuantError();
    }
    _insertScale();
    ComputeUnaryBuffer(_originalModel);
    {
        flatbuffers::FlatBufferBuilder builderOutput(1024);
        builderOutput.ForceDefaults(true);
        auto len = MNN::Net::Pack(builderOutput, _originalModel);
        builderOutput.Finish(len);
        std::ofstream output(_destModelFile, std::ofstream::binary);
        output.write((const char*)builderOutput.GetBufferPointer(), builderOutput.GetSize());
    }
}

void Calibration::dumpTensorScales(const std::string& modelFile) {
    rapidjson::StringBuffer sb;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(sb);
    writer.StartArray();
    for (auto iter = _originalModel->oplists.begin(); iter != _originalModel->oplists.end(); iter++) {
        auto op = iter->get();
        const auto opType = op->type;
        const auto name   = op->name;
        if (opType == MNN::OpType_Raster) {
            continue;
        }
        writer.StartObject();

        writer.Key("name");
        writer.String(rapidjson::StringRef(name.c_str(), name.size()));

        auto& inputIndexes  = op->inputIndexes;
        const int inputSize = static_cast<int32_t>(inputIndexes.size());
        if (inputSize > 0) {
            writer.Key("inputs");
            writer.StartArray();
            for (int i = 0; i < inputSize; ++i) {
                const auto curInputIndex = inputIndexes[i];
                auto input = _tensorMap[curInputIndex];
                auto inputOpScale = _scales[input];
                writer.StartObject();
                writer.Key("tensorIndex");
                writer.Int(curInputIndex);
                writer.Key("scales");
                writer.StartArray();
                writer.Double(inputOpScale.first);
                writer.EndArray();
                writer.Key("zeropoint");
                writer.StartArray();
                writer.Double(inputOpScale.second);
                writer.EndArray();
                writer.EndObject();
            }
            writer.EndArray();
        }
        auto& outputIndexes  = op->outputIndexes;
        const int outputSize = static_cast<int32_t>(outputIndexes.size());
        if (outputSize > 0) {
            writer.Key("outputs");
            writer.StartArray();
            for (int i = 0; i < outputSize; ++i) {
                const auto curOutputIndex = outputIndexes[i];
                auto output = _tensorMap[curOutputIndex];
                auto outputOpScale = _scales[output];
                writer.StartObject();
                writer.Key("tensorIndex");
                writer.Int(curOutputIndex);
                writer.Key("scales");
                writer.StartArray();
                writer.Double(outputOpScale.first);
                writer.EndArray();
                writer.Key("zeropoint");
                writer.StartArray();
                writer.Double(outputOpScale.second);
                writer.EndArray();
                writer.EndObject();
            }
            writer.EndArray();
        }
        writer.EndObject();
    }
    writer.EndArray();

    std::string scaleFile = modelFile + ".json";
    std::ofstream os(scaleFile);
    if (os.is_open()) {
        os << sb.GetString() << std::endl;
        os.close();
    } else {
        // Note: printing os.failbit here would just show the failbit constant,
        // not a real error code, so report the failure plainly.
        std::cerr << "open scale file " << scaleFile << " failed" << std::endl;
    }
}
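// Int8 unary ops run off a 255-entry lookup table: for every int8 input value,
// dequantize, apply the float unary function, and requantize the result.
// selectUnaryProc maps a UnaryOpOperation to its float Express implementation.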
typedef VARP (*unaryProc)(VARP input);
static unaryProc selectUnaryProc(int type) {
    switch (type) {
        case UnaryOpOperation_ABS:
            return MNN::Express::_Abs;
        case UnaryOpOperation_SQUARE:
            return MNN::Express::_Square;
        case UnaryOpOperation_NEG:
            return MNN::Express::_Negative;
        case UnaryOpOperation_RSQRT:
            return MNN::Express::_Rsqrt;
        case UnaryOpOperation_EXP:
            return MNN::Express::_Exp;
        case UnaryOpOperation_COS:
            return MNN::Express::_Cos;
        case UnaryOpOperation_SIN:
            return MNN::Express::_Sin;
        case UnaryOpOperation_SIGMOID:
            return MNN::Express::_Sigmoid;
        case UnaryOpOperation_TANH:
            return MNN::Express::_Tanh;
        case UnaryOpOperation_TAN:
            return MNN::Express::_Tan;
        case UnaryOpOperation_ATAN:
            return MNN::Express::_Atan;
        case UnaryOpOperation_SQRT:
            return MNN::Express::_Sqrt;
        case UnaryOpOperation_RECIPROCAL:
            return MNN::Express::_Reciprocal;
        case UnaryOpOperation_LOG1P:
            return MNN::Express::_Log1p;
        case UnaryOpOperation_LOG:
            return MNN::Express::_Log;
        case UnaryOpOperation_ACOSH:
            return MNN::Express::_Acosh;
        case UnaryOpOperation_SINH:
            return MNN::Express::_Sinh;
        case UnaryOpOperation_ASINH:
            return MNN::Express::_Asinh;
        case UnaryOpOperation_ATANH:
            return MNN::Express::_Atanh;
        case UnaryOpOperation_SIGN:
            return MNN::Express::_Sign;
        case UnaryOpOperation_COSH:
            return MNN::Express::_Cosh;
        case UnaryOpOperation_ERF:
            return MNN::Express::_Erf;
        case UnaryOpOperation_ERFC:
            return MNN::Express::_Erfc;
        case UnaryOpOperation_ERFINV:
            return MNN::Express::_Erfinv;
        case UnaryOpOperation_EXPM1:
            return MNN::Express::_Expm1;
        case UnaryOpOperation_ASIN:
            return MNN::Express::_Asin;
        case UnaryOpOperation_ACOS:
            return MNN::Express::_Acos;
        case UnaryOpOperation_HARDSWISH:
            return MNN::Express::_Hardswish;
        case UnaryOpOperation_GELU:
            return MNN::Express::_Gelu;
        case UnaryOpOperation_SILU:
            return MNN::Express::_Silu;
        default:
            MNN_ASSERT(false);
            break;
    }
    return nullptr;
}

void Calibration::ComputeUnaryBuffer(MNN::NetT* net) {
    for (auto iter = net->oplists.begin(); iter != net->oplists.end(); ++iter) {
        auto op     = iter->get();
        auto opType = op->type;
        std::map<int, TensorDescribeT*> describes;
        for (auto& des : _originalModel->extraTensorDescribe) {
            describes.insert(std::make_pair(des->index, des.get()));
        }
        if (opType == MNN::OpType_Sigmoid || opType == MNN::OpType_TanH) {
            op->type       = OpType_UnaryOp;
            op->main.value = new UnaryOpT;
            op->main.type  = OpParameter_UnaryOp;
            op->main.AsUnaryOp()->opType = UnaryOpOperation_SIGMOID;
            if (opType == MNN::OpType_TanH) {
                op->main.AsUnaryOp()->opType = UnaryOpOperation_TANH;
            }
            opType = op->type;
        }
        if (opType == MNN::OpType_UnaryOp) {
            auto type = op->main.AsUnaryOp()->opType;
            if (type == UnaryOpOperation_ABS || type == UnaryOpOperation_NEG || type == UnaryOpOperation_SIGN) {
                continue;
            }
            auto outputId = op->outputIndexes[0];
            if (describes.find(outputId) == describes.end()) {
                continue;
            }
            auto unaryDes  = describes.find(outputId)->second;
            float outScale = unaryDes->quantInfo->scale;
            float outZero  = unaryDes->quantInfo->zero;
            auto inputId = op->inputIndexes[0];
            if (describes.find(inputId) == describes.end()) {
                continue;
            }
            op->main.AsUnaryOp()->tableInt8.resize(255);
            auto unaryParam = op->main.AsUnaryOp()->tableInt8.data();
            unaryDes = describes.find(inputId)->second;
            float inpScale = unaryDes->quantInfo->scale;
            float inpZero  = unaryDes->quantInfo->zero;

            // Read input data.
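// CLI entry point: ./quantized.out src.mnn dst.mnn preTreatConfig.json
// Loads src.mnn, runs calibration/quantization, writes dst.mnn plus a
// dst.mnn.json dump of the calibrated tensor scales.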
            std::vector<float> dataInput;
            float fx = 0.f;
            auto input = _Input({255}, NCHW, halide_type_of<float>());
            input->setName("input_tensor");
            auto ptr_in = input->template writeMap<float>();
            for (int i = -127; i <= 127; ++i) {
                fx = (i - inpZero) * inpScale;
                dataInput.push_back(fx);
                ptr_in[i + 127] = fx;
            }
            input->unMap();
            // Compute output data.
            VARP output;
            auto func = selectUnaryProc(type);
            if (nullptr == func) {
                MNN_ERROR("Don't support quantizing UnaryOP: %s to Int8\n", op->name.c_str());
                continue; // skip ops without a float implementation instead of calling a null proc
            }
            output = func(input);
            auto gotOutput = output->template readMap<float>();
            // Write output data.
            int val;
            for (int i = 0; i < 255; ++i) {
                val = (int)roundf(gotOutput[i] / outScale) + outZero;
                if (val > 127) {
                    val = 127;
                }
                if (val < -127) {
                    val = -127;
                }
                unaryParam[i] = val;
            }
        }
    }
}

int quant_main(int argc, const char* argv[]) {
    if (argc < 4) {
        DLOG(INFO) << "Usage: ./quantized.out src.mnn dst.mnn preTreatConfig.json\n";
        return 0;
    }
    const char* modelFile      = argv[1];
    const char* preTreatConfig = argv[3];
    const char* dstFile        = argv[2];
    DLOG(INFO) << ">>> modelFile: " << modelFile;
    DLOG(INFO) << ">>> preTreatConfig: " << preTreatConfig;
    DLOG(INFO) << ">>> dstFile: " << dstFile;
    std::unique_ptr<MNN::NetT> netT;
    {
        std::shared_ptr<MNN::Interpreter> interp(MNN::Interpreter::createFromFile(modelFile), MNN::Interpreter::destroy);
        if (nullptr == interp.get()) {
            return 0;
        }
        netT = MNN::UnPackNet(interp->getModelBuffer().first);
    }

    // temp build net for inference
    flatbuffers::FlatBufferBuilder builder(1024);
    auto offset = MNN::Net::Pack(builder, netT.get());
    builder.Finish(offset);
    int size = builder.GetSize();
    auto ocontent = builder.GetBufferPointer();

    // model buffer for creating mnn Interpreter
    // (array form of unique_ptr so delete[] matches new[])
    std::unique_ptr<uint8_t[]> modelForInference(new uint8_t[size]);
    memcpy(modelForInference.get(), ocontent, size);

    std::unique_ptr<uint8_t[]> modelOriginal(new uint8_t[size]);
    memcpy(modelOriginal.get(), ocontent, size);

    netT.reset();
    netT = MNN::UnPackNet(modelOriginal.get());

    // quantize model's weight
    DLOG(INFO) << "Calibrate the feature and quantize model...";
    std::shared_ptr<Calibration> calibration(
        new Calibration(netT.get(), modelForInference.get(), size, preTreatConfig, std::string(modelFile), std::string(dstFile)));
    if (!calibration->valid()) {
        return 0;
    }
    calibration->runQuantizeModel();
    calibration->dumpTensorScales(dstFile);
    DLOG(INFO) << "Quantize model done!";

    return 0;
}