in inference/src/translator/translation_model.cpp [56:104]
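// Initializes backend `idx`: builds an inference-only expression graph bound
// to CPU device `idx`, creates the scorer ensemble (from in-memory binary
// blobs when available, otherwise from files on disk), and runs an initial
// forward pass over the graph.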
void TranslationModel::loadBackend(size_t idx) {
  auto &graph = backend_[idx].graph;
  auto &scorerEnsemble = backend_[idx].scorerEnsemble;

  marian::DeviceId device_(idx, DeviceType::cpu);
  graph = New<ExpressionGraph>(/*inference=*/true);  // set the graph to be inference only
  auto prec = options_->get<std::vector<std::string>>("precision", {"float32"});
  graph->setDefaultElementType(typeFromString(prec[0]));
  graph->setDevice(device_);
  graph->getBackend()->configureDevice(options_);
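  // Reserve the graph's tensor workspace (the argument is in megabytes).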
  graph->reserveWorkspaceMB(5);

  // If memory_.models is populated, every model was provided as an in-memory binary blob.
  if (!memory_.models.empty()) {
    const std::vector<const void *> container = std::invoke([&]() {
      std::vector<const void *> model_ptrs(memory_.models.size());
      for (size_t i = 0; i < memory_.models.size(); ++i) {
        const AlignedMemory &model = memory_.models[i];
        ABORT_IF(model.size() == 0 || model.begin() == nullptr, "The provided memory is empty. Cannot load the model.");
        ABORT_IF(
            (uintptr_t)model.begin() % 256 != 0,
            "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
        if (options_->get<bool>("check-bytearray", false)) {
          ABORT_IF(!validateBinaryModel(model, model.size()),
                   "The binary file is invalid. Incomplete or corrupted download?");
        }
        model_ptrs[i] = model.begin();
        LOG(debug, "Loaded model {} of {} from memory", (i + 1), model_ptrs.size());
      }
      return model_ptrs;
    });
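    // Build the scorer ensemble directly from the validated in-memory pointers.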
    scorerEnsemble = createScorers(options_, container);
  } else {
    // Load npz-format models from file, or a mixture of binary/npz formats.
    scorerEnsemble = createScorers(options_);
    LOG(debug, "Loaded {} model(s) from file", scorerEnsemble.size());
  }

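  // Bind each scorer to this backend's graph and attach the shortlist
  // generator, if one was configured.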
  for (auto &scorer : scorerEnsemble) {
    scorer->init(graph);
    if (shortlistGenerator_) {
      scorer->setShortlistGenerator(shortlistGenerator_);
    }
  }

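  // Run one forward pass so that model parameters are allocated and loaded
  // up front, before the first translation request.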
  graph->forward();
}
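
// Usage sketch (an assumption for illustration, not code from this file):
// backends are indexed per worker, so a caller holding a TranslationModel
// with `numWorkers` replicas could load them as follows; `model` and
// `numWorkers` are hypothetical names.
//
//   for (size_t idx = 0; idx < numWorkers; ++idx) {
//     model.loadBackend(idx);
//   }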