in src/fasttext.cc [680:723]
// Builds the input matrix from a text file of pretrained vectors.
//
// The file is read as a header holding two integers, "<count> <dim>",
// followed by <count> entries, each a whitespace-delimited word and <dim>
// numeric components. Every word read is added to dict_, after which the
// dictionary is re-thresholded and re-initialised. The returned matrix has
// dict_->nwords() + args_->bucket rows and args_->dim columns; it is first
// filled by DenseMatrix::uniform() and then the rows of the words found in
// the dictionary are overwritten with the vectors from the file.
//
// Throws std::invalid_argument when the file cannot be opened, when the
// header is missing, unparsable or has a negative count, when the file's
// dimension differs from args_->dim, or when an entry cannot be read in full.
std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
    const std::string& filename) const {
  std::ifstream in(filename);
  if (!in.is_open()) {
    throw std::invalid_argument(filename + " cannot be opened for loading!");
  }

  // Zero-initialise and verify the extraction: a failed read would otherwise
  // leave these indeterminate before they are compared and used as sizes.
  int64_t n = 0;
  int64_t dim = 0;
  if (!(in >> n >> dim) || n < 0) {
    throw std::invalid_argument(
        filename + " does not start with a valid \"<count> <dim>\" header!");
  }
  if (dim != args_->dim) {
    throw std::invalid_argument(
        "Dimension of pretrained vectors (" + std::to_string(dim) +
        ") does not match dimension (" + std::to_string(args_->dim) + ")!");
  }

  // Temporary matrix holding the vectors in file order.
  std::shared_ptr<DenseMatrix> mat = std::make_shared<DenseMatrix>(n, dim);
  std::vector<std::string> words;
  words.reserve(static_cast<size_t>(n));

  for (int64_t i = 0; i < n; i++) {
    std::string word;
    in >> word;
    for (int64_t j = 0; j < dim; j++) {
      in >> mat->at(i, j);
    }
    // Once an extraction fails the stream stays failed, so one check per
    // entry catches a missing word as well as a missing/invalid component.
    if (!in) {
      throw std::invalid_argument(
          filename + " is truncated or malformed at entry " +
          std::to_string(i) + " (expected " + std::to_string(n) +
          " entries of dimension " + std::to_string(dim) + ")!");
    }
    words.push_back(word);
    dict_->add(word);
  }
  in.close();

  // NOTE(review): presumably threshold(1, 0) keeps every word seen at least
  // once and init() rebuilds derived dictionary state - confirm in Dictionary.
  dict_->threshold(1, 0);
  dict_->init();

  std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
      dict_->nwords() + args_->bucket, args_->dim);
  input->uniform(1.0 / args_->dim, args_->thread, args_->seed);

  for (int64_t i = 0; i < n; i++) {
    const int32_t idx = dict_->getId(words[static_cast<size_t>(i)]);
    // Ids outside the word range have no word row to overwrite; the values
    // written by uniform() are kept for them.
    if (idx < 0 || idx >= dict_->nwords()) {
      continue;
    }
    for (int64_t j = 0; j < dim; j++) {
      input->at(idx, j) = mat->at(i, j);
    }
  }
  return input;
}