in src/starspace.cpp [281:316]
void StarSpace::loadBaseDocs() {
if (args_->basedoc.empty()) {
if (args_->fileFormat == "labelDoc") {
std::cerr << "Must provide base labels when label is featured.\n";
exit(EXIT_FAILURE);
}
for (int i = 0; i < dict_->nlabels(); i++) {
baseDocs_.push_back({ make_pair(i + dict_->nwords(), 1.0) });
baseDocVectors_.push_back(
model_->projectRHS({ make_pair(i + dict_->nwords(), 1.0) })
);
}
cout << "Predictions use " << dict_->nlabels() << " known labels." << endl;
} else {
cout << "Loading base docs from file : " << args_->basedoc << endl;
ifstream fin(args_->basedoc);
if (!fin.is_open()) {
std::cerr << "Base doc file cannot be opened for loading!" << std::endl;
exit(EXIT_FAILURE);
}
string line;
while (getline(fin, line)) {
vector<Base> ids;
parseDoc(line, ids, "\t ");
baseDocs_.push_back(ids);
auto docVec = model_->projectRHS(ids);
baseDocVectors_.push_back(docVec);
}
fin.close();
if (baseDocVectors_.size() == 0) {
std::cerr << "ERROR: basedoc file '" << args_->basedoc << "' is empty." << std::endl;
exit(EXIT_FAILURE);
}
cout << "Finished loading " << baseDocVectors_.size() << " base docs.\n";
}
}