in src/model.cpp [735:775]
// Parse one line of a TSV embedding file and copy its values into the
// matching row of LHSEmbeddings_.
//
// A well-formed line has `cols + 1` fields: a key followed by `cols`
// numeric values. Malformed lines are repaired on a best-effort basis, with
// a diagnostic on stdout for each repair:
//   - more than cols + 1 fields: the extra fields are reported and dropped;
//   - exactly cols fields: the key is assumed missing and an empty key is
//     inserted in front;
//   - fewer fields than that: the record is padded with "0.0".
//
// The key is looked up with dict_->getId(). When that returns -1 the line is
// skipped; an error is written to stderr only if the key is non-empty.
//
// @param line     The raw line. Trailing whitespace is stripped in place.
// @param lineNum  Line number, used only in diagnostics.
// @param cols     Number of numeric values expected after the key
//                 (assumed non-negative -- TODO confirm against callers).
// @param sep      Set of delimiter characters; any one of them splits fields.
//
// NOTE: boost::lexical_cast throws boost::bad_lexical_cast on a value that
// does not parse as Real; that exception is not caught here and propagates
// to the caller.
void EmbedModel::loadTsvLine(string& line, int lineNum,
                             int cols, const string sep) {
  static const string zero = "0.0";
  const size_t numValues = static_cast<size_t>(cols);
  const size_t numFields = numValues + 1;  // key + values

  // Strip trailing whitespace. isspace() requires a value representable as
  // unsigned char; passing a negative plain char is undefined behaviour.
  while (!line.empty() &&
         isspace(static_cast<unsigned char>(line.back()))) {
    line.pop_back();
  }

  vector<string> pieces;
  boost::split(pieces, line, boost::is_any_of(sep));

  if (pieces.size() > numFields) {
    cout << "Hmm, truncating long (" << pieces.size()
         << ") record at line " << lineNum << "\n";
    // Index 0 is the key and 1..cols are the values, so the first field
    // that is actually surplus sits at index cols + 1.
    for (size_t i = numFields; i < pieces.size(); i++) {
      cout << "Warning excess fields " << pieces[i]
           << "; misformatted file?\n";
    }
    pieces.resize(numFields);
  }

  if (pieces.size() == numValues) {
    // Exactly one field short: treat it as a missing key rather than a
    // missing value.
    cout << "Missing record at line " << lineNum
         << "; assuming empty string\n";
    pieces.insert(pieces.begin(), "");
  }

  while (pieces.size() < numFields) {
    cout << "Zero-padding short record at line " << lineNum << "\n";
    pieces.push_back(zero);
  }

  auto idx = dict_->getId(pieces[0]);
  if (idx == -1) {
    // An empty key is skipped silently; a non-empty one is reported.
    if (pieces[0].size() > 0) {
      cerr << "Failed to insert record: " << line << "\n";
    }
    return;
  }

  auto row = LHSEmbeddings_->row(idx);
  for (int i = 0; i < cols; i++) {
    row(i) = boost::lexical_cast<Real>(pieces[i + 1].c_str());
  }
}