void EmbedModel::loadTsvLine()

in src/model.cpp [735:775]


// Parses one line of a TSV embedding file and stores its values in
// LHSEmbeddings_.
//
// A well-formed record has cols + 1 fields separated by any character in
// `sep`: a dictionary key followed by `cols` numeric values. Malformed
// records are repaired, with a warning on stdout:
//   - more than cols + 1 fields: the trailing extras are reported and dropped;
//   - exactly cols fields: the key is assumed missing and an empty key is
//     prepended (such a record is then skipped silently if the dictionary
//     lookup of the empty key fails);
//   - fewer than cols fields: the record is padded with "0.0".
//
// Parameters:
//   line    - the raw record; modified in place (trailing whitespace removed).
//   lineNum - line number, used only in diagnostics.
//   cols    - number of numeric values expected after the key.
//   sep     - set of separator characters.
//
// If dict_->getId() returns -1 for the key the record is skipped; a message
// is written to stderr unless the key is empty. A value that cannot be
// converted makes boost::lexical_cast throw; the exception is not caught here.
void EmbedModel::loadTsvLine(string& line, int lineNum,
                             int cols, const string sep) {
  static const string zero = "0.0";
  // Key plus `cols` values.
  const size_t expected = static_cast<size_t>(cols) + 1;

  // Strip trailing whitespace. The cast matters: passing a negative char
  // (e.g. a byte of non-ASCII text) to isspace is undefined behaviour.
  while (!line.empty() &&
         isspace(static_cast<unsigned char>(line.back()))) {
    line.pop_back();
  }

  vector<string> pieces;
  boost::split(pieces, line, boost::is_any_of(sep));

  if (pieces.size() > expected) {
    cout << "Hmm, truncating long (" << pieces.size()
         << ") record at line " << lineNum << "\n";
    // Only fields past the last expected value are excess; index `cols`
    // itself is still a valid value and is kept.
    for (size_t i = expected; i < pieces.size(); i++) {
      cout << "Warning excess fields " << pieces[i]
           << "; misformatted file?\n";
    }
    pieces.resize(expected);
  }
  if (pieces.size() == static_cast<size_t>(cols)) {
    cout << "Missing record at line " << lineNum
         << "; assuming empty string\n";
    pieces.insert(pieces.begin(), "");
  }
  if (pieces.size() < expected) {
    // Warn once per record rather than once per missing field.
    cout << "Zero-padding short record at line " << lineNum << "\n";
    pieces.resize(expected, zero);
  }

  auto idx = dict_->getId(pieces[0]);
  if (idx == -1) {
    if (!pieces[0].empty()) {
      cerr << "Failed to insert record: " << line << "\n";
    }
    return;
  }

  auto row = LHSEmbeddings_->row(idx);
  for (int i = 0; i < cols; i++) {
    row(i) = boost::lexical_cast<Real>(pieces[i + 1].c_str());
  }
}