bool LayerDataParser::parse()

in src/doc_parser.cpp [25:69]


// Converts one delimited chunk of text into weighted dictionary features.
//
// s     - raw text; it is cut at every character contained in `sep`.
// feats - output; one (id, weight) entry is appended per token known to
//         dict_. Entries already in the vector are kept.
// sep   - set of delimiter characters handed to boost::is_any_of.
//
// If the first token contains "__weight__", it is treated as an
// example-level weight marker rather than a word: whatever follows the first
// args_->weightSep inside it is read with atof and multiplied into every
// feature weight, and the token itself is skipped.
//
// Returns true when `feats` is non-empty after the call.
bool LayerDataParser::parse(
    string& s,
    vector<Base>& feats,
    const string& sep) {

  // Tokenise the input on any of the separator characters.
  vector<string> tokens;
  boost::split(tokens, s, boost::is_any_of(string(sep)));

  // Look for the optional example-weight marker in the leading token.
  float ex_weight = 1.0;
  int start_idx = 0;
  const string& head = tokens[0];
  if (head.find("__weight__") != std::string::npos) {
    // The marker is consumed even when it carries no explicit value.
    start_idx = 1;
    const std::size_t sep_pos = head.find(args_->weightSep);
    if (sep_pos != std::string::npos) {
      ex_weight = atof(head.substr(sep_pos + 1).c_str());
    }
  }

  for (unsigned int idx = start_idx; idx < tokens.size(); ++idx) {
    const string& raw = tokens[idx];

    // By default the whole token is the word and it has unit weight.
    string word = raw;
    float weight = 1.0;
    if (args_->useWeight) {
      // Token form is <word><weightSep><weight>; the +1 offset presumes a
      // one-character separator -- NOTE(review): confirm the type of weightSep.
      const std::size_t sep_pos = raw.find(args_->weightSep);
      if (sep_pos != std::string::npos) {
        word = raw.substr(0, sep_pos);
        weight = atof(raw.substr(sep_pos + 1).c_str());
      }
    }

    if (args_->normalizeText) {
      normalize_text(word);
    }

    const int32_t wid = dict_->getId(word);
    if (wid == -1) {
      // The dictionary reported no id for this word; drop it.
      continue;
    }
    feats.push_back(make_pair(wid, weight * ex_weight));
  }

  // N-gram features are built from the unmodified token list, i.e. including
  // a leading weight marker and any weight suffixes still attached to tokens.
  if (args_->ngrams > 1) {
    addNgrams(tokens, feats, args_->ngrams);
  }

  return !feats.empty();
}