bool DataParser::parse()

in src/parser.cpp [111:155]


bool DataParser::parse(
    const std::vector<std::string>& tokens,
    ParseResults& rslts) {

  for (auto &token: tokens) {
    if (token.find("__weight__") != std::string::npos) {
      std::size_t pos = token.find(args_->weightSep);
      if (pos != std::string::npos) {
        rslts.weight = atof(token.substr(pos + 1).c_str());
      }
      continue;
    }
    string t = token;
    float weight = 1.0;
    if (args_->useWeight) {
      std::size_t pos = token.find(args_->weightSep);
      if (pos != std::string::npos) {
        t = token.substr(0, pos);
        weight = atof(token.substr(pos + 1).c_str());
      }
    }

    if (args_->normalizeText) {
      normalize_text(t);
    }
    int32_t wid = dict_->getId(t);
    if (wid < 0) {
      continue;
    }

    entry_type type = dict_->getType(wid);
    if (type == entry_type::word) {
      rslts.LHSTokens.push_back(make_pair(wid, weight));
    }
    if (type == entry_type::label) {
      rslts.RHSTokens.push_back(make_pair(wid, weight));
    }
  }

  if (args_->ngrams > 1) {
    addNgrams(tokens, rslts.LHSTokens, args_->ngrams);
  }

  return check(rslts);
}