in src/parser.cpp [111:155]
bool DataParser::parse(
const std::vector<std::string>& tokens,
ParseResults& rslts) {
for (auto &token: tokens) {
if (token.find("__weight__") != std::string::npos) {
std::size_t pos = token.find(args_->weightSep);
if (pos != std::string::npos) {
rslts.weight = atof(token.substr(pos + 1).c_str());
}
continue;
}
string t = token;
float weight = 1.0;
if (args_->useWeight) {
std::size_t pos = token.find(args_->weightSep);
if (pos != std::string::npos) {
t = token.substr(0, pos);
weight = atof(token.substr(pos + 1).c_str());
}
}
if (args_->normalizeText) {
normalize_text(t);
}
int32_t wid = dict_->getId(t);
if (wid < 0) {
continue;
}
entry_type type = dict_->getType(wid);
if (type == entry_type::word) {
rslts.LHSTokens.push_back(make_pair(wid, weight));
}
if (type == entry_type::label) {
rslts.RHSTokens.push_back(make_pair(wid, weight));
}
}
if (args_->ngrams > 1) {
addNgrams(tokens, rslts.LHSTokens, args_->ngrams);
}
return check(rslts);
}