in src/data.cpp [93:152]
// Builds one training example in `rslt` from the parsed `example`.
//
// `rslt` always receives the weight and the LHS tokens of `example`, and its
// RHS starts out empty. The RHS tokens of `example` (the "labels") are then
// distributed according to `args_->trainMode`:
//   0 - one randomly chosen label becomes the RHS.
//   1 - one randomly chosen label becomes the RHS; every other label is
//       appended to the LHS.
//   2 - one randomly chosen label is appended to the LHS; every other label
//       becomes the RHS.
//   3 - two distinct randomly chosen labels are used: the first is appended
//       to the LHS, the second becomes the RHS.
//   4 - the first label is appended to the LHS, the second becomes the RHS.
// Any other mode leaves the RHS empty.
//
// Preconditions are checked with assert() only: mode 0 requires a non-empty
// LHS and RHS in `example`; every other mode requires at least two RHS
// tokens. Random choices are drawn with rand().
void InternDataHandler::convert(
    const ParseResults& example,
    ParseResults& rslt) const {
  // Common starting point: same weight, same LHS, nothing on the RHS yet.
  rslt.weight = example.weight;
  rslt.LHSTokens.clear();
  rslt.RHSTokens.clear();
  rslt.LHSTokens.insert(
      rslt.LHSTokens.end(),
      example.LHSTokens.begin(),
      example.LHSTokens.end());

  const auto& labels = example.RHSTokens;
  const auto mode = args_->trainMode;

  if (mode == 0) {
    // LHS is kept as is; a single random label forms the RHS.
    assert(example.LHSTokens.size() > 0);
    assert(labels.size() > 0);
    const auto chosen = rand() % labels.size();
    rslt.RHSTokens.push_back(labels[chosen]);
    return;
  }

  // All remaining modes split the labels and need at least two of them.
  assert(labels.size() > 1);
  const auto numLabels = labels.size();

  if (mode == 1) {
    // The chosen label is the RHS; the labels before and after it, in their
    // original order, extend the LHS.
    const auto chosen = rand() % numLabels;
    rslt.RHSTokens.push_back(labels[chosen]);
    rslt.LHSTokens.insert(
        rslt.LHSTokens.end(), labels.begin(), labels.begin() + chosen);
    rslt.LHSTokens.insert(
        rslt.LHSTokens.end(), labels.begin() + chosen + 1, labels.end());
  } else if (mode == 2) {
    // Mirror image of mode 1: the chosen label extends the LHS and the
    // labels around it, in their original order, form the RHS.
    const auto chosen = rand() % numLabels;
    rslt.LHSTokens.push_back(labels[chosen]);
    rslt.RHSTokens.insert(
        rslt.RHSTokens.end(), labels.begin(), labels.begin() + chosen);
    rslt.RHSTokens.insert(
        rslt.RHSTokens.end(), labels.begin() + chosen + 1, labels.end());
  } else if (mode == 3) {
    // Draw a second index until it differs from the first one.
    const auto first = rand() % numLabels;
    auto second = rand() % numLabels;
    while (second == first) {
      second = rand() % numLabels;
    }
    rslt.LHSTokens.push_back(labels[first]);
    rslt.RHSTokens.push_back(labels[second]);
  } else if (mode == 4) {
    // Deterministic split: first label to the LHS, second label as the RHS.
    rslt.LHSTokens.push_back(labels[0]);
    rslt.RHSTokens.push_back(labels[1]);
  }
}