float EmbedModel::trainOneBatch()

in src/model.cpp [353:485]


float EmbedModel::trainOneBatch(shared_ptr<InternDataHandler> data,
                                const vector<ParseResults>& batch_exs,
                                size_t negSearchLimit,
                                Real rate0,
                                bool trainWord) {

  using namespace boost::numeric::ublas;
  // Keep all the activations in local (stack) storage so that the shared
  // model parameters can be updated asynchronously across threads.

  int batch_sz = batch_exs.size();
  std::vector<Matrix<Real>> lhs(batch_sz), rhsP(batch_sz);
  std::vector<Real> posSim(batch_sz);
  std::vector<Real> labelRate(batch_sz, -rate0);

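  // Project each example's input (LHS) and positive label (RHS) into the
  // embedding space, and cache their similarity for the ranking loss below.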
  auto cols = args_->dim;
  for (auto i = 0; i < batch_sz; i++) {
    const auto& items = batch_exs[i].LHSTokens;
    const auto& labels = batch_exs[i].RHSTokens;
    projectLHS(items, lhs[i]);
    check(lhs[i]);

    projectRHS(labels, rhsP[i]);
    check(rhsP[i]);
    posSim[i] = similarity(lhs[i], rhsP[i]);
  }

  // A small helper computing the margin ranking (hinge) loss for the
  // current (input, positive, negative) triple.
  auto tripleLoss = [&] (Real posSim, Real negSim) {
    auto val = args_->margin - posSim + negSim;
    assert(!isnan(posSim));
    assert(!isnan(negSim));
    assert(!isinf(posSim));
    assert(!isinf(negSim));
    // Clamp the loss well below the largest representable value so that
    // downstream arithmetic has headroom to compute with.
    const auto kMaxLoss = 10e7;
    auto retval = (std::max)((std::min)(val, kMaxLoss), 0.0);
    return retval;
  };
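  // Worked example: with margin = 0.05, posSim = 0.9, negSim = 0.2 we get
  // val = 0.05 - 0.9 + 0.2 = -0.65, which clamps to a loss of 0: that
  // negative is already ranked far enough below the positive to contribute
  // nothing. With negSim = 0.88 instead, the loss would be 0.03.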

  // Get a random batch of negatives
  std::vector<Matrix<Real>> rhsN(negSearchLimit);
  std::vector<std::vector<Base>> batch_negLabels;

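  // Each candidate negative is projected once and then shared by every
  // example in the batch, amortizing the projection cost.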
  for (unsigned int i = 0; i < negSearchLimit; i++) {
    std::vector<Base> negLabels;
    if (trainWord) {
      data->getRandomWord(negLabels);
    } else {
      data->getRandomRHS(negLabels);
    }
    projectRHS(negLabels, rhsN[i]);
    check(rhsN[i]);
    batch_negLabels.push_back(negLabels);
  }

  // Select negative examples
  Real total_loss = 0.0;
  std::vector<Real> loss(batch_sz);
  std::vector<Matrix<Real>> negMean(batch_sz);
  std::vector<int> num_negs(batch_sz);
  std::vector<std::vector<Real>> nRate(batch_sz);

  std::vector<std::vector<bool>> update_flag(batch_sz);

  for (int i = 0; i < batch_sz; i++) {
    num_negs[i] = 0;
    loss[i] = 0.0;
    negMean[i].matrix = zero_matrix<Real>(1, cols);
    update_flag[i].resize(negSearchLimit, false);
    nRate[i].resize(negSearchLimit, 0);

    for (unsigned int j = 0; j < negSearchLimit; j++) {
      nRate[i][j] = 0.0;
      if (batch_exs[i].RHSTokens == batch_negLabels[j]) {
        continue;
      }
      auto thisLoss = tripleLoss(posSim[i], similarity(lhs[i], rhsN[j]));
      if (thisLoss > 0.0) {
        num_negs[i]++;
        loss[i] += thisLoss;
        negMean[i].add(rhsN[j]);
        assert(loss[i] >= 0.0);
        update_flag[i][j] = true;
        if (num_negs[i] == args_->maxNegSamples) {
          break;
        }
      }
    }
    if (num_negs[i] == 0) {
      continue;
    }
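    // Note the asymmetric normalization: the reported loss averages over
    // all negSearchLimit candidates searched, while the gradient mean uses
    // only the num_negs[i] negatives that actually violated the margin.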
    loss[i] /= negSearchLimit;
    negMean[i].matrix /= num_negs[i];
    total_loss += loss[i];
    // gradW for example i: subtract t+ so negMean now holds mean(t-) - t+.
    negMean[i].add(rhsP[i], -1);
    for (unsigned int j = 0; j < negSearchLimit; j++) {
      if (update_flag[i][j]) {
        nRate[i][j] = rate0 / num_negs[i];
      }
    }
  }

  // Couldn't find a negative example given reasonable effort, so
  // give up.
  if (total_loss == 0.0) return 0.0;
  assert(!std::isinf(total_loss));
  if (rate0 == 0.0) return total_loss;

  // Let w be the average of the input features, t+ be the positive
  // example and t- be the average of the negative examples.
  // Our error E is:
  //
  //    E = k - dot(w, t+) + dot(w, t-)
  //
  // Differentiating term-by-term we get:
  //
  //     dE / dw  = t- - t+
  //     dE / dt- = w
  //     dE / dt+ = -w
  //
  // Per example, gradW = mean(t-) - t+. We're done with negMean, so we
  // reuse it to carry gradW into backward().
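  // The rates handed to backward() mirror these derivatives: labelRate
  // stays at -rate0 for the positive RHS (dE/dt+ = -w), while each
  // selected negative gets nRate[i][j] = rate0 / num_negs[i], splitting
  // the step across the negatives averaged into negMean.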

  backward(batch_exs, batch_negLabels,
           negMean, lhs, num_negs,
           rate0, labelRate, nRate);

  return total_loss;
}
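
For orientation, here is a minimal sketch of how a caller might drive this
routine. The handler construction, the nextBatch helper, and the learning-rate
handling are illustrative assumptions rather than the actual StarSpace API;
only the trainOneBatch call itself follows the signature above.

// Hypothetical driver loop (sketch only): makeHandler and nextBatch are
// invented stand-ins for the real data-loading code.
std::shared_ptr<InternDataHandler> data = makeHandler(args);
Real rate = args->lr;
for (int epoch = 0; epoch < args->epoch; epoch++) {
  std::vector<ParseResults> batch;
  while (nextBatch(data, args->batchSize, batch)) {
    // negSearchLimit bounds how many random negatives are sampled per
    // batch; trainWord = false requests label-level (RHS) negatives.
    float loss = model->trainOneBatch(data, batch, args->negSearchLimit,
                                      rate, /*trainWord=*/false);
    (void)loss;  // a real loop would log this and decay `rate`
  }
}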