float CongestionControlEnv::computeReward()

in congestion_control/CongestionControlEnv.cpp [182:224]


/// Computes a scalar reward from a batch of network states.
///
/// The reward combines throughput, delay and lost bytes. Throughput and delay
/// are averaged over `states` (the maximum delay is tracked as well and used
/// instead of the average when `cfg_.maxDelayInReward` is set), whereas loss
/// is summed over all states.
///
/// Depending on `cfg_.rewardLogRatio` the three terms are combined either
/// linearly or after a logarithm with per-term offsets; in both cases each
/// term is weighted by its `cfg_.*Factor`.
///
/// @param states Network states to aggregate. May be empty, in which case the
///               averages are treated as zero rather than producing NaN.
/// @return The reward value.
float CongestionControlEnv::computeReward(
    const quic::utils::vector<NetworkState> &states) const {
  // Reward function is a combination of throughput, delay and lost bytes.
  // For throughput and delay, it makes sense to take the average, whereas
  // for loss, we compute the total bytes lost over these states.
  float avgThroughput = 0.0;
  float avgDelay = 0.0;
  float maxDelay = 0.0;
  float totalLost = 0.0;
  for (const auto &state : states) {
    avgThroughput += state[Field::THROUGHPUT];
    avgDelay += state[Field::DELAY];
    maxDelay = std::max(maxDelay, state[Field::DELAY]);
    totalLost += state[Field::LOST];
  }
  // Guard the division: with no states 0/0 would yield NaN, which would then
  // propagate into the reward. Leaving the sums at zero gives a finite reward.
  if (!states.empty()) {
    const float numStates = static_cast<float>(states.size());
    avgThroughput /= numStates;
    avgDelay /= numStates;
  }

  // Undo normalization and convert to MB/sec for throughput and ms for
  // delay. Lost bytes go through the same bytes -> MB conversion as
  // throughput (presumably megabytes, judging by the constant's name).
  const float throughputMBps = avgThroughput * normBytes() * kBytesToMB;
  const float avgDelayMs = avgDelay * normMs();
  const float maxDelayMs = maxDelay * normMs();
  const float delayMs = (cfg_.maxDelayInReward ? maxDelayMs : avgDelayMs);
  const float lostMB = totalLost * normBytes() * kBytesToMB;

  float reward = 0.f;
  if (cfg_.rewardLogRatio) {
    // Log-scaled variant: the offsets are added before taking the logarithm.
    reward =
        cfg_.throughputFactor * log(cfg_.throughputLogOffset + throughputMBps) -
        cfg_.delayFactor * log(cfg_.delayLogOffset + delayMs) -
        cfg_.packetLossFactor * log(cfg_.packetLossLogOffset + lostMB);
  } else {
    // Linear variant: weighted throughput minus weighted delay and loss.
    reward = cfg_.throughputFactor * throughputMBps -
             cfg_.delayFactor * delayMs - cfg_.packetLossFactor * lostMB;
  }
  VLOG(1) << "Num states = " << states.size()
          << " avg throughput = " << throughputMBps
          << " MB/sec, avg delay = " << avgDelayMs
          << " ms, max delay = " << maxDelayMs
          << " ms, total MB lost = " << lostMB << ", reward = " << reward;
  return reward;
}