in congestion_control/CongestionControlEnv.cpp [182:224]
/// Computes the scalar reward for a window of observed network states.
///
/// The reward is a combination of throughput, delay and lost bytes.
/// Throughput and delay are averaged over the window (optionally the maximum
/// delay is used instead of the average, see `cfg_.maxDelayInReward`), whereas
/// loss is summed over all states.
///
/// Depending on `cfg_.rewardLogRatio`, the three terms are combined either
/// linearly or through logarithms (each with its own configurable offset),
/// weighted by `cfg_.throughputFactor`, `cfg_.delayFactor` and
/// `cfg_.packetLossFactor`.
///
/// @param states Window of network states to score. May be empty, in which
///               case throughput, delay and loss are all treated as zero.
/// @return The reward value for this window.
float CongestionControlEnv::computeReward(
    const quic::utils::vector<NetworkState> &states) const {
  float avgThroughput = 0.0;
  float avgDelay = 0.0;
  float maxDelay = 0.0;
  float totalLost = 0.0;
  for (const auto &state : states) {
    avgThroughput += state[Field::THROUGHPUT];
    avgDelay += state[Field::DELAY];
    maxDelay = std::max(maxDelay, state[Field::DELAY]);
    totalLost += state[Field::LOST];
  }

  // Turn the accumulated sums into averages. Skip the division for an empty
  // window: 0.0f / 0 would yield NaN and silently poison the returned reward.
  if (states.size() > 0) {
    const float numStates = static_cast<float>(states.size());
    avgThroughput /= numStates;
    avgDelay /= numStates;
  }

  // Undo normalization and convert to MB/sec for throughput and ms for
  // delay.
  const float throughputMBps = avgThroughput * normBytes() * kBytesToMB;
  const float avgDelayMs = avgDelay * normMs();
  const float maxDelayMs = maxDelay * normMs();
  const float delayMs = (cfg_.maxDelayInReward ? maxDelayMs : avgDelayMs);
  // Lost bytes go through the same bytes conversion as throughput.
  // NOTE(review): the log line below labels this value "Mb" although it is
  // scaled with kBytesToMB -- confirm the intended unit.
  const float lostMB = totalLost * normBytes() * kBytesToMB;

  float reward = 0.f;
  if (cfg_.rewardLogRatio) {
    // Logarithmic form: each term is offset before taking the log.
    reward =
        cfg_.throughputFactor * log(cfg_.throughputLogOffset + throughputMBps) -
        cfg_.delayFactor * log(cfg_.delayLogOffset + delayMs) -
        cfg_.packetLossFactor * log(cfg_.packetLossLogOffset + lostMB);
  } else {
    // Linear form: reward throughput, penalize delay and loss.
    reward = cfg_.throughputFactor * throughputMBps -
             cfg_.delayFactor * delayMs - cfg_.packetLossFactor * lostMB;
  }

  VLOG(1) << "Num states = " << states.size()
          << " avg throughput = " << throughputMBps
          << " MB/sec, avg delay = " << avgDelayMs
          << " ms, max delay = " << maxDelayMs
          << " ms, total Mb lost = " << lostMB << ", reward = " << reward;
  return reward;
}