quic/loss/QuicLossFunctions.h (391 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <folly/Chrono.h>
#include <folly/Optional.h>
#include <folly/io/async/AsyncTimeout.h>
#include <quic/QuicConstants.h>
#include <quic/api/Observer.h>
#include <quic/codec/Types.h>
#include <quic/common/TimeUtil.h>
#include <quic/congestion_control/CongestionController.h>
#include <quic/d6d/QuicD6DStateFunctions.h>
#include <quic/flowcontrol/QuicFlowController.h>
#include <quic/logging/QLoggerConstants.h>
#include <quic/state/QuicStateFunctions.h>
#include <quic/state/SimpleFrameFunctions.h>
#include <quic/state/StateData.h>
namespace quic {
// Forward declarations of helpers that are not defined in this header but are
// called by the inline/template functions below.
//
// hasAckDataToWrite / hasNonAckDataToWrite are consulted by
// setLossDetectionAlarm() to decide whether there is still data to write.
bool hasAckDataToWrite(const QuicConnectionStateBase& conn);
WriteDataReason hasNonAckDataToWrite(const QuicConnectionStateBase& conn);
// calculatePTO supplies the base PTO duration that calculateAlarmDuration()
// scales by the current PTO count.
std::chrono::microseconds calculatePTO(const QuicConnectionStateBase& conn);
/**
* Whether conn is experiencing persistent congestion.
*
* Persistent congestion requires a time period much longer than the crypto
* timer. This means no handshake packet should be in a persistent congestion
* range. Thus persistent congestion is per pnSpace, and it applies to the
* AppData space only.
*
* Only the declaration lives in this header.
*
* NOTE(review): pto is optional; lostPeriodStart / lostPeriodEnd presumably
* bound the send times of the lost packets under evaluation and ack is the ack
* event checked against that period -- confirm against the definition.
*/
bool isPersistentCongestion(
folly::Optional<std::chrono::microseconds> pto,
TimePoint lostPeriodStart,
TimePoint lostPeriodEnd,
const CongestionController::AckEvent& ack) noexcept;
/**
 * Streams the symbolic name of a loss alarm method.
 *
 * A value that matches none of the enumerators writes nothing; the stream is
 * returned untouched in that case.
 */
inline std::ostream& operator<<(
    std::ostream& os,
    const LossState::AlarmMethod& alarmMethod) {
  switch (alarmMethod) {
    case LossState::AlarmMethod::EarlyRetransmitOrReordering:
      return os << "EarlyRetransmitOrReordering";
    case LossState::AlarmMethod::PTO:
      return os << "PTO";
  }
  return os;
}
template <class ClockType = Clock>
std::pair<std::chrono::milliseconds, LossState::AlarmMethod>
calculateAlarmDuration(const QuicConnectionStateBase& conn) {
std::chrono::microseconds alarmDuration;
folly::Optional<LossState::AlarmMethod> alarmMethod;
TimePoint lastSentPacketTime =
conn.lossState.lastRetransmittablePacketSentTime;
auto lossTimeAndSpace = earliestLossTimer(conn);
if (lossTimeAndSpace.first) {
if (*lossTimeAndSpace.first > lastSentPacketTime) {
// We do this so that lastSentPacketTime + alarmDuration = lossTime
alarmDuration = std::chrono::duration_cast<std::chrono::microseconds>(
*lossTimeAndSpace.first - lastSentPacketTime);
} else {
// This should trigger an immediate alarm.
alarmDuration = 0us;
}
alarmMethod = LossState::AlarmMethod::EarlyRetransmitOrReordering;
} else {
auto ptoTimeout = calculatePTO(conn);
ptoTimeout *= 1ULL << std::min(conn.lossState.ptoCount, (uint32_t)31);
alarmDuration = ptoTimeout;
alarmMethod = LossState::AlarmMethod::PTO;
}
TimePoint now = ClockType::now();
std::chrono::milliseconds adjustedAlarmDuration{0};
// The alarm duration is calculated based on the last packet that was sent
// rather than the current time.
if (lastSentPacketTime + alarmDuration > now) {
adjustedAlarmDuration = folly::chrono::ceil<std::chrono::milliseconds>(
lastSentPacketTime + alarmDuration - now);
} else {
auto lastSentPacketNum =
conn.outstandings.packets.back().packet.header.getPacketSequenceNum();
VLOG(10) << __func__ << " alarm already due method=" << *alarmMethod
<< " lastSentPacketNum=" << lastSentPacketNum
<< " lastSentPacketTime="
<< lastSentPacketTime.time_since_epoch().count()
<< " now=" << now.time_since_epoch().count()
<< " alarm=" << alarmDuration.count() << "us"
<< " deadline="
<< (lastSentPacketTime + alarmDuration).time_since_epoch().count()
<< " " << conn;
}
DCHECK(alarmMethod.hasValue()) << "Alarm method must have a value";
return std::make_pair(adjustedAlarmDuration, *alarmMethod);
}
/*
 * Re-evaluates the loss detection timer. Invoke this after any event that can
 * change the timer, for example a write, a timeout firing, or packets being
 * acked.
 *
 * In order, the function:
 *  - cancels the timer and clears the pending flag when there is nothing left
 *    that the timer would protect;
 *  - cancels a stale early retransmit timer whose loss time is gone;
 *  - returns without rescheduling when no re-arm has been requested;
 *  - otherwise cancels and reschedules the timer using
 *    calculateAlarmDuration(), then clears the pending flag.
 */
template <class Timeout, class ClockType = Clock>
void setLossDetectionAlarm(QuicConnectionStateBase& conn, Timeout& timeout) {
  /*
   * New or lost data may be waiting even when no packets are outstanding.
   * After a PTO event it is possible that only cloned packets are
   * outstanding; with cwnd possibly at its minimum we may be unable to send,
   * yet unsent or lost data can still sit in the buffers. A timer is needed
   * in that case so the data can go out on the next PTO.
   */
  const bool pendingWrite = hasAckDataToWrite(conn) ||
      (hasNonAckDataToWrite(conn) != WriteDataReason::NO_WRITE);
  auto outstandingCount = conn.outstandings.numOutstanding();
  auto d6dProbeCount = conn.d6d.outstandingProbes;

  /*
   * Ignoring d6d probes, distinguish between:
   *  (1) everything outstanding is an already processed clone and there is
   *      no data to write -- no timer is needed, which frees up the evb;
   *  (2) everything outstanding is an already processed clone but there is
   *      data to write -- after a PTO verified event the clones occupy cwnd,
   *      so the loss timer must stay armed to let that data be written with
   *      the slack packet space of the clones.
   */
  const bool onlyProcessedClonesAndIdle = !pendingWrite &&
      conn.outstandings.packetEvents.empty() &&
      (outstandingCount - d6dProbeCount) ==
          conn.outstandings.numClonedPackets();
  if (onlyProcessedClonesAndIdle) {
    VLOG(10) << __func__ << " unset alarm pure ack or processed packets only"
             << " outstanding=" << outstandingCount << " handshakePackets="
             << conn.outstandings.packetCount[PacketNumberSpace::Handshake]
             << " " << conn;
    conn.pendingEvents.setLossDetectionAlarm = false;
    timeout.cancelLossTimeout();
    return;
  }

  /**
   * A previous timer or an ack can clear the loss time without installing a
   * new one, for instance when it marks everything as lost or everything as
   * acked. An early retransmit timer that is still set must then be cleared.
   */
  const bool earlyRetransmitArmed = conn.lossState.currentAlarmMethod ==
      LossState::AlarmMethod::EarlyRetransmitOrReordering;
  if (earlyRetransmitArmed && !earliestLossTimer(conn).first) {
    VLOG(10) << __func__
             << " unset alarm due to invalidated early retran timer";
    timeout.cancelLossTimeout();
  }

  // Nothing asked for the timer to be (re)armed.
  if (!conn.pendingEvents.setLossDetectionAlarm) {
    VLOG_IF(10, !timeout.isLossTimeoutScheduled())
        << __func__ << " alarm not scheduled"
        << " outstanding=" << outstandingCount << " initialPackets="
        << conn.outstandings.packetCount[PacketNumberSpace::Initial]
        << " handshakePackets="
        << conn.outstandings.packetCount[PacketNumberSpace::Handshake] << " "
        << nodeToString(conn.nodeType) << " " << conn;
    return;
  }

  // Replace whatever is scheduled with a freshly computed deadline.
  timeout.cancelLossTimeout();
  auto alarm = calculateAlarmDuration<ClockType>(conn);
  conn.lossState.currentAlarmMethod = alarm.second;
  VLOG(10) << __func__ << " setting transmission"
           << " alarm=" << alarm.first.count() << "ms"
           << " method=" << conn.lossState.currentAlarmMethod
           << " haDataToWrite=" << pendingWrite
           << " outstanding=" << outstandingCount
           << " outstanding clone=" << conn.outstandings.numClonedPackets()
           << " packetEvents=" << conn.outstandings.packetEvents.size()
           << " initialPackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Initial]
           << " handshakePackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Handshake]
           << " " << nodeToString(conn.nodeType) << " " << conn;
  timeout.scheduleLossTimeout(alarm.first);
  conn.pendingEvents.setLossDetectionAlarm = false;
}
/*
 * This function should be invoked after some event that is possible to
 * trigger loss detection, for example: packets are acked.
 *
 * Starting at the first outstanding packet of pnSpace, it walks packets whose
 * packet number is below largestAcked and declares a packet lost when either
 *  - it was sent more than delayUntilLost before lossTime, where
 *    delayUntilLost = max(srtt, lrtt) * timeReorderingThreshDividend /
 *    timeReorderingThreshDivisor, or
 *  - it trails largestAcked by more than conn.lossState.reorderingThreshold.
 * The walk stops at the first packet of pnSpace that meets neither condition.
 *
 * Lost packets are flagged (declaredLost) rather than erased, reported to
 * lossVisitor, and accumulated into a CongestionController::LossEvent. Lost
 * D6D probes are flagged and counted but kept out of the loss event and are
 * not reported to lossVisitor.
 *
 * If the walk stopped on a not-yet-lost packet, the loss time of pnSpace is
 * set so that an early retransmit timer can be armed.
 *
 * Returns the loss event when at least one packet was added to it and conn
 * has a congestion controller; folly::none otherwise.
 */
template <class LossVisitor>
folly::Optional<CongestionController::LossEvent> detectLossPackets(
    QuicConnectionStateBase& conn,
    folly::Optional<PacketNum> largestAcked,
    const LossVisitor& lossVisitor,
    TimePoint lossTime,
    PacketNumberSpace pnSpace) {
  // Drop the previous loss time of this space; it is recomputed below.
  getLossTime(conn, pnSpace).reset();
  std::chrono::microseconds rttSample =
      std::max(conn.lossState.srtt, conn.lossState.lrtt);
  std::chrono::microseconds delayUntilLost = rttSample *
      conn.transportSettings.timeReorderingThreshDividend /
      conn.transportSettings.timeReorderingThreshDivisor;
  VLOG(10) << __func__ << " outstanding=" << conn.outstandings.numOutstanding()
           << " largestAcked=" << largestAcked.value_or(0)
           << " delayUntilLost=" << delayUntilLost.count() << "us"
           << " " << conn;
  CongestionController::LossEvent lossEvent(lossTime);
  // Only built when somebody is observing.
  folly::Optional<Observer::LossEvent> observerLossEvent;
  if (!conn.observers->empty()) {
    observerLossEvent.emplace(lossTime);
  }
  // Note that time based loss detection is also within the same PNSpace.
  auto iter = getFirstOutstandingPacket(conn, pnSpace);
  bool shouldSetTimer = false;
  while (iter != conn.outstandings.packets.end()) {
    auto& pkt = *iter;
    auto currentPacketNum = pkt.packet.header.getPacketSequenceNum();
    if (!largestAcked.has_value() || currentPacketNum >= *largestAcked) {
      break;
    }
    auto currentPacketNumberSpace = pkt.packet.header.getPacketNumberSpace();
    if (currentPacketNumberSpace != pnSpace) {
      iter++;
      continue;
    }
    bool lostByTimeout = (lossTime - pkt.metadata.time) > delayUntilLost;
    bool lostByReorder =
        (*largestAcked - currentPacketNum) > conn.lossState.reorderingThreshold;
    if (!(lostByTimeout || lostByReorder)) {
      // We can exit early here because if packet N doesn't meet the
      // threshold, then packet N + 1 will not either.
      shouldSetTimer = true;
      break;
    }
    if (pkt.metadata.isD6DProbe) {
      // It's a D6D probe, we'll mark it as lost to avoid its stale
      // ack from affecting PMTU. We don't add it to loss event to
      // avoid affecting congestion control when there's probably no
      // congestion
      CHECK(conn.d6d.lastProbe.hasValue());
      // Check the declaredLost field first, to avoid double counting
      // the lost probe since we don't erase them from op list yet
      if (!pkt.declaredLost) {
        ++conn.outstandings.declaredLostCount;
        pkt.declaredLost = true;
        // rttSample > 0 guards the division below.
        if (lostByTimeout && rttSample.count() > 0) {
          pkt.lossTimeoutDividend = (lossTime - pkt.metadata.time) *
              conn.transportSettings.timeReorderingThreshDivisor / rttSample;
        }
        if (lostByReorder) {
          pkt.lossReorderDistance = *largestAcked - currentPacketNum;
        }
        ++conn.d6d.meta.totalLostProbes;
        if (currentPacketNum == conn.d6d.lastProbe->packetNum) {
          onD6DLastProbeLost(conn);
        }
      }
      iter++;
      continue;
    }
    detectPMTUBlackhole(conn, pkt);
    lossEvent.addLostPacket(pkt);
    if (observerLossEvent) {
      observerLossEvent->addLostPacket(lostByTimeout, lostByReorder, pkt);
    }
    if (pkt.isDSRPacket) {
      CHECK_GT(conn.outstandings.dsrCount, 0);
      --conn.outstandings.dsrCount;
    }
    if (pkt.associatedEvent) {
      CHECK(conn.outstandings.clonedPacketCount[pnSpace]);
      --conn.outstandings.clonedPacketCount[pnSpace];
    }
    // A packet counts as already processed when it has an associated
    // PacketEvent that is no longer present in
    // conn.outstandings.packetEvents. The visitor is told via `processed`.
    bool processed = pkt.associatedEvent &&
        !conn.outstandings.packetEvents.count(*pkt.associatedEvent);
    lossVisitor(conn, pkt.packet, processed);
    // Remove the PacketEvent from the outstandings.packetEvents set
    if (pkt.associatedEvent) {
      conn.outstandings.packetEvents.erase(*pkt.associatedEvent);
    }
    if (!processed) {
      CHECK(conn.outstandings.packetCount[currentPacketNumberSpace]);
      --conn.outstandings.packetCount[currentPacketNumberSpace];
    }
    VLOG(10) << __func__ << " lost packetNum=" << currentPacketNum
             << " handshake=" << pkt.metadata.isHandshake << " " << conn;
    // Rather than erasing here, instead mark the packet as lost so we can
    // determine if this was spurious later.
    conn.lossState.totalPacketsMarkedLost++;
    if (lostByTimeout && rttSample.count() > 0) {
      conn.lossState.totalPacketsMarkedLostByPto++;
      pkt.lossTimeoutDividend = (lossTime - pkt.metadata.time) *
          conn.transportSettings.timeReorderingThreshDivisor / rttSample;
    }
    if (lostByReorder) {
      conn.lossState.totalPacketsMarkedLostByReorderingThreshold++;
      iter->lossReorderDistance = *largestAcked - currentPacketNum;
    }
    conn.outstandings.declaredLostCount++;
    iter->declaredLost = true;
    iter++;
  } // while (iter != conn.outstandings.packets.end()) {

  // if there are observers, enqueue a function to call it
  if (observerLossEvent && observerLossEvent->hasPackets()) {
    for (const auto& observer : *(conn.observers)) {
      conn.pendingCallbacks.emplace_back(
          [observer, observerLossEvent](QuicSocket* qSocket) {
            if (observer->getConfig().lossEvents) {
              observer->packetLossDetected(qSocket, *observerLossEvent);
            }
          });
    }
  }

  // Find the earliest outstanding packet of this space that either has no
  // associated PacketEvent or whose PacketEvent is still in packetEvents.
  auto earliest = getFirstOutstandingPacket(conn, pnSpace);
  for (; earliest != conn.outstandings.packets.end();
       earliest = getNextOutstandingPacket(conn, pnSpace, earliest + 1)) {
    if (!earliest->associatedEvent ||
        conn.outstandings.packetEvents.count(*earliest->associatedEvent)) {
      break;
    }
  }
  if (shouldSetTimer && earliest != conn.outstandings.packets.end()) {
    // We are eligible to set a loss timer and there are a few packets which
    // are unacked, so we can set the early retransmit timer for them.
    // Log the outstanding count; this branch is only reached with at least
    // one such packet, so an emptiness flag here would carry no information.
    VLOG(10) << __func__ << " early retransmit timer outstanding="
             << conn.outstandings.numOutstanding()
             << " delayUntilLost=" << delayUntilLost.count() << "us"
             << " " << conn;
    getLossTime(conn, pnSpace) = delayUntilLost + earliest->metadata.time;
  }
  if (lossEvent.largestLostPacketNum.hasValue()) {
    DCHECK(lossEvent.largestLostSentTime && lossEvent.smallestLostSentTime);
    if (conn.qLogger) {
      conn.qLogger->addPacketsLost(
          lossEvent.largestLostPacketNum.value(),
          lossEvent.lostBytes,
          lossEvent.lostPackets);
    }
    conn.lossState.rtxCount += lossEvent.lostPackets;
    // The event is only handed back when there is a controller to consume it.
    if (conn.congestionController) {
      return lossEvent;
    }
  }
  return folly::none;
}
// Declared here so onLossDetectionAlarm() can call it; it is invoked when the
// loss timer fires and the current alarm method is not
// EarlyRetransmitOrReordering. The definition is not part of this header.
void onPTOAlarm(QuicConnectionStateBase& conn);
/*
 * Handler for the loss detection timer firing.
 *
 * With no outstanding packets this only logs and returns. Otherwise it either
 * hands over to onPTOAlarm(), or -- when the current alarm method is
 * EarlyRetransmitOrReordering -- runs loss detection for the packet number
 * space owning the earliest loss time and forwards any resulting loss event
 * to the congestion controller. Finally it requests a timer re-arm iff
 * packets are still outstanding.
 */
template <class LossVisitor, class ClockType = Clock>
void onLossDetectionAlarm(
    QuicConnectionStateBase& conn,
    const LossVisitor& lossVisitor) {
  // Sample the clock first so loss detection uses the firing time.
  auto alarmFiredAt = ClockType::now();
  if (conn.outstandings.packets.empty()) {
    VLOG(10) << "Transmission alarm fired with no outstanding packets " << conn;
    return;
  }

  const bool isEarlyRetransmitAlarm = conn.lossState.currentAlarmMethod ==
      LossState::AlarmMethod::EarlyRetransmitOrReordering;
  if (!isEarlyRetransmitAlarm) {
    onPTOAlarm(conn);
  } else {
    auto lossTimeAndSpace = earliestLossTimer(conn);
    CHECK(lossTimeAndSpace.first);
    const auto timerSpace = lossTimeAndSpace.second;
    auto lossEvent = detectLossPackets<LossVisitor>(
        conn,
        getAckState(conn, timerSpace).largestAckedByPeer,
        lossVisitor,
        alarmFiredAt,
        timerSpace);
    if (conn.congestionController && lossEvent) {
      DCHECK(lossEvent->largestLostSentTime && lossEvent->smallestLostSentTime);
      conn.congestionController->onPacketAckOrLoss(
          nullptr, lossEvent.get_pointer());
    }
  }

  // Ask for the timer to be re-armed only while something is outstanding.
  const bool stillOutstanding = conn.outstandings.numOutstanding() > 0;
  conn.pendingEvents.setLossDetectionAlarm = stillOutstanding;
  VLOG(10) << __func__ << " setLossDetectionAlarm="
           << conn.pendingEvents.setLossDetectionAlarm
           << " outstanding=" << conn.outstandings.numOutstanding()
           << " initialPackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Initial]
           << " handshakePackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Handshake]
           << " " << conn;
}
/*
* Process streams in a RegularQuicWritePacket for loss.
*
* processed: whether this packet is an already processed clone
*
* The (conn, packet, processed) parameter list matches how the LossVisitor
* callbacks are invoked by the loss functions in this header. Only the
* declaration lives here.
*/
void markPacketLoss(
QuicConnectionStateBase& conn,
RegularQuicWritePacket& packet,
bool processed);
/*
 * Runs loss detection in response to an ack for pnSpace.
 *
 * When the ack newly acknowledged at least one packet, the PTO count is reset
 * and the largest packet number acked by the peer for pnSpace is raised to
 * the newly acked one if that is larger (or if none was recorded yet). Loss
 * detection then runs with ack.ackTime as the loss time, and a timer re-arm is
 * requested iff packets are still outstanding.
 *
 * Returns whatever detectLossPackets() returned.
 */
template <class LossVisitor>
folly::Optional<CongestionController::LossEvent> handleAckForLoss(
    QuicConnectionStateBase& conn,
    const LossVisitor& lossVisitor,
    CongestionController::AckEvent& ack,
    PacketNumberSpace pnSpace) {
  auto& peerLargestAcked = getAckState(conn, pnSpace).largestAckedByPeer;
  if (ack.largestNewlyAckedPacket.has_value()) {
    conn.lossState.ptoCount = 0;
    const PacketNum newlyAcked = *ack.largestNewlyAckedPacket;
    // Never lower the recorded value; only fill it in or raise it.
    if (!peerLargestAcked.has_value() || *peerLargestAcked < newlyAcked) {
      peerLargestAcked = newlyAcked;
    }
  }

  auto detected = detectLossPackets(
      conn,
      getAckState(conn, pnSpace).largestAckedByPeer,
      lossVisitor,
      ack.ackTime,
      pnSpace);

  const bool stillOutstanding = conn.outstandings.numOutstanding() > 0;
  conn.pendingEvents.setLossDetectionAlarm = stillOutstanding;
  VLOG(10) << __func__ << " largestAckedInPacket="
           << ack.largestNewlyAckedPacket.value_or(0)
           << " setLossDetectionAlarm="
           << conn.pendingEvents.setLossDetectionAlarm
           << " outstanding=" << conn.outstandings.numOutstanding()
           << " initialPackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Initial]
           << " handshakePackets="
           << conn.outstandings.packetCount[PacketNumberSpace::Handshake]
           << " " << conn;
  return detected;
}
/**
 * Forcefully marks every outstanding zero rtt packet as lost; used during
 * zero rtt rejection.
 *
 * Each zero rtt packet in the AppData space is reported to lossVisitor,
 * accounted for in the clone / packet counters and erased from the
 * outstanding list. If anything was lost and conn has a congestion
 * controller, the lost bytes are removed from its inflight accounting.
 */
template <class LossVisitor, class ClockType = Clock>
void markZeroRttPacketsLost(
    QuicConnectionStateBase& conn,
    const LossVisitor& lossVisitor) {
  CongestionController::LossEvent lossEvent(ClockType::now());
  auto iter = getFirstOutstandingPacket(conn, PacketNumberSpace::AppData);
  while (iter != conn.outstandings.packets.end()) {
    DCHECK_EQ(
        iter->packet.header.getPacketNumberSpace(), PacketNumberSpace::AppData);
    if (iter->packet.header.getProtectionType() != ProtectionType::ZeroRtt) {
      // Not a zero rtt packet: leave it alone and move on.
      iter =
          getNextOutstandingPacket(conn, PacketNumberSpace::AppData, iter + 1);
      continue;
    }

    auto& pkt = *iter;
    DCHECK(!pkt.metadata.isHandshake);
    // Already processed: it has an associated PacketEvent that is no longer
    // present in packetEvents.
    bool processed = pkt.associatedEvent &&
        !conn.outstandings.packetEvents.count(*pkt.associatedEvent);
    lossVisitor(conn, pkt.packet, processed);
    // Remove the PacketEvent from the outstandings.packetEvents set
    if (pkt.associatedEvent) {
      conn.outstandings.packetEvents.erase(*pkt.associatedEvent);
      CHECK(conn.outstandings.clonedPacketCount[PacketNumberSpace::AppData]);
      --conn.outstandings.clonedPacketCount[PacketNumberSpace::AppData];
    }
    lossEvent.addLostPacket(pkt);
    if (!processed) {
      CHECK(conn.outstandings.packetCount[PacketNumberSpace::AppData]);
      --conn.outstandings.packetCount[PacketNumberSpace::AppData];
    }
    // Erase, then continue from the next AppData packet at or after the
    // position returned by erase().
    iter = getNextOutstandingPacket(
        conn,
        PacketNumberSpace::AppData,
        conn.outstandings.packets.erase(iter));
  }

  conn.lossState.rtxCount += lossEvent.lostPackets;
  if (conn.congestionController && lossEvent.largestLostPacketNum.hasValue()) {
    conn.congestionController->onRemoveBytesFromInflight(lossEvent.lostBytes);
  }
  VLOG(10) << __func__ << " marked=" << lossEvent.lostPackets;
}
} // namespace quic