in fboss/agent/hw/bcm/BcmEgress.cpp [652:825]
/*
 * Ensure that egress `toAdd` is present in hardware ECMP group `ecmpId` with
 * the multiplicity/weight recorded for it in `egressId2WeightInSw`.
 *
 * The current hardware membership is read back first and compared with the
 * software view; hardware is only touched when software asks for more than
 * what is already programmed.
 *
 * Programming strategy:
 *  - useHsdk && ucmpEnabled: a single member entry carries the weight. An
 *    existing member has its weight rewritten via a replace-create of the
 *    whole group; otherwise a new weighted member is added.
 *  - useHsdk && !ucmpEnabled: the member is added once per missing replica.
 *  - !useHsdk, numPaths <= kMaxNonWeightedEcmpPaths: the egress id is added
 *    once per missing replica.
 *  - !useHsdk, larger groups: the group is reprogrammed via programWideEcmp,
 *    provided isWideEcmpEnabled(wideEcmpSupported) holds.
 *
 * @return false if `toAdd` is not part of this group in software, if hardware
 *         already holds at least the software count, or if a wide ECMP group
 *         is required but wide ECMP is not enabled; true once hardware has
 *         been updated.
 *
 * SDK failures are routed through bcmCheckError (defined elsewhere;
 * presumably it throws - confirm). The "HwLocked" suffix suggests the caller
 * already holds the hardware lock - verify against callers.
 */
bool BcmEcmpEgress::addEgressIdHwLocked(
    int unit,
    EgressId ecmpId,
    const EgressId2Weight& egressId2WeightInSw,
    EgressId toAdd,
    SwitchRunState runState,
    bool ucmpEnabled,
    bool wideEcmpSupported,
    bool useHsdk) {
  // Single lookup: the iterator is reused below to read the software weight.
  const auto toAddIter = egressId2WeightInSw.find(toAdd);
  if (toAddIter == egressId2WeightInSw.end()) {
    // Egress id is not part of this ecmp group. Nothing
    // to do.
    return false;
  }
  // With native UCMP every egress occupies one (weighted) member slot;
  // otherwise each egress is replicated `weight` times.
  int numPaths = 0;
  if (ucmpEnabled) {
    numPaths = egressId2WeightInSw.size();
  } else {
    for (const auto& path : egressId2WeightInSw) {
      numPaths += path.second;
    }
  }
  // We always check for egress Id already existing before adding it to the
  // ECMP group for 2 use cases:
  // a) Port X goes down and we remove Egress object (say) PE from all ecmp
  // objects. Subsequently we add another ecmp egress object say Y that
  // includes PE, now when port comes back up we will try to add PE back to
  // all ecmp egress objects that include it, but Y already has PE and we
  // will get an error. So check before adding.
  // b) On warm boot host table calls linkUpHwLocked for all ports that are up
  // (since ports may have come up while we were not running). Thus there
  // may be ports which were up both before and after, for which we would
  // try to add egressIds that already are in h/w, hence the check.
  //
  // (a) can be tackled by guarding against routes pointing to unresolved
  // egress entries, but not (b)
  // It might appear that there is a race b/w getting the ECMP entry from HW,
  // checking for presence of egressId to be added and then adding that.
  // Indeed we could get a removeEgressIdHwNotLocked event which removes the
  // concerned egressId just after we checked that the egressId is present.
  // This is actually safe because removeEgressIdHwNotLocked is only called
  // from the link scan handler when ports go down. On port going down, we
  // remove the concerned egress entries, mark ARP/NDP for those entries as
  // pending. Then on port up, ARP/NDP will resolve and we will get a second
  // chance to add the egress ID back.
  bcm_l3_egress_ecmp_t existing;
  bcm_l3_egress_ecmp_t_init(&existing);
  existing.ecmp_intf = ecmpId;
  int countInHw = 0;
  int ret = 0;
  // Declared at function scope because the UCMP weight-update path below
  // writes the (modified) member list back to hardware.
  // @lint-ignore CLANGTIDY
  bcm_l3_ecmp_member_t membersInHw[numPaths];
  // Counts start at 0 and are walked with signed indices so that a count the
  // SDK leaves untouched (or negative) can never turn into a huge unsigned
  // loop bound.
  int totalMembersInHw = 0;
  int memberIndex = -1;
  std::set<EgressId> activeMembers;
  if (useHsdk) {
    ret = bcm_l3_ecmp_get(
        unit, &existing, numPaths, membersInHw, &totalMembersInHw);
    bcmCheckError(ret, "Unable to get ecmp entry ", ecmpId);
    for (int i = 0; i < totalMembersInHw; ++i) {
      if (toAdd == membersInHw[i].egress_if) {
        if (ucmpEnabled) {
          // Weighted member: one entry carries the whole weight, so the
          // first match is the only one we need.
          countInHw = membersInHw[i].weight;
          memberIndex = i;
          break;
        } else {
          ++countInHw;
        }
      }
    }
  } else {
    // In WideECMP case, HW will have kMaxWeightedEcmpPaths entries
    // @lint-ignore CLANGTIDY
    bcm_if_t pathsInHw[kMaxWeightedEcmpPaths];
    int totalPathsInHw = 0;
    ret = bcm_l3_egress_ecmp_get(
        unit, &existing, kMaxWeightedEcmpPaths, pathsInHw, &totalPathsInHw);
    bcmCheckError(ret, "Unable to get ecmp entry ", ecmpId);
    for (int i = 0; i < totalPathsInHw; ++i) {
      // Remember every distinct egress currently in HW; the wide ECMP path
      // below reprograms the group from this set.
      activeMembers.emplace(pathsInHw[i]);
      if (toAdd == pathsInHw[i]) {
        ++countInHw;
      }
    }
  }
  const auto countInSw = toAddIter->second;
  if (countInSw <= countInHw) {
    return false; // Already exists no need to update
  }
  if (useHsdk) {
    if (ucmpEnabled) {
      if (memberIndex != -1) {
        // member already exists, just update its weight
        membersInHw[memberIndex].weight = countInSw;
        ret = bcm_l3_ecmp_create(
            unit,
            BCM_L3_ECMP_O_CREATE_WITH_ID | BCM_L3_ECMP_O_REPLACE,
            &existing,
            totalMembersInHw,
            membersInHw);
        bcmCheckError(
            ret,
            "Error updating weight of member ",
            toAdd,
            " in ecmp entry ",
            ecmpId);
      } else {
        // member does not exist, add it
        bcm_l3_ecmp_member_t member;
        bcm_l3_ecmp_member_t_init(&member);
        member.egress_if = toAdd;
        member.weight = countInSw;
        ret = bcm_l3_ecmp_member_add(unit, ecmpId, &member);
        bcmCheckError(
            ret, "Error adding member ", toAdd, " to ecmp entry ", ecmpId);
      }
      XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                 << " with native ucmp enabled";
    } else {
      // Without native UCMP the weight is expressed by replication: add one
      // member per replica that is missing in HW.
      for (int i = 0; i < countInSw - countInHw; ++i) {
        bcm_l3_ecmp_member_t member;
        bcm_l3_ecmp_member_t_init(&member);
        member.egress_if = toAdd;
        ret = bcm_l3_ecmp_member_add(unit, ecmpId, &member);
        bcmCheckError(ret, "Error adding ", toAdd, " to ", ecmpId);
        if (runState < SwitchRunState::INITIALIZED) {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " with native ucmp disabled"
                     << " before transitioning to INIT state";
        } else {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " with native ucmp disabled";
        }
      }
    }
  } else {
    // check whether we need to create weighted ecmp
    if (numPaths <= kMaxNonWeightedEcmpPaths) {
      for (int i = 0; i < countInSw - countInHw; ++i) {
        // Egress id exists in s/w but not in HW, add it
        bcm_l3_egress_ecmp_t obj;
        bcm_l3_egress_ecmp_t_init(&obj);
        obj.ecmp_intf = ecmpId;
        ret = bcm_l3_egress_ecmp_add(unit, &obj, toAdd);
        bcmCheckError(ret, "Error adding ", toAdd, " to ", ecmpId);
        if (runState < SwitchRunState::INITIALIZED) {
          // If a port transitioned to down state before warm boot
          // and in the rare scenario that ARP/ND was not removed,
          // the ECMP member will be added back here. However this
          // will be short lived since linkUp/DownHwLocked will be
          // invoked at end of warmboot init.
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " before transitioning to INIT state";
        } else {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId;
        }
      }
    } else {
      if (!isWideEcmpEnabled(wideEcmpSupported)) {
        XLOG(ERR) << "Wide ECMP is not enabled. NumPaths : " << numPaths
                  << " Ecmp Width : " << FLAGS_ecmp_width;
        return false;
      }
      // create weighted ECMP
      activeMembers.emplace(toAdd);
      programWideEcmp(unit, ecmpId, egressId2WeightInSw, activeMembers);
      XLOG(DBG1) << "Added " << toAdd << " to wide ecmp " << ecmpId;
    }
  }
  return true;
}