bool BcmEcmpEgress::addEgressIdHwLocked()

in fboss/agent/hw/bcm/BcmEgress.cpp [652:825]


bool BcmEcmpEgress::addEgressIdHwLocked(
    int unit,
    EgressId ecmpId,
    const EgressId2Weight& egressId2WeightInSw,
    EgressId toAdd,
    SwitchRunState runState,
    bool ucmpEnabled,
    bool wideEcmpSupported,
    bool useHsdk) {
  if (egressId2WeightInSw.find(toAdd) == egressId2WeightInSw.end()) {
    // Egress id is not part of this ecmp group. Nothing
    // to do.
    return false;
  }
  int numPaths = 0;
  if (ucmpEnabled) {
    numPaths = egressId2WeightInSw.size();
  } else {
    for (auto path : egressId2WeightInSw) {
      numPaths += path.second;
    }
  }

  // We always check for egress Id already existing before adding it to the
  // ECMP group for 2 use cases:
  // a) Port X goes down and we remove Egress object (say) PE from all ecmp
  //    objects. Subsquently we add another ecmp egress object say Y that
  //    includes PE, now when port comes back up we will try to add PE back to
  //    all ecmp egress objects that include it, but Y already has PE and we
  //    will get an error. So check before adding.
  // b) On warm boot host table calls linkUpHwLocked for all ports that are up
  //    (since ports may have come up while we were not running). Thus there
  //    may be ports which were up both before and after, for which we would
  //    try to add egressIds that already are in h/w, hence the check.
  //
  // (a) can be tackled by guarding against routes pointing to unresolved
  // egress entries, but not (b)
  // It might appear that there is a race b/w getting the ECMP entry from HW,
  // checking for presence of egressId to be added and then adding that.
  // Indeed we could get a removeEgressIdHwNotLocked event which removes the
  // concerned egressId just after we checked that the egressId is present.
  // This is actually safe removeEgressIdHwNotLocked is only called from the
  // link scan handler when ports go down. On port going down, we remove the
  // concerned egress entries, mark ARP/NDP for those entries as pending. Then
  // on port up, ARP/NDP will resolve and we will get a second chance to add
  // the egress ID back.
  bcm_l3_egress_ecmp_t existing;
  bcm_l3_egress_ecmp_t_init(&existing);
  existing.ecmp_intf = ecmpId;
  int countInHw = 0;
  int ret;
  // @lint-ignore CLANGTIDY
  bcm_l3_ecmp_member_t membersInHw[numPaths];
  int totalMembersInHw = -1;
  int memberIndex = -1;
  std::set<EgressId> activeMembers;

  if (useHsdk) {
    ret = bcm_l3_ecmp_get(
        unit, &existing, numPaths, membersInHw, &totalMembersInHw);
    bcmCheckError(ret, "Unable to get ecmp entry ", ecmpId);
    for (size_t i = 0; i < totalMembersInHw; ++i) {
      if (toAdd == membersInHw[i].egress_if) {
        if (ucmpEnabled) {
          countInHw = membersInHw[i].weight;
          memberIndex = i;
          break;
        } else {
          ++countInHw;
        }
      }
    }
  } else {
    // @lint-ignore CLANGTIDY
    // In WideECMP case, HW will have kMaxWeightedEcmpPaths entries
    bcm_if_t pathsInHw[kMaxWeightedEcmpPaths];
    int totalPathsInHw;
    ret = bcm_l3_egress_ecmp_get(
        unit, &existing, kMaxWeightedEcmpPaths, pathsInHw, &totalPathsInHw);
    bcmCheckError(ret, "Unable to get ecmp entry ", ecmpId);
    for (size_t i = 0; i < totalPathsInHw; ++i) {
      activeMembers.emplace(pathsInHw[i]);
      if (toAdd == pathsInHw[i]) {
        ++countInHw;
      }
    }
  }
  auto countInSw = egressId2WeightInSw.at(toAdd);
  if (countInSw <= countInHw) {
    return false; // Already exists no need to update
  }

  if (useHsdk) {
    if (ucmpEnabled) {
      if (memberIndex != -1) {
        // member already exists, just update its weight
        membersInHw[memberIndex].weight = countInSw;
        ret = bcm_l3_ecmp_create(
            unit,
            BCM_L3_ECMP_O_CREATE_WITH_ID | BCM_L3_ECMP_O_REPLACE,
            &existing,
            totalMembersInHw,
            membersInHw);
        bcmCheckError(
            ret,
            "Error updating weight of member ",
            toAdd,
            " in ecmp entry ",
            ecmpId);
      } else {
        // member does not exist, add it
        bcm_l3_ecmp_member_t member;
        bcm_l3_ecmp_member_t_init(&member);
        member.egress_if = toAdd;
        member.weight = countInSw;
        ret = bcm_l3_ecmp_member_add(unit, ecmpId, &member);
        bcmCheckError(
            ret, "Error adding member ", toAdd, " to ecmp entry ", ecmpId);
      }
      XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                 << " with native ucmp enabled";
    } else {
      for (int i = 0; i < countInSw - countInHw; ++i) {
        bcm_l3_ecmp_member_t member;
        bcm_l3_ecmp_member_t_init(&member);
        member.egress_if = toAdd;
        ret = bcm_l3_ecmp_member_add(unit, ecmpId, &member);
        bcmCheckError(ret, "Error adding ", toAdd, " to ", ecmpId);
        if (runState < SwitchRunState::INITIALIZED) {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " with native ucmp disabled"
                     << " before transitioning to INIT state";
        } else {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " with native ucmp disabled";
        }
      }
    }
  } else {
    // check whether we need to create weighted ecmp
    if (numPaths <= kMaxNonWeightedEcmpPaths) {
      for (int i = 0; i < countInSw - countInHw; ++i) {
        // Egress id exists in s/w but not in HW, add it
        bcm_l3_egress_ecmp_t obj;
        bcm_l3_egress_ecmp_t_init(&obj);
        obj.ecmp_intf = ecmpId;
        ret = bcm_l3_egress_ecmp_add(unit, &obj, toAdd);
        bcmCheckError(ret, "Error adding ", toAdd, " to ", ecmpId);
        if (runState < SwitchRunState::INITIALIZED) {
          // If a port transitioned to down state before warm boot
          // and in the rare scenario that ARP/ND was not removed,
          // the ECMP member will be added back here. However this
          // will be short lived since linkUp/DownHwLocked will be
          // involked at end of warmboot init.
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId
                     << " before transitioning to INIT state";
        } else {
          XLOG(DBG1) << "Added " << toAdd << " to " << ecmpId;
        }
      }
    } else {
      if (!isWideEcmpEnabled(wideEcmpSupported)) {
        XLOG(ERR) << "Wide ECMP is not enabled. NumPaths : " << numPaths
                  << " Ecmp Width : " << FLAGS_ecmp_width;
        return false;
      }
      // create weighted ECMP
      activeMembers.emplace(toAdd);
      programWideEcmp(unit, ecmpId, egressId2WeightInSw, activeMembers);
      XLOG(DBG1) << "Added " << toAdd << " to wide ecmp " << ecmpId;
    }
  }
  return true;
}