fboss/agent/hw/bcm/BcmSwitch.cpp

/* * Copyright (c) 2004-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. An additional grant * of patent rights can be found in the PATENTS file in the same directory. * */ #include "fboss/agent/hw/bcm/BcmSwitch.h" #include <boost/cast.hpp> #include <boost/filesystem/operations.hpp> #include <fstream> #include <map> #include <optional> #include <utility> #include <folly/Conv.h> #include <folly/FileUtil.h> #include <folly/Memory.h> #include <folly/hash/Hash.h> #include <folly/logging/xlog.h> #include "fboss/agent/Constants.h" #include "fboss/agent/FbossError.h" #include "fboss/agent/FibHelpers.h" #include "fboss/agent/LacpTypes.h" #include "fboss/agent/SwSwitch.h" #include "fboss/agent/SwitchStats.h" #include "fboss/agent/Utils.h" #include "fboss/agent/gen-cpp2/switch_config_types.h" #include "fboss/agent/hw/BufferStatsLogger.h" #include "fboss/agent/hw/HwSwitchStats.h" #include "fboss/agent/hw/bcm/BcmAPI.h" #include "fboss/agent/hw/bcm/BcmAclEntry.h" #include "fboss/agent/hw/bcm/BcmAclTable.h" #include "fboss/agent/hw/bcm/BcmAddressFBConvertors.h" #include "fboss/agent/hw/bcm/BcmBstStatsMgr.h" #include "fboss/agent/hw/bcm/BcmControlPlane.h" #include "fboss/agent/hw/bcm/BcmCosManager.h" #include "fboss/agent/hw/bcm/BcmEgressManager.h" #include "fboss/agent/hw/bcm/BcmEgressQueueFlexCounter.h" #include "fboss/agent/hw/bcm/BcmError.h" #include "fboss/agent/hw/bcm/BcmFacebookAPI.h" #include "fboss/agent/hw/bcm/BcmFieldProcessorUtils.h" #include "fboss/agent/hw/bcm/BcmHost.h" #include "fboss/agent/hw/bcm/BcmHostKey.h" #include "fboss/agent/hw/bcm/BcmIngressFieldProcessorFlexCounter.h" #include "fboss/agent/hw/bcm/BcmIntf.h" #include "fboss/agent/hw/bcm/BcmLabelMap.h" #include "fboss/agent/hw/bcm/BcmLabelSwitchingUtils.h" #include "fboss/agent/hw/bcm/BcmLogBuffer.h" #include "fboss/agent/hw/bcm/BcmMacTable.h" #include "fboss/agent/hw/bcm/BcmMirrorTable.h" #include "fboss/agent/hw/bcm/BcmNextHop.h" #include "fboss/agent/hw/bcm/BcmPlatform.h" #include "fboss/agent/hw/bcm/BcmPort.h" #include "fboss/agent/hw/bcm/BcmPortGroup.h" #include "fboss/agent/hw/bcm/BcmPortTable.h" #include "fboss/agent/hw/bcm/BcmPtpTcMgr.h" #include "fboss/agent/hw/bcm/BcmQcmManager.h" #include "fboss/agent/hw/bcm/BcmQosPolicyTable.h" #include "fboss/agent/hw/bcm/BcmRoute.h" #include "fboss/agent/hw/bcm/BcmRouteCounter.h" #include "fboss/agent/hw/bcm/BcmRtag7LoadBalancer.h" #include "fboss/agent/hw/bcm/BcmRxPacket.h" #include "fboss/agent/hw/bcm/BcmSdkVer.h" #include "fboss/agent/hw/bcm/BcmSflowExporter.h" #include "fboss/agent/hw/bcm/BcmStatUpdater.h" #include "fboss/agent/hw/bcm/BcmSwitchEventCallback.h" #include "fboss/agent/hw/bcm/BcmSwitchEventUtils.h" #include "fboss/agent/hw/bcm/BcmSwitchSettings.h" #include "fboss/agent/hw/bcm/BcmTableStats.h" #include "fboss/agent/hw/bcm/BcmTrunk.h" #include "fboss/agent/hw/bcm/BcmTrunkTable.h" #include "fboss/agent/hw/bcm/BcmTxPacket.h" #include "fboss/agent/hw/bcm/BcmUnit.h" #include "fboss/agent/hw/bcm/BcmWarmBootCache.h" #include "fboss/agent/hw/bcm/BcmWarmBootHelper.h" #include "fboss/agent/hw/bcm/BcmWarmBootState.h" #include "fboss/agent/hw/bcm/PacketTraceUtils.h" #include "fboss/agent/hw/bcm/RxUtils.h" #include "fboss/agent/hw/bcm/gen-cpp2/packettrace_types.h" #include "fboss/agent/hw/mock/MockRxPacket.h" #include "fboss/agent/hw/switch_asics/HwAsic.h" #include "fboss/agent/state/AclEntry.h" #include "fboss/agent/state/AggregatePort.h" #include "fboss/agent/state/ArpEntry.h" #include "fboss/agent/state/DeltaFunctions.h" #include "fboss/agent/state/ForwardingInformationBase.h" #include "fboss/agent/state/ForwardingInformationBaseContainer.h" #include "fboss/agent/state/ForwardingInformationBaseDelta.h" #include "fboss/agent/state/ForwardingInformationBaseMap.h" #include "fboss/agent/state/Interface.h" #include "fboss/agent/state/InterfaceMap.h" #include "fboss/agent/state/LoadBalancer.h" #include "fboss/agent/state/LoadBalancerMap.h" #include "fboss/agent/state/NodeMapDelta-defs.h" #include "fboss/agent/state/NodeMapDelta.h" #include "fboss/agent/state/Port.h" #include "fboss/agent/state/PortMap.h" #include "fboss/agent/state/Route.h" #include "fboss/agent/state/TransceiverMap.h" #include "fboss/agent/state/SflowCollector.h" #include "fboss/agent/state/SflowCollectorMap.h" #include "fboss/agent/state/StateDelta.h" #include "fboss/agent/state/SwitchState-defs.h" #include "fboss/agent/state/SwitchState.h" #include "fboss/agent/state/Vlan.h" #include "fboss/agent/state/VlanMap.h" #include "fboss/agent/state/VlanMapDelta.h" #include "fboss/agent/types.h" extern "C" { #include <bcm/link.h> #include <bcm/port.h> #include <bcm/stg.h> #include <bcm/switch.h> #include <bcm/vlan.h> #if (defined(IS_OPENNSA)) #define BCM_WARM_BOOT_SUPPORT #include <soc/opensoc.h> #else #include <soc/drv.h> #endif } using std::make_pair; using std::make_shared; using std::make_unique; using std::move; using std::shared_ptr; using std::string; using std::unique_ptr; using std::chrono::duration_cast; using std::chrono::seconds; using facebook::fboss::DeltaFunctions::forEachAdded; using facebook::fboss::DeltaFunctions::forEachChanged; using facebook::fboss::DeltaFunctions::forEachRemoved; using folly::IPAddress; using folly::IPAddressV4; using folly::IPAddressV6; using namespace std::chrono; using namespace facebook::fboss::utility; DEFINE_int32(linkscan_interval_us, 250000, "The Broadcom linkscan interval"); DEFINE_bool(force_init_fp, true, "Force full field processor initialization"); DEFINE_string( script_pre_asic_init, "script_pre_asic_init", "Broadcom script file to be run before ASIC init"); DEFINE_int32( copp_acl_entry_priority_start, 1, "Starting ACL entry priority for CoPP"); DEFINE_int32(acl_g_pri, 0, "Group priority for ACL field group"); DEFINE_int32( qcm_ifp_gid, 5, "Content aware processor group ID for ACLs specific to qcm"); // Put lowest priority for this group among all i.e. lower than acl_g_pri. DEFINE_int32(qcm_ifp_pri, -1, "Group priority for ACL field group"); DECLARE_int32(update_watermark_stats_interval_s); enum : uint8_t { kRxCallbackPriority = 1, }; namespace { constexpr auto kHostTable = "hostTable"; constexpr int kLogBcmErrorFreqMs = 3000; // On new platforms we found sflow samplig rate to be inconsistent // BRCM found this seed provides better results (see CS00011944544) constexpr auto kHSDKSflowSamplingSeed = 0x2f64c448; void rethrowIfHwNotFull(const facebook::fboss::BcmError& error) { if (error.getBcmError() != BCM_E_FULL) { // If this is not because of TCAM being full, rethrow the exception. throw error; } XLOG(WARNING) << error.what(); } /* * For the devices/SDK we use on pre-TH4, the only events we should get (and * process) are ADD and DELETE. Learning generates ADD, aging generates DELETE, * while Mac move results in DELETE followed by ADD. We ASSERT this explicitly * via HW tests. */ const std::map<int, facebook::fboss::L2EntryUpdateType> kL2AddrBasicUpdateOperationsOfInterest = { {BCM_L2_CALLBACK_DELETE, facebook::fboss::L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_DELETE}, {BCM_L2_CALLBACK_ADD, facebook::fboss::L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_ADD}, }; /* * For the devices/SDK we use on TH4, the only events we should get and process * are AGE, LEARN and MOVE. * Calling bcm_l2_addr_add() to add new entry will trigger ADD * Calling bcm_l2_addr_add() to update entry (e.g. class id) will trigger * DELETE followed by ADD. * Calling bcm_l2_addr_delete() to remove existing entry will trigger DELETE. * Learning generates LEARN, aging generates AGE, while Mac move results in * DELETE followed by MOVE. * Thus, we may only focus on AGE, LEARN, MOVE, and ignore ADD, DELETE. * We ASSERT this explicitly via HW tests. */ const std::map<int, facebook::fboss::L2EntryUpdateType> kL2AddrDetailedUpdateOperationsOfInterest = { {BCM_L2_CALLBACK_AGE_EVENT, facebook::fboss::L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_DELETE}, {BCM_L2_CALLBACK_LEARN_EVENT, facebook::fboss::L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_ADD}, {BCM_L2_CALLBACK_MOVE_EVENT, facebook::fboss::L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_ADD}, }; using facebook::fboss::AggregatePortID; using facebook::fboss::L2Entry; using facebook::fboss::L2EntryUpdateType; using facebook::fboss::macFromBcm; using facebook::fboss::PortDescriptor; using facebook::fboss::PortID; using facebook::fboss::VlanID; using facebook::fboss::cfg::AclLookupClass; L2Entry createL2Entry(const bcm_l2_addr_t* l2Addr, bool isPending) { CHECK(l2Addr); auto l2EntryType = isPending ? L2Entry::L2EntryType::L2_ENTRY_TYPE_PENDING : L2Entry::L2EntryType::L2_ENTRY_TYPE_VALIDATED; if (!(l2Addr->flags & BCM_L2_TRUNK_MEMBER)) { return L2Entry( macFromBcm(l2Addr->mac), VlanID(l2Addr->vid), PortDescriptor(PortID(l2Addr->port)), l2EntryType, l2Addr->group != 0 ? std::optional<AclLookupClass>(AclLookupClass(l2Addr->group)) : std::nullopt); } else { return L2Entry( macFromBcm(l2Addr->mac), VlanID(l2Addr->vid), PortDescriptor(AggregatePortID(l2Addr->tgid)), l2EntryType, l2Addr->group != 0 ? std::optional<AclLookupClass>(AclLookupClass(l2Addr->group)) : std::nullopt); } } bool isL2OperationOfInterest(int operation, bool detailedUpdate) { if (detailedUpdate) { return kL2AddrDetailedUpdateOperationsOfInterest.find(operation) != kL2AddrDetailedUpdateOperationsOfInterest.end(); } return kL2AddrBasicUpdateOperationsOfInterest.find(operation) != kL2AddrBasicUpdateOperationsOfInterest.end(); } L2EntryUpdateType getL2EntryUpdateType(int operation, bool detailedUpdate) { CHECK(isL2OperationOfInterest(operation, detailedUpdate)); if (detailedUpdate) { return kL2AddrDetailedUpdateOperationsOfInterest.find(operation)->second; } return kL2AddrBasicUpdateOperationsOfInterest.find(operation)->second; } /* * How many bytes we copy for sFlow sampling */ const unsigned int kMaxSflowSnapLen = 128; bool isValidLabeledNextHopSet( facebook::fboss::BcmPlatform* platform, const facebook::fboss::LabelNextHopSet& nexthops) { if (!facebook::fboss::LabelForwardingInformationBase::isValidNextHopSet( nexthops)) { return false; } std::optional<facebook::fboss::LabelForwardingAction::LabelForwardingType> forwardingType; for (const auto& nexthop : nexthops) { const auto& labelForwardingAction = nexthop.labelForwardingAction(); if (labelForwardingAction->type() == facebook::fboss::LabelForwardingAction::LabelForwardingType::PUSH && labelForwardingAction->pushStack()->size() > platform->getAsic()->getMaxLabelStackDepth()) { // label stack to push exceeds what platform can support XLOG(ERR) << "next hoop " << nexthop.str() << " label stack exceeds " << platform->getAsic()->getMaxLabelStackDepth(); return false; } if (!forwardingType.has_value()) { forwardingType = labelForwardingAction->type(); } else if (forwardingType != labelForwardingAction->type()) { // each next hop must have same LabelForwardingAction type. XLOG(ERR) << "more than one forwarding action in next hop set"; return false; } } return true; } } // namespace namespace facebook::fboss { phy::FecMode BcmSwitch::getPortFECMode(PortID port) const { return getPortTable()->getBcmPort(port)->getFECMode(); } cfg::PortSpeed BcmSwitch::getPortMaxSpeed(PortID port) const { return getPortTable()->getBcmPort(port)->getMaxSpeed(); } BcmSwitch::BcmSwitch(BcmPlatform* platform, uint32_t featuresDesired) : BcmSwitchIf(featuresDesired), platform_(platform), mmuBufferBytes_(platform->getMMUBufferBytes()), mmuCellBytes_(platform->getMMUCellBytes()), warmBootCache_(new BcmWarmBootCache(this)), portTable_(new BcmPortTable(this)), intfTable_(new BcmIntfTable(this)), hostTable_(new BcmHostTable(this)), egressManager_(new BcmEgressManager(this)), neighborTable_(new BcmNeighborTable(this)), l3NextHopTable_(new BcmL3NextHopTable(this)), mplsNextHopTable_(new BcmMplsNextHopTable(this)), multiPathNextHopTable_(new BcmMultiPathNextHopTable(this)), labelMap_(new BcmLabelMap(this)), routeCounterTable_( getPlatform()->getAsic()->isSupported( HwAsic::Feature::ROUTE_FLEX_COUNTERS) ? static_cast<BcmRouteCounterTableBase*>( new BcmRouteFlexCounterTable(this)) : static_cast<BcmRouteCounterTableBase*>( new BcmRouteCounterTable(this))), routeTable_(new BcmRouteTable(this)), qosPolicyTable_(new BcmQosPolicyTable(this)), aclTable_(new BcmAclTable(this)), trunkTable_(new BcmTrunkTable(this)), sFlowExporterTable_(new BcmSflowExporterTable()), rtag7LoadBalancer_(new BcmRtag7LoadBalancer(this)), mirrorTable_(new BcmMirrorTable(this)), bstStatsMgr_(new BcmBstStatsMgr(this)), switchSettings_(new BcmSwitchSettings(this)), macTable_(new BcmMacTable(this)), qcmManager_(new BcmQcmManager(this)), ptpTcMgr_(new BcmPtpTcMgr(this)) {} BcmSwitch::~BcmSwitch() { XLOG(INFO) << "Destroying BcmSwitch"; resetTables(); if (unitObject_) { // In agent this would be done in the signal handler // gracefulExitImpl(). In bcm_tests there is no signal. // So if unitObject_ is still valid, destroy it. unitObject_.reset(); } } void BcmSwitch::resetTables() { std::unique_lock<std::mutex> lk(lock_); unregisterCallbacks(); labelMap_.reset(); routeTable_.reset(); // ACL entries now may hold reference to multi path nexthop. // So release ACLs before any nexthop related tables: // l3 nexthop table // mpls nexthop table // host table // multi path nexthop table aclTable_->releaseAcls(); aclTable_.reset(); l3NextHopTable_.reset(); mplsNextHopTable_.reset(); // Release host entries before reseting switch's host table // entries so that if host try to refer to look up host table // via the BCM switch during their destruction the pointer // access is still valid. hostTable_->releaseHosts(); routeCounterTable_.reset(); // reset neighbors before resetting host table neighborTable_.reset(); // reset interfaces before host table, as interfaces have // host references now. intfTable_.reset(); egressManager_.reset(); multiPathNextHopTable_.reset(); hostTable_.reset(); toCPUEgress_.reset(); portTable_.reset(); qosPolicyTable_.reset(); mirrorTable_.reset(); trunkTable_.reset(); controlPlane_.reset(); rtag7LoadBalancer_.reset(); bcmStatUpdater_.reset(); bstStatsMgr_.reset(); switchSettings_.reset(); macTable_.reset(); qcmManager_.reset(); ptpTcMgr_.reset(); queueFlexCounterMgr_.reset(); // Reset warmboot cache last in case Bcm object destructors // access it during object deletion. warmBootCache_.reset(); } void BcmSwitch::unregisterCallbacks() { if (flags_ & RX_REGISTERED) { int rv; bcm_rx_stop(unit_, nullptr); if (!usePKTIO()) { rv = bcm_rx_unregister(unit_, packetRxSdkCallback, kRxCallbackPriority); } else { #ifdef INCLUDE_PKTIO rv = bcm_pktio_rx_unregister( unit_, pktioPacketRxSdkCallback, kRxCallbackPriority); #else throw FbossError("invalid PKTIO configuration."); #endif } CHECK(BCM_SUCCESS(rv)) << "failed to unregister BcmSwitch rx callback: " << bcm_errmsg(rv); flags_ &= ~RX_REGISTERED; } // Note that we don't explicitly call bcm_linkscan_detach() here-- // this call is not thread safe and should only be called from the main // thread. However, bcm_detach() / _bcm_shutdown() will clean up the // linkscan module properly. if (flags_ & LINKSCAN_REGISTERED) { auto rv = bcm_linkscan_unregister(unit_, linkscanCallback); CHECK(BCM_SUCCESS(rv)) << "failed to unregister BcmSwitch linkscan " "callback: " << bcm_errmsg(rv); stopLinkscanThread(); flags_ &= ~LINKSCAN_REGISTERED; } if (flags_ & PFC_WATCHDOG_REGISTERED) { // mostly to ensure that callback is not triggered while we are in // "process" of shutting down auto rv = bcm_cosq_pfc_deadlock_recovery_event_unregister( unit_, pfcDeadlockRecoveryEventCallback, callback_); CHECK(BCM_SUCCESS(rv)) << "failed to unregister BcmSwitch pfc watchdog" "callback: " << bcm_errmsg(rv); flags_ &= ~PFC_WATCHDOG_REGISTERED; XLOG(INFO) << "Unregistered pfc watchdog"; } /* * SOFTWARE L2 learning mode enables callback when there are updates to L2 * table. Disable it: we don't want these callbacks (that call BcmSwitch * method) to fire after BcmSwitch object is destroyed. */ if (switchSettings_->getL2LearningMode().has_value() && switchSettings_->getL2LearningMode().value() == cfg::L2LearningMode::SOFTWARE) { switchSettings_->disableL2LearningCallback(); } } void BcmSwitch::gracefulExitImpl(folly::dynamic& switchState) { steady_clock::time_point begin = steady_clock::now(); XLOG(INFO) << "[Exit] Starting BCM Switch graceful exit"; // Ideally, preparePortsForGracefulExit() would run in update EVB of the // SwSwitch, but it does not really matter at the graceful exit time. If // this is a concern, this can be moved to the updateEventBase_ of SwSwitch. portTable_->preparePortsForGracefulExit(); bstStatsMgr_->stopBufferStatCollection(); qcmManager_->stop(); std::lock_guard<std::mutex> g(lock_); // This will run some common shell commands to give more info about // the underlying bcm sdk state dumpState(platform_->getWarmBootHelper()->shutdownSdkDumpFile()); switchState[kHwSwitch] = toFollyDynamic(); unitObject_->writeWarmBootState(switchState); unitObject_.reset(); XLOG(INFO) << "[Exit] BRCM Graceful Exit time " << duration_cast<duration<float>>(steady_clock::now() - begin).count(); } folly::dynamic BcmSwitch::toFollyDynamic() const { return warmBootCache_->getWarmBootStateFollyDynamic(); } int BcmSwitch::addL2TableCb( int /*unit*/, bcm_l2_addr_t* l2Addr, void* userData) { auto* bcmSw = static_cast<BcmSwitch*>(userData); bcmSw->callback_->l2LearningUpdateReceived( createL2Entry(l2Addr, bcmSw->isL2EntryPending(l2Addr)), L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_ADD); return 0; } int BcmSwitch::addL2TableCbForPendingOnly( int /*unit*/, bcm_l2_addr_t* l2Addr, void* userData) { auto* bcmSw = static_cast<BcmSwitch*>(userData); if (bcmSw->isL2EntryPending(l2Addr)) { bcmSw->callback_->l2LearningUpdateReceived( createL2Entry(l2Addr, bcmSw->isL2EntryPending(l2Addr)), L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_ADD); } return 0; } int BcmSwitch::deleteL2TableCb( int /*unit*/, bcm_l2_addr_t* l2Addr, void* userData) { auto* bcmSw = static_cast<BcmSwitch*>(userData); bcmSw->callback_->l2LearningUpdateReceived( createL2Entry(l2Addr, bcmSw->isL2EntryPending(l2Addr)), L2EntryUpdateType::L2_ENTRY_UPDATE_TYPE_DELETE); return 0; } void BcmSwitch::switchRunStateChangedImpl(SwitchRunState newState) { std::lock_guard<std::mutex> g(lock_); switch (newState) { case SwitchRunState::INITIALIZED: setupLinkscan(); setupPacketRx(); break; case SwitchRunState::CONFIGURED: { /* * When L2LearningMode::SOFTWARE is set, during warmboot time window, * there is no software running that can receive L2 table update * callbacks. However, the hardware continues to learn unknown source MAC * + VLAN as PENDING entries during that time window. Once agent comes * up, it must update its MAC table and VALIDATE the PENDING entries. */ if (bootType_ == BootType::WARM_BOOT && switchSettings_->getL2LearningMode().has_value()) { if (switchSettings_->getL2LearningMode().value() == cfg::L2LearningMode::SOFTWARE) { auto rv = bcm_l2_traverse( getUnit(), BcmSwitch::addL2TableCbForPendingOnly, this); bcmCheckError(rv, "bcm_l2_traverse failed"); } } break; } default: break; } } bool BcmSwitch::isPortEnabled(PortID port) const { return portTable_->getBcmPort(port)->isEnabled(); } bool BcmSwitch::isPortUp(PortID port) const { return portTable_->getBcmPort(port)->isUp(); } std::shared_ptr<SwitchState> BcmSwitch::getColdBootSwitchState() const { auto bootState = make_shared<SwitchState>(); uint32_t flags = 0; for (const auto& kv : *portTable_) { uint32_t port_flags; PortID portID = kv.first; auto rv = bcm_port_learn_get(getUnit(), portID, &port_flags); bcmCheckError(rv, "Unable to get L2 Learning flags for port: ", portID); if (flags == 0) { flags = port_flags; } else if (flags != port_flags) { throw FbossError( "Every port should have same L2 Learning setting by default"); } } // This is cold boot, so there cannot be any L2 update callback registered. // Thus, BCM_PORT_LEARN_ARL | BCM_PORT_LEARN_FWD should be enough to // ascertain HARDWARE as L2 learning mode. cfg::L2LearningMode l2LearningMode; if (flags == (BCM_PORT_LEARN_ARL | BCM_PORT_LEARN_FWD)) { l2LearningMode = cfg::L2LearningMode::HARDWARE; } else if (flags == (BCM_PORT_LEARN_ARL | BCM_PORT_LEARN_PENDING)) { l2LearningMode = cfg::L2LearningMode::SOFTWARE; } else { throw FbossError( "L2 Learning mode is neither SOFTWARE, nor HARDWARE, flags: ", flags); } auto switchSettings = make_shared<SwitchSettings>(); switchSettings->setL2LearningMode(l2LearningMode); bootState->resetSwitchSettings(switchSettings); bcm_vlan_t defaultVlan; auto rv = bcm_vlan_default_get(getUnit(), &defaultVlan); bcmCheckError(rv, "Unable to get default VLAN"); bootState->setDefaultVlan(VlanID(defaultVlan)); // get cpu queue settings auto cpu = make_shared<ControlPlane>(); auto cpuQueues = controlPlane_->getMulticastQueueSettings(); auto rxReasonToQueue = controlPlane_->getRxReasonToQueue(); cpu->resetQueues(cpuQueues); cpu->resetRxReasonToQueue(rxReasonToQueue); bootState->resetControlPlane(cpu); // On cold boot all ports are in Vlan 1 auto vlan = make_shared<Vlan>(VlanID(1), "InitVlan"); Vlan::MemberPorts memberPorts; for (const auto& kv : *portTable_) { PortID portID = kv.first; BcmPort* bcmPort = kv.second; string name = folly::to<string>("port", portID); auto swPort = make_shared<Port>(portID, name); auto platformPort = getPlatform()->getPlatformPort(portID); swPort->setProfileId( platformPort->getProfileIDBySpeed(bcmPort->getSpeed())); // Coldboot state can assume transceiver doesn't exist PlatformPortProfileConfigMatcher matcher{swPort->getProfileID(), portID}; if (auto profileConfig = platform_->getPortProfileConfig(matcher)) { swPort->setProfileConfig(*profileConfig->iphy()); } else { throw FbossError( "No port profile config found with matcher:", matcher.toString()); } swPort->resetPinConfigs( platform_->getPlatformMapping()->getPortIphyPinConfigs(matcher)); swPort->setSpeed(bcmPort->getSpeed()); if (platform_->getAsic()->isSupported(HwAsic::Feature::L3_QOS)) { auto queues = bcmPort->getCurrentQueueSettings(); swPort->resetPortQueues(queues); } bootState->addPort(swPort); memberPorts.insert(make_pair(portID, false)); } vlan->setPorts(memberPorts); bootState->addVlan(vlan); bootState->publish(); return bootState; } void BcmSwitch::runBcmScript(const std::string& filename) const { std::ifstream scriptFile(filename); if (!scriptFile.good()) { return; } XLOG(INFO) << "Run script " << filename; printDiagCmd(folly::to<string>("rcload ", filename)); } void BcmSwitch::setupLinkscan() { if (!(getFeaturesDesired() & FeaturesDesired::LINKSCAN_DESIRED)) { XLOG(DBG1) << " Skipping linkscan registeration as the feature is disabled"; return; } linkScanBottomHalfThread_ = std::make_unique<std::thread>([=]() { initThread("fbossLinkScanBH"); linkScanBottomHalfEventBase_.loopForever(); }); auto rv = bcm_switch_control_set(unit_, bcmSwitchLinkDownInfoSkip, 1); bcmCheckError(rv, "failed to skip port info get for link down ports"); rv = bcm_linkscan_register(unit_, linkscanCallback); bcmCheckError(rv, "failed to register for linkscan events"); flags_ |= LINKSCAN_REGISTERED; rv = bcm_linkscan_enable_set(unit_, FLAGS_linkscan_interval_us); bcmCheckError(rv, "failed to enable linkscan"); if (getBootType() == BootType::WARM_BOOT) { bcm_port_config_t pcfg; bcm_port_config_t_init(&pcfg); rv = bcm_port_config_get(unit_, &pcfg); bcmCheckError(rv, "failed to get port configuration"); forceLinkscanOn(pcfg.port); // Sometimes after warmboot sw does not have the correct state of every port // so we need to update sw state here. This needs to be done after linkscan // is enabled otherwise the sdk may return inconsistent results for (auto& port : *portTable_) { callback_->linkStateChanged(port.first, port.second->isUp()); } } } void BcmSwitch::setMacAging(std::chrono::seconds agingInterval) { /** * Enable MAC aging on the ASIC. * Every `seconds` seconds, all addresses * that have not been updated/used since the last age timer interval * are removed from the table. * * This is implemented by tracking the 'hit' bit in the L2 table. * On the aging timer, all MACs that have the hit bit set have it cleared, * and all MACs without the hit bit set are removed from the table. * * The ASIC will automatically set the hit bit anytime that MAC address is * looked up and used in a packet, either as a SRC lookup or DST lookup. * * Only set if timer is different than what's currently set. */ int currentSeconds; int targetSeconds = agingInterval.count(); // force to 32bit bcmCheckError( bcm_l2_age_timer_get(unit_, &currentSeconds), "failed to get l2_age_timer"); if (currentSeconds != targetSeconds) { XLOG(DBG1) << "Changing MAC Aging timer from " << currentSeconds << " to " << targetSeconds; bcmCheckError( bcm_l2_age_timer_set(unit_, targetSeconds), "failed to set l2_age_timer"); } } void BcmSwitch::minimalInit() { std::lock_guard<std::mutex> g(lock_); CHECK(!unitObject_); unitObject_ = BcmAPI::createOnlyUnit(platform_); unit_ = unitObject_->getNumber(); unitObject_->setCookie(this); BcmAPI::initUnit(unit_, platform_); bootType_ = platform_->getWarmBootHelper()->canWarmBoot() ? BootType::WARM_BOOT : BootType::COLD_BOOT; } HwInitResult BcmSwitch::init( Callback* callback, bool /*failHwCallsOnWarmboot*/) { HwInitResult ret; ret.rib = std::make_unique<RoutingInformationBase>(); std::lock_guard<std::mutex> g(lock_); steady_clock::time_point begin = steady_clock::now(); CHECK(!unitObject_); unitObject_ = BcmAPI::createOnlyUnit(platform_); unit_ = unitObject_->getNumber(); unitObject_->setCookie(this); BcmAPI::initUnit(unit_, platform_); bootType_ = platform_->getWarmBootHelper()->canWarmBoot() ? BootType::WARM_BOOT : BootType::COLD_BOOT; auto warmBoot = bootType_ == BootType::WARM_BOOT; callback_ = callback; ret.initializedTime = duration_cast<duration<float>>(steady_clock::now() - begin).count(); XLOG(INFO) << "Initializing BcmSwitch for unit " << unit_; // Add callbacks for unit and parity errors as early as possible to handle // critical events BcmSwitchEventUtils::initUnit(unit_, this); auto fatalCob = make_shared<BcmSwitchEventUnitFatalErrorCallback>(); auto nonFatalCob = make_shared<BcmSwitchEventUnitNonFatalErrorCallback>(); BcmSwitchEventUtils::registerSwitchEventCallback( unit_, BCM_SWITCH_EVENT_STABLE_FULL, fatalCob); BcmSwitchEventUtils::registerSwitchEventCallback( unit_, BCM_SWITCH_EVENT_STABLE_ERROR, fatalCob); BcmSwitchEventUtils::registerSwitchEventCallback( unit_, BCM_SWITCH_EVENT_UNCONTROLLED_SHUTDOWN, fatalCob); BcmSwitchEventUtils::registerSwitchEventCallback( unit_, BCM_SWITCH_EVENT_WARM_BOOT_DOWNGRADE, fatalCob); BcmSwitchEventUtils::registerSwitchEventCallback( unit_, BCM_SWITCH_EVENT_PARITY_ERROR, nonFatalCob); // Create bcmStatUpdater to cache the stat ids bcmStatUpdater_ = std::make_unique<BcmStatUpdater>(this); XLOG(INFO) << " Is ALPM enabled: " << BcmAPI::isAlpmEnabled(); // Additional switch configuration auto state = make_shared<SwitchState>(); bcm_port_config_t pcfg; bcm_port_config_t_init(&pcfg); auto rv = bcm_port_config_get(unit_, &pcfg); bcmCheckError(rv, "failed to get port configuration"); if (!warmBoot) { LOG(INFO) << " Performing cold boot "; /* initialize mirroring module */ initMirrorModule(); /* initialize MPLS */ initMplsModule(); } else { LOG(INFO) << "Performing warm boot "; // This dumps debug info about initial sdk state. Useful after warm boot. dumpState(platform_->getWarmBootHelper()->startupSdkDumpFile()); } // If the platform doesn't support auto enabling l3 egress mode if (!platform_->getAsic()->isSupported( HwAsic::Feature::L3_EGRESS_MODE_AUTO_ENABLED)) { rv = bcm_switch_control_set(unit_, bcmSwitchL3EgressMode, 1); bcmCheckError(rv, "failed to set L3 egress mode"); } if (getPlatform()->getAsic()->getAsicType() == HwAsic::AsicType::ASIC_TYPE_TOMAHAWK4) { rv = bcm_l3_enable_set(unit_, 1); bcmCheckError(rv, "failed to enable l3"); } // Trap IPv4 Address Resolution Protocol (ARP) packets. // TODO: We may want to trap ARP on a per-port or per-VLAN basis. rv = bcm_switch_control_set(unit_, bcmSwitchArpRequestToCpu, 1); bcmCheckError(rv, "failed to set ARP request trapping"); rv = bcm_switch_control_set(unit_, bcmSwitchArpReplyToCpu, 1); bcmCheckError(rv, "failed to set ARP reply trapping"); // Trap IP header TTL or hoplimit 1 to CPU rv = bcm_switch_control_set(unit_, bcmSwitchL3UcastTtl1ToCpu, 1); bcmCheckError(rv, "failed to set L3 header error trapping"); // Trap DHCP packets to CPU rv = bcm_switch_control_set(unit_, bcmSwitchDhcpPktToCpu, 1); bcmCheckError(rv, "failed to set DHCP packet trapping"); // Trap Dest miss rv = bcm_switch_control_set(unit_, bcmSwitchUnknownL3DestToCpu, 1); bcmCheckError(rv, "failed to set destination miss trapping"); rv = bcm_switch_control_set(unit_, bcmSwitchV6L3DstMissToCpu, 1); bcmCheckError(rv, "failed to set IPv6 destination miss trapping"); // Trap IPv6 Neighbor Discovery Protocol (NDP) packets. // TODO: We may want to trap NDP on a per-port or per-VLAN basis. rv = bcm_switch_control_set(unit_, bcmSwitchNdPktToCpu, 1); bcmCheckError(rv, "failed to set NDP trapping"); disableHotSwap(); if (FLAGS_force_init_fp || !warmBoot || haveMissingOrQSetChangedFPGroups()) { initFieldProcessor(); setupFPGroups(); } dropDhcpPackets(); setL3MtuFailPackets(); mmuState_ = BcmAPI::getMmuState(); // enable IPv4 and IPv6 on CPU port bcm_port_t idx; BCM_PBMP_ITER(pcfg.cpu, idx) { rv = bcm_port_control_set(unit_, idx, bcmPortControlIP4, 1); bcmCheckError(rv, "failed to enable IPv4 on cpu port ", idx); rv = bcm_port_control_set(unit_, idx, bcmPortControlIP6, 1); bcmCheckError(rv, "failed to enable IPv6 on cpu port ", idx); XLOG(DBG2) << "Enabled IPv4/IPv6 on CPU port " << idx; } // Setup the default drop egress BcmEgress::setupDefaultDropEgress(unit_, getDropEgressId()); setupCos(); folly::dynamic switchStateJson; if (warmBoot) { // This needs to be done after we have set // bcmSwitchL3EgressMode else the egress ids // in the host table don't show up correctly. switchStateJson = getPlatform()->getWarmBootHelper()->getWarmBootState(); warmBootCache_->populate(switchStateJson); } setupToCpuEgress(); portTable_->initPorts(&pcfg, warmBoot); bstStatsMgr_->startBufferStatCollection(); // Set the spanning tree state of all ports to forwarding. // TODO: Eventually the spanning tree state should be part of the Port // state, and this should be handled in applyConfig(). // // Spanning tree group settings // TODO: This should eventually be done as part of applyConfig() bcm_stg_t stg = 1; BCM_PBMP_ITER(pcfg.port, idx) { rv = bcm_stg_stp_set(unit_, stg, idx, BCM_STG_STP_FORWARD); bcmCheckError(rv, "failed to set spanning tree state on port ", idx); } ret.bootType = bootType_; if (warmBoot) { ret.switchState = warmBootCache_->getDumpedSwSwitchState().clone(); getPlatform()->preWarmbootStateApplied(); if (switchStateJson.find(kRib) != switchStateJson.items().end()) { ret.rib = RoutingInformationBase::fromFollyDynamic( switchStateJson[kRib], ret.switchState->getFibs(), ret.switchState->getLabelForwardingInformationBase()); } stateChangedImpl(StateDelta(make_shared<SwitchState>(), ret.switchState)); hostTable_->warmBootHostEntriesSynced(); // Done with warm boot, clear warm boot cache warmBootCache_->clear(); if (getPlatform()->getAsic()->getAsicType() == HwAsic::AsicType::ASIC_TYPE_TRIDENT2) { for (auto ip : {folly::IPAddress("0.0.0.0"), folly::IPAddress("::")}) { bcm_l3_route_t rt; bcm_l3_route_t_init(&rt); rt.l3a_flags |= ip.isV6() ? BCM_L3_IP6 : 0; bcm_l3_route_get(getUnit(), &rt); rt.l3a_flags |= BCM_L3_REPLACE; bcm_l3_route_add(getUnit(), &rt); } } } else { ret.switchState = getColdBootSwitchState(); setMacAging(std::chrono::seconds( ret.switchState->getSwitchSettings()->getL2AgeTimerSeconds())); } macTable_ = std::make_unique<BcmMacTable>(this); ret.bootTime = duration_cast<duration<float>>(steady_clock::now() - begin).count(); ret.switchState->publish(); return ret; } void BcmSwitch::setupToCpuEgress() { // create an egress object for ToCPU toCPUEgress_ = make_unique<BcmEgress>(this); toCPUEgress_->programToCPU(); } void BcmSwitch::setupPacketRx() { static bcm_rx_cfg_t rxCfg = { (16 * 1032), // packet alloc size (12K packets plus spare) 16, // Packets per chain 0, // Default pkt rate, global (all COS, one unit) 0, // Burst {// 1 RX channel {0, 0, 0, 0}, // Channel 0 is usually TX { // Channel 1, default RX 4, // DV count (number of chains) 0, // Default pkt rate, DEPRECATED 0, // No flags 0xff // COS bitmap channel to receive }}, nullptr, // Use default alloc function nullptr, // Use default free function 0, // flags 0, // Num CPU addrs nullptr // cpu_addresses }; if (!(getFeaturesDesired() & FeaturesDesired::PACKET_RX_DESIRED)) { XLOG(DBG1) << " Skip setting up packet RX since its explicitly disabled"; return; } // Register our packet handler callback function. uint32_t rxFlags = BCM_RCO_F_ALL_COS; int rv; if (!usePKTIO()) { rv = bcm_rx_register( unit_, // int unit "fboss_rx", // const char *name packetRxSdkCallback, // bcm_rx_cb_f callback kRxCallbackPriority, // uint8 priority this, // void *cookie rxFlags); // uint32 flags } else { #ifdef INCLUDE_PKTIO rv = bcm_pktio_rx_register( unit_, // int unit "fboss_rx", // const char *name pktioPacketRxSdkCallback, // bcm_rx_pktio_cb_f callback kRxCallbackPriority, // uint8 priority this, // void *cookie rxFlags); // uint32 flags #else throw FbossError("invalid PKTIO configuration"); #endif } bcmCheckError(rv, "failed to register packet rx callback"); flags_ |= RX_REGISTERED; if (!unitObject_->usePKTIO()) { if (!isRxThreadRunning()) { rv = bcm_rx_start(unit_, &rxCfg); } } bcmCheckError(rv, "failed to start broadcom packet rx API"); } void BcmSwitch::processSwitchSettingsChanged(const StateDelta& delta) { const auto switchSettingsDelta = delta.getSwitchSettingsDelta(); const auto& oldSwitchSettings = switchSettingsDelta.getOld(); const auto& newSwitchSettings = switchSettingsDelta.getNew(); /* * SwitchSettings are mandatory and can thus only be modified. * Every field in SwitchSettings must always be set in new SwitchState. */ CHECK(oldSwitchSettings); CHECK(newSwitchSettings); if (oldSwitchSettings->getL2LearningMode() != newSwitchSettings->getL2LearningMode()) { XLOG(DBG3) << "Configuring L2LearningMode old: " << static_cast<int>(oldSwitchSettings->getL2LearningMode()) << " new: " << static_cast<int>(newSwitchSettings->getL2LearningMode()); switchSettings_->setL2LearningMode(newSwitchSettings->getL2LearningMode()); } if (oldSwitchSettings->isQcmEnable() != newSwitchSettings->isQcmEnable()) { XLOG(DBG3) << "Set QCM enable: " << std::boolalpha << newSwitchSettings->isQcmEnable(); switchSettings_->setQcmEnable( newSwitchSettings->isQcmEnable(), delta.newState()); } // Its possible during switch init, qcm is enabled // but all the ports are not enabled yet, so we trigger processing // ports on every cfg reload // QCM logic determines eligibility if there are any new ports // and update internally if (newSwitchSettings->isQcmEnable()) { if (oldSwitchSettings->isQcmEnable()) { // check whether config params changed on the fly qcmManager_->processQcmConfigChanged(delta.newState(), delta.oldState()); } XLOG(DBG3) << "Qcm enabled. Evaluate the monitored ports"; qcmManager_->processPortsForQcm(delta.newState()); } const auto newPtpTcEnable = newSwitchSettings->isPtpTcEnable(); if (oldSwitchSettings->isPtpTcEnable() != newPtpTcEnable) { XLOG(DBG3) << "Set PTP TC enable: " << std::boolalpha << newPtpTcEnable; switchSettings_->setPtpTc(newPtpTcEnable, delta.newState()); } const auto oldVal = oldSwitchSettings->getL2AgeTimerSeconds(); const auto newVal = newSwitchSettings->getL2AgeTimerSeconds(); if (oldVal != newVal) { XLOG(DBG3) << "Configuring l2AgeTimerSeconds old: " << static_cast<int>(oldVal) << " new: " << static_cast<int>(newVal); setMacAging(std::chrono::seconds(newVal)); switchSettings_->setL2AgeTimerSeconds(newVal); } if (oldSwitchSettings->getMaxRouteCounterIDs() != newSwitchSettings->getMaxRouteCounterIDs()) { routeCounterTable_->setMaxRouteCounterIDs( newSwitchSettings->getMaxRouteCounterIDs()); } } void BcmSwitch::processMacTableChanges(const StateDelta& stateDelta) { for (const auto& vlanDelta : stateDelta.getVlansDelta()) { auto vlanId = vlanDelta.getOld() ? vlanDelta.getOld()->getID() : vlanDelta.getNew()->getID(); for (const auto& delta : vlanDelta.getMacDelta()) { const auto* oldEntry = delta.getOld().get(); const auto* newEntry = delta.getNew().get(); if (!oldEntry) { macTable_->processMacAdded(newEntry, vlanId); } else if (!newEntry) { macTable_->processMacRemoved(oldEntry, vlanId); } else { macTable_->processMacChanged(oldEntry, newEntry, vlanId); } } } } std::shared_ptr<SwitchState> BcmSwitch::stateChanged(const StateDelta& delta) { // Take the lock before modifying any objects std::lock_guard<std::mutex> g(lock_); auto appliedState = stateChangedImpl(delta); appliedState->publish(); return appliedState; } std::shared_ptr<SwitchState> BcmSwitch::stateChangedTransaction( const StateDelta& delta) { throw FbossError("Transactions not supported on BcmSwitch"); return nullptr; } std::shared_ptr<SwitchState> BcmSwitch::stateChangedImpl( const StateDelta& delta) { // Reconfigure port groups in case we are changing between using a port as // 1, 2 or 4 ports. Only do this if flexports are enabled // Calling reconfigure port group first to make sure the ports of SW state // already exists in HW. if (FLAGS_flexports) { reconfigurePortGroups(delta); } forEachAdded(delta.getPortsDelta(), [this](const auto& newPort) { if (!portTable_->getBcmPortIf(newPort->getID())) { throw FbossError( "Cannot add a port:", newPort->getID(), " unknown to hardware"); } }); forEachRemoved(delta.getPortsDelta(), [this](const auto& oldPort) { if (portTable_->getBcmPortIf(oldPort->getID())) { throw FbossError( "Cannot remove a port:", oldPort->getID(), " still in port table"); } }); auto appliedState = delta.newState(); // TODO: This function contains high-level logic for how to apply the // StateDelta, and isn't particularly hardware-specific. I plan to refactor // it, and move it out into a common helper class that can be shared by // many different HwSwitch implementations. // As the first step, disable ports that are now disabled. // This ensures that we immediately stop forwarding traffic on these ports. processDisabledPorts(delta); processSwitchSettingsChanged(delta); processMacTableChanges(delta); processLoadBalancerChanges(delta); // remove all routes to be deleted processRemovedRoutes(delta); // Any neighbor removals, and modify appliedState if some changes fail to // apply processNeighborDelta(delta, &appliedState, REMOVED); // delete all interface not existing anymore. that should stop // all traffic on that interface now forEachRemoved(delta.getIntfsDelta(), &BcmSwitch::processRemovedIntf, this); // Add all new VLANs, and modify VLAN port memberships. // We don't actually delete removed VLANs at this point, we simply remove // all members from the VLAN. This way any ports that ingress packets to this // VLAN will still use this VLAN until we get the new VLAN fully configured. forEachChanged( delta.getVlansDelta(), &BcmSwitch::processChangedVlan, &BcmSwitch::processAddedVlan, &BcmSwitch::preprocessRemovedVlan, this); // Broadcom requires a default VLAN to always exist. // This VLAN is used as the default ingress VLAN for ports that don't have a // default ingress set. // // We always specify the ingress VLAN for all enabled ports, so this VLAN is // never really used for us. We instead always point the default VLAN. if (delta.oldState()->getDefaultVlan() != delta.newState()->getDefaultVlan()) { changeDefaultVlan( delta.oldState()->getDefaultVlan(), delta.newState()->getDefaultVlan()); } // Update changed interfaces forEachChanged(delta.getIntfsDelta(), &BcmSwitch::processChangedIntf, this); // Remove deleted VLANs forEachRemoved(delta.getVlansDelta(), &BcmSwitch::processRemovedVlan, this); // Add all new interfaces forEachAdded(delta.getIntfsDelta(), &BcmSwitch::processAddedIntf, this); // Any changes to the Qos maps processQosChanges(delta); processControlPlaneChanges(delta); processAggregatePortChanges(delta); // Any neighbor additions/changes, and modify appliedState if some changes // fail to apply processNeighborDelta(delta, &appliedState, ADDED); processNeighborDelta(delta, &appliedState, CHANGED); // process label forwarding changes after neighbor entries are updated processChangedLabelForwardingInformationBase(delta); // Add/update mirrors before processing Acl and port changes // This is to ensure that port and acls can access latest mirrors forEachAdded( delta.getMirrorsDelta(), &BcmMirrorTable::processAddedMirror, writableBcmMirrorTable()); forEachChanged( delta.getMirrorsDelta(), &BcmMirrorTable::processChangedMirror, writableBcmMirrorTable()); // Any ACL changes processAclChanges(delta); // Any changes to the set of sFlow collectors processSflowCollectorChanges(delta); // Any changes to the sampling rate of sflow processSflowSamplingRateChanges(delta); // Process any new routes or route changes processAddedChangedRoutes(delta, &appliedState); processAddedPorts(delta); processChangedPorts(delta); // delete any removed mirrors after processing port and acl changes forEachRemoved( delta.getMirrorsDelta(), &BcmMirrorTable::processRemovedMirror, writableBcmMirrorTable()); // Process global PFC watchdog configurations processPfcWatchdogGlobalChanges(delta); pickupLinkStatusChanges(delta); // As the last step, enable newly enabled ports. Doing this as the // last step ensures that we only start forwarding traffic once the // ports are correctly configured. Note that this will also set the // ingressVlan and speed correctly before enabling. processEnabledPorts(delta); bcmStatUpdater_->refreshPostBcmStateChange(delta); return appliedState; } unique_ptr<TxPacket> BcmSwitch::allocatePacket(uint32_t size) const { // For future reference: Allocating the packet data requires the unit number // of the unit that the packet will be used with. Our allocatePacket() API // doesn't require the caller to specify which ports they plan to use the // packet with. // // At the moment we only support a single unit, so this isn't really an // issue. However, it may be more challenging for a HwSwitch implementation // that supports multiple units. Fortunately, the linux userspace // implemetation uses the same DMA pool for all local units, so it wouldn't // really matter which unit we specified when allocating the buffer. return make_unique<BcmTxPacket>(unit_, size, this); } void BcmSwitch::processDisabledPorts(const StateDelta& delta) { forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { if (oldPort->isEnabled() && !newPort->isEnabled()) { auto bcmPort = portTable_->getBcmPort(newPort->getID()); XLOG(INFO) << "Disabling port: " << newPort->getID(); bcmPort->disable(newPort); } }); } void BcmSwitch::processEnabledPortQueues(const shared_ptr<Port>& port) { auto id = port->getID(); auto bcmPort = portTable_->getBcmPort(id); for (const auto& queue : port->getPortQueues()) { XLOG(DBG1) << "Enable cos queue settings on port " << port->getID() << " queue: " << static_cast<int>(queue->getID()); bcmPort->setupQueue(*queue); } } void BcmSwitch::processEnabledPorts(const StateDelta& delta) { forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { if (!newPort->isEnabled()) { // skip if the port is not even enabled return; } // There're only two cases that we should enable the port: // 1) If the old port is disabled // 2) If the speed changes if (!oldPort->isEnabled() || (oldPort->getSpeed() != newPort->getSpeed())) { auto bcmPort = portTable_->getBcmPort(newPort->getID()); bcmPort->enable(newPort); processEnabledPortQueues(newPort); } }); forEachAdded(delta.getPortsDelta(), [&](const shared_ptr<Port>& port) { // For any newly created port, as long as it needs enable, always enable if (port->isEnabled()) { auto bcmPort = portTable_->getBcmPort(port->getID()); bcmPort->enable(port); processEnabledPortQueues(port); } }); } bool BcmSwitch::processChangedIngressPoolCfg( std::optional<BufferPoolCfgPtr> oldBufferPoolCfgPtr, std::optional<BufferPoolCfgPtr> newBufferPoolCfgPtr) { // bufferPool <-> noBufferPool if ((oldBufferPoolCfgPtr && !newBufferPoolCfgPtr) || (!oldBufferPoolCfgPtr && newBufferPoolCfgPtr)) { return true; } // bufferPool exists old and new, but doesn't match if (oldBufferPoolCfgPtr && newBufferPoolCfgPtr) { if (*oldBufferPoolCfgPtr != *newBufferPoolCfgPtr) { return true; } } return false; } int BcmSwitch::pfcDeadlockRecoveryEventCallback( int unit, bcm_port_t port, bcm_cos_queue_t cosq, bcm_cosq_pfc_deadlock_recovery_event_t recovery_state, void* userdata) { Callback* callback = (Callback*)userdata; XLOG_EVERY_MS(WARNING, 5000) << "PFC deadlock recovery callback invoked for unit " << unit << " port " << (int)port << " cosq " << (int)cosq << " recovery state " << (int)recovery_state; CHECK(callback); if (recovery_state == bcmCosqPfcDeadlockRecoveryEventBegin) { // deadlock detected callback->pfcWatchdogStateChanged(PortID(port), true); } else if (recovery_state == bcmCosqPfcDeadlockRecoveryEventEnd) { callback->pfcWatchdogStateChanged(PortID(port), false); } return 0; } void BcmSwitch::processPfcWatchdogGlobalChanges(const StateDelta& delta) { auto oldRecoveryAction = delta.oldState()->getPfcWatchdogRecoveryAction(); auto newRecoveryAction = delta.newState()->getPfcWatchdogRecoveryAction(); if (!platform_->getAsic()->isSupported(HwAsic::Feature::PFC)) { // Look for PFC watchdog only if PFC is supported on platform return; } // Needed if PFC watchdog enabled status changes at device level if (oldRecoveryAction.has_value() != newRecoveryAction.has_value()) { int rv = 0; if (newRecoveryAction.has_value()) { // Register the PFC deadlock recovery callback function rv = bcm_cosq_pfc_deadlock_recovery_event_register( unit_, pfcDeadlockRecoveryEventCallback, callback_); bcmCheckError(rv, "Failed to register PFC deadlock recovery event!"); flags_ |= PFC_WATCHDOG_REGISTERED; } else { // Unregister the PFC deadlock recovery callback function rv = bcm_cosq_pfc_deadlock_recovery_event_unregister( unit_, pfcDeadlockRecoveryEventCallback, callback_); bcmCheckError(rv, "Failed to unregister PFC deadlock recovery event!"); flags_ &= ~PFC_WATCHDOG_REGISTERED; } XLOG(DBG2) << "PFC watchdog deadlock_recovery_event " << std::boolalpha << newRecoveryAction.has_value(); } // Default recovery action is NO_DROP if (newRecoveryAction != oldRecoveryAction) { bcm_switch_pfc_deadlock_action_t recoveryAction = bcmSwitchPFCDeadlockActionTransmit; auto recoveryActionConfig = cfg::PfcWatchdogRecoveryAction::NO_DROP; if (newRecoveryAction.has_value() && (*newRecoveryAction == cfg::PfcWatchdogRecoveryAction::DROP)) { recoveryAction = bcmSwitchPFCDeadlockActionDrop; recoveryActionConfig = *newRecoveryAction; } auto rv = bcm_switch_control_set( unit_, bcmSwitchPFCDeadlockRecoveryAction, recoveryAction); bcmCheckError(rv, "Failed to set bcmSwitchPFCDeadlockRecoveryAction"); XLOG(DBG2) << "PFC watchdog deadlock recovery action " << apache::thrift::util::enumNameSafe(recoveryActionConfig) << " programmed!"; } } bool BcmSwitch::processChangedPgCfg( const std::shared_ptr<Port>& oldPort, const std::shared_ptr<Port>& newPort) { const auto& oldPortPgCfgs = oldPort->getPortPgConfigs(); const auto& newPortPgCfgs = newPort->getPortPgConfigs(); std::map<int, std::shared_ptr<PortPgConfig>> newPortPgConfigMap; if (!oldPortPgCfgs && !newPortPgCfgs) { // no change before or after return false; } if ((oldPortPgCfgs && !newPortPgCfgs) || (!oldPortPgCfgs && newPortPgCfgs)) { // if go from old pgConfig <-> no pg cfg and vice versa, there is a change return true; } if ((*oldPortPgCfgs).size() != (*newPortPgCfgs).size()) { return true; } for (const auto& portPg : *newPortPgCfgs) { newPortPgConfigMap.emplace(std::make_pair(portPg->getID(), portPg)); } for (const auto& oldPortPg : *oldPortPgCfgs) { auto iter = newPortPgConfigMap.find(oldPortPg->getID()); if ((iter == newPortPgConfigMap.end()) || (*iter->second != *oldPortPg)) { return true; } // also validate buffer pools associated with the given pg if any const auto& oldBufferPoolCfgPtr = oldPortPg->getBufferPoolConfig(); const auto& newBufferPoolCfgPtr = iter->second->getBufferPoolConfig(); if (processChangedIngressPoolCfg( oldBufferPoolCfgPtr, newBufferPoolCfgPtr)) { XLOG(DBG1) << "New ingress buffer pool changes on port: " << newPort->getID(); return true; } } // no change return false; } void BcmSwitch::processChangedPortQueues( const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { auto id = newPort->getID(); auto bcmPort = portTable_->getBcmPort(id); // We expect the number of port queues to remain constant because this is // defined by the hardware for (const auto& newQueue : newPort->getPortQueues()) { if (oldPort->getPortQueues().size() > 0 && *(oldPort->getPortQueues().at(newQueue->getID())) == *newQueue) { continue; } XLOG(DBG1) << "New cos queue settings on port " << id << " queue " << static_cast<int>(newQueue->getID()); bcmPort->setupStatsIfNeeded(newPort); bcmPort->setupQueue(*newQueue); } } void BcmSwitch::processAddedPorts(const StateDelta& delta) { // For now, this just enables stats on newly added ports forEachAdded(delta.getPortsDelta(), [&](const shared_ptr<Port>& port) { auto id = port->getID(); auto bcmPort = portTable_->getBcmPort(id); if (port->isEnabled()) { // This ensures all the settings are up to date. We need to // be very careful that program() is both idempotent and // does not cause port flaps if called with the same // settings twice. bcmPort->program(port); bcmPort->enableStatCollection(port); } }); } void BcmSwitch::processChangedPorts(const StateDelta& delta) { forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { auto id = newPort->getID(); auto bcmPort = portTable_->getBcmPort(id); if (!oldPort->isEnabled() && !newPort->isEnabled()) { // No need to process changes on disabled ports. We will pick up // changes should the port ever become enabled. return; } auto speedChanged = oldPort->getSpeed() != newPort->getSpeed(); XLOG_IF(DBG1, speedChanged) << "New speed on port " << id; auto profileIDChanged = oldPort->getProfileID() != newPort->getProfileID(); XLOG_IF(DBG1, profileIDChanged) << "New profileID on port " << id; auto vlanChanged = oldPort->getIngressVlan() != newPort->getIngressVlan(); XLOG_IF(DBG1, vlanChanged) << "New ingress vlan on port " << id; auto pauseChanged = oldPort->getPause() != newPort->getPause(); XLOG_IF(DBG1, pauseChanged) << "New pause settings on port " << id; auto sFlowChanged = (oldPort->getSflowIngressRate() != newPort->getSflowIngressRate()) || (oldPort->getSflowEgressRate() != newPort->getSflowEgressRate()); XLOG_IF(DBG1, sFlowChanged) << "New sFlow settings on port " << id; auto loopbackChanged = oldPort->getLoopbackMode() != newPort->getLoopbackMode(); XLOG_IF(DBG1, loopbackChanged) << "New loopback mode settings on port " << id; auto mirrorChanged = (oldPort->getIngressMirror() != newPort->getIngressMirror()) || (oldPort->getEgressMirror() != newPort->getEgressMirror()); XLOG_IF(DBG1, mirrorChanged) << "New mirror settings on port " << id; auto qosPolicyChanged = oldPort->getQosPolicy() != newPort->getQosPolicy(); XLOG_IF(DBG1, qosPolicyChanged) << "New Qos Policy on port " << id; auto nameChanged = oldPort->getName() != newPort->getName(); XLOG_IF(DBG1, nameChanged) << "New name on port " << id; auto asicPrbsChanged = oldPort->getAsicPrbs() != newPort->getAsicPrbs(); XLOG_IF(DBG1, asicPrbsChanged) << "New asicPrbs on port " << id; // if pfc config for port points to new PG profile, pfcChanged will find // it auto pfcChanged = oldPort->getPfc() != newPort->getPfc(); XLOG_IF(DBG1, pfcChanged) << "New pfc settings on port " << id; auto pgCfgChanged = processChangedPgCfg(oldPort, newPort); XLOG_IF(DBG1, pgCfgChanged) << "New pg config settings on port " << id; if (speedChanged || profileIDChanged || vlanChanged || pauseChanged || sFlowChanged || loopbackChanged || mirrorChanged || qosPolicyChanged || nameChanged || asicPrbsChanged || pfcChanged || pgCfgChanged) { bcmPort->program(newPort); } if (newPort->getPortQueues().size() != 0 && !platform_->getAsic()->isSupported(HwAsic::Feature::L3_QOS)) { throw FbossError( "Changing settings for cos queues not supported on ", "this platform"); } processChangedPortQueues(oldPort, newPort); }); } void BcmSwitch::pickupLinkStatusChanges(const StateDelta& delta) { forEachChanged( delta.getPortsDelta(), [&](const std::shared_ptr<Port>& oldPort, const std::shared_ptr<Port>& newPort) { auto id = newPort->getID(); auto bcmPort = portTable_->getBcmPort(id); auto adminStateChanged = oldPort->getAdminState() != newPort->getAdminState(); if (adminStateChanged) { auto adminStr = (newPort->isEnabled()) ? "ENABLED" : "DISABLED"; XLOG(DBG1) << "Admin state changed on port " << id << ": " << adminStr; } auto operStateChanged = oldPort->getOperState() != newPort->getOperState(); if (operStateChanged) { auto operStr = (newPort->isUp()) ? "UP" : "DOWN"; XLOG(DBG1) << "Oper state changed on port " << id << ": " << operStr; } if (adminStateChanged || operStateChanged) { bcmPort->linkStatusChanged(newPort); } if (auto faultStatus = newPort->getIPhyLinkFaultStatus()) { XLOG(DBG1) << newPort->getName() << " Fault status on port " << id << " LocalFault: " << *(*faultStatus).localFault() << ", RemoteFault: " << *(*faultStatus).remoteFault(); bcmPort->cacheFaultStatus(*faultStatus); } }); } void BcmSwitch::reconfigurePortGroups(const StateDelta& delta) { // This logic is a bit messy. We could encode some notion of port // groups into the switch state somehow so it is easy to generate // deltas for these. For now, we need pass around the SwitchState // object and get the relevant ports manually. // Note that reconfigurePortGroups will program the speed and enable // newly enabled ports in its group. This means it can overlap a bit // with the work done in processEnabledPorts and processChangedPorts. // Both BcmPort::program and BcmPort::enable should be no-ops if already // programmed or already enabled. However, this MUST BE called before // those methods as enabling or changing the speed of a port may require // changing the configuration of a port group. auto oldState = delta.oldState(); auto newState = delta.newState(); forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { auto enabled = !oldPort->isEnabled() && newPort->isEnabled(); auto speedProfileChanged = oldPort->getProfileID() != newPort->getProfileID(); auto sFlowChanged = (oldPort->getSflowIngressRate() != newPort->getSflowIngressRate()) || (oldPort->getSflowEgressRate() != newPort->getSflowEgressRate()); if (enabled || speedProfileChanged || sFlowChanged) { if (!isValidPortUpdate(oldPort, newPort, newState)) { // Fail hard throw FbossError("Invalid port configuration passed in "); } auto bcmPort = portTable_->getBcmPort(newPort->getID()); auto portGroup = bcmPort->getPortGroup(); if (portGroup) { portGroup->reconfigureIfNeeded(oldState, newState); } } }); } bool BcmSwitch::isValidPortUpdate( const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort, const shared_ptr<SwitchState>& newState) const { if (auto mirrorName = newPort->getIngressMirror()) { auto mirror = newState->getMirrors()->getMirrorIf(mirrorName.value()); if (!mirror) { XLOG(ERR) << "Ingress mirror " << mirrorName.value() << " for port : " << newPort->getID() << " not found"; return false; } } if (auto mirrorName = newPort->getEgressMirror()) { auto mirror = newState->getMirrors()->getMirrorIf(mirrorName.value()); if (!mirror) { XLOG(ERR) << "Egress mirror " << mirrorName.value() << " for port : " << newPort->getID() << " not found"; return false; } } if (newPort->getSampleDestination() && newPort->getSampleDestination().value() == cfg::SampleDestination::MIRROR) { auto ingressMirrorName = newPort->getIngressMirror(); if (!ingressMirrorName) { XLOG(ERR) << "Ingress mirror not set for sampled port with sample destination of mirror not set : " << newPort->getID(); return false; } auto ingressMirror = newState->getMirrors()->getMirrorIf(ingressMirrorName.value()); if (ingressMirror->type() != Mirror::Type::SFLOW) { XLOG(ERR) << "Ingress mirror " << ingressMirrorName.value() << " for sampled port not sflow : " << newPort->getID(); return false; } auto egressMirrorName = newPort->getEgressMirror(); if (egressMirrorName) { XLOG(ERR) << "Egress mirror " << ingressMirrorName.value() << " for sampled port set on : " << newPort->getID() << " , egress sampling is not supported!"; return false; } } auto enabled = !oldPort->isEnabled() && newPort->isEnabled(); auto speedChanged = oldPort->getSpeed() != newPort->getSpeed(); if (speedChanged || enabled) { auto bcmPort = portTable_->getBcmPort(newPort->getID()); auto portGroup = bcmPort->getPortGroup(); XLOG(DBG1) << "Verifying port group config for : " << newPort->getID(); return !portGroup || portGroup->validConfiguration(newState); } if (!isValidPortQueueUpdate( oldPort->getPortQueues(), newPort->getPortQueues())) { return false; } if (!isValidPortQosPolicyUpdate(oldPort, newPort, newState)) { return false; } return true; } template <typename AddrT> bool BcmSwitch::isRouteUpdateValid(const StateDelta& delta) const { using RouteT = Route<AddrT>; auto maxLabelStackDepth = getPlatform()->getAsic()->getMaxLabelStackDepth(); auto validateLabeledRoute = [maxLabelStackDepth](const auto& route) { for (const auto& nhop : route->getForwardInfo().getNextHopSet()) { if (!nhop.labelForwardingAction()) { continue; } if (nhop.labelForwardingAction()->type() != LabelForwardingAction::LabelForwardingType::PUSH) { return false; } else { if (nhop.labelForwardingAction()->pushStack()->size() > maxLabelStackDepth) { return false; } } } return true; }; bool isValid = true; forEachChangedRoute<AddrT>( delta, [&isValid, validateLabeledRoute]( RouterID /*rid*/, const std::shared_ptr<RouteT>& /*oldRoute*/, const std::shared_ptr<RouteT>& newRoute) { if (isValid && !validateLabeledRoute(newRoute)) { isValid = false; } }, [&isValid, validateLabeledRoute]( RouterID /*rid*/, const std::shared_ptr<RouteT>& addedRoute) { if (isValid && !validateLabeledRoute(addedRoute)) { isValid = false; } }, [](RouterID /*rid*/, const std::shared_ptr<RouteT>& /*removedRoute*/) {}); return isValid; } bool BcmSwitch::isValidStateUpdate(const StateDelta& delta) const { auto newState = delta.newState(); auto isValid = true; forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { if (isValid && !isValidPortUpdate(oldPort, newPort, newState)) { isValid = false; } }); isValid = isValid && (newState->getMirrors()->size() <= platform_->getAsic()->getMaxMirrors()); forEachChanged( delta.getMirrorsDelta(), [&](const shared_ptr<Mirror>& /* oldMirror */, const shared_ptr<Mirror>& newMirror) { if (newMirror->getTruncate() && !getPlatform()->getAsic()->isSupported( HwAsic::Feature::MIRROR_PACKET_TRUNCATION)) { XLOG(ERR) << "Mirror packet truncation is not supported on this platform"; isValid = false; } }); int sflowMirrorCount = 0; for (const auto& mirror : *(newState->getMirrors())) { if (mirror->type() == Mirror::Type::SFLOW) { sflowMirrorCount++; } } if (sflowMirrorCount > 1) { XLOG(ERR) << "More than one sflow mirrors configured"; isValid = false; } forEachAdded( delta.getQosPoliciesDelta(), [&](const std::shared_ptr<QosPolicy>& qosPolicy) { isValid = isValid && BcmQosPolicyTable::isValid(qosPolicy); }); forEachChanged( delta.getQosPoliciesDelta(), [&](const std::shared_ptr<QosPolicy>& oldQosPolicy, const std::shared_ptr<QosPolicy>& newQosPolicy) { isValid = isValid && BcmQosPolicyTable::isValid(newQosPolicy); }); forEachChanged( delta.getLabelForwardingInformationBaseDelta(), [&](const std::shared_ptr<Route<LabelID>>& /*oldEntry*/, const std::shared_ptr<Route<LabelID>>& newEntry) { // changed Fn isValid = isValid && isValidLabelForwardingEntry(newEntry.get()); }, [&](const std::shared_ptr<Route<LabelID>>& newEntry) { // added Fn isValid = isValid && isValidLabelForwardingEntry(newEntry.get()); }, [](const std::shared_ptr<Route<LabelID>>& /*oldEntry*/) { // removed Fn }); isValid = isValid && isRouteUpdateValid<folly::IPAddressV4>(delta); isValid = isValid && isRouteUpdateValid<folly::IPAddressV6>(delta); forEachChanged( delta.getAclsDelta(), [&](const shared_ptr<AclEntry>& /* oldAcl */, const shared_ptr<AclEntry>& newAcl) { isValid = isValid && hasValidAclMatcher(newAcl); return isValid ? LoopAction::CONTINUE : LoopAction::BREAK; }, [&](const shared_ptr<AclEntry>& addAcl) { isValid = isValid && hasValidAclMatcher(addAcl); return isValid ? LoopAction::CONTINUE : LoopAction::BREAK; }, [&](const shared_ptr<AclEntry>& /* delAcl */) {}); if (getPlatform()->getAsic()->isSupported( HwAsic::Feature::INGRESS_L3_INTERFACE)) { // default vlan l3 interface should not be created from port vlan config forEachAdded( delta.getIntfsDelta(), [&](const std::shared_ptr<Interface>& intf) { isValid = isValid && intf->getVlanID() != delta.newState()->getDefaultVlan(); }); } return isValid; } void BcmSwitch::changeDefaultVlan(VlanID oldId, VlanID newId) { auto rv = bcm_vlan_default_set(unit_, newId); bcmCheckError(rv, "failed to set default VLAN to ", newId); if (getPlatform()->getAsic()->isSupported( HwAsic::Feature::INGRESS_L3_INTERFACE)) { // create or update L3 intf for default vlan, // otherwise, packets with default vlan tag will // be dropped due to no mathcing in ingress L3 intf if (intfTable_->getBcmIntfIf(oldId)) { auto oldIntf = make_shared<Interface>( InterfaceID(oldId), RouterID(0), /* currently VRF is always zero anyway */ oldId, folly::sformat("Interface-{}", uint16_t(oldId)), getPlatform()->getLocalMac(), 9000, /* mtu */ false, /* is virtual */ false /* is state_sync disabled*/); intfTable_->deleteIntf(oldIntf); } auto newIntf = make_shared<Interface>( InterfaceID(newId), RouterID(0), /* currently VRF is always zero anyway */ newId, folly::sformat("Interface-{}", uint16_t(newId)), getPlatform()->getLocalMac(), 9000, /* mtu */ false, /* is virtual */ false /* is state_sync disabled*/); intfTable_->addIntf(newIntf); } } void BcmSwitch::processChangedVlan( const shared_ptr<Vlan>& oldVlan, const shared_ptr<Vlan>& newVlan) { // Update port membership bcm_pbmp_t addedPorts; BCM_PBMP_CLEAR(addedPorts); bcm_pbmp_t addedUntaggedPorts; BCM_PBMP_CLEAR(addedUntaggedPorts); bcm_pbmp_t removedPorts; BCM_PBMP_CLEAR(removedPorts); const auto& oldPorts = oldVlan->getPorts(); const auto& newPorts = newVlan->getPorts(); auto oldIter = oldPorts.begin(); auto newIter = newPorts.begin(); uint32_t numAdded{0}; uint32_t numRemoved{0}; while (oldIter != oldPorts.end() || newIter != newPorts.end()) { if (oldIter == oldPorts.end() || (newIter != newPorts.end() && newIter->first < oldIter->first)) { // This port was added ++numAdded; bcm_port_t bcmPort = portTable_->getBcmPortId(newIter->first); BCM_PBMP_PORT_ADD(addedPorts, bcmPort); if (!newIter->second.tagged) { BCM_PBMP_PORT_ADD(addedUntaggedPorts, bcmPort); } ++newIter; } else if ( newIter == newPorts.end() || (oldIter != oldPorts.end() && oldIter->first < newIter->first)) { // This port was removed ++numRemoved; bcm_port_t bcmPort = portTable_->getBcmPortId(oldIter->first); BCM_PBMP_PORT_ADD(removedPorts, bcmPort); ++oldIter; } else { ++oldIter; ++newIter; } } XLOG(DBG2) << "updating VLAN " << newVlan->getID() << ": " << numAdded << " ports added, " << numRemoved << " ports removed"; if (numRemoved) { auto rv = bcm_vlan_port_remove(unit_, newVlan->getID(), removedPorts); bcmCheckError(rv, "failed to remove ports from VLAN ", newVlan->getID()); } if (numAdded) { auto rv = bcm_vlan_port_add( unit_, newVlan->getID(), addedPorts, addedUntaggedPorts); bcmCheckError(rv, "failed to add ports to VLAN ", newVlan->getID()); } } void BcmSwitch::processAddedVlan(const shared_ptr<Vlan>& vlan) { XLOG(DBG2) << "creating VLAN " << vlan->getID() << " with " << vlan->getPorts().size() << " ports"; bcm_pbmp_t pbmp; bcm_pbmp_t ubmp; BCM_PBMP_CLEAR(pbmp); BCM_PBMP_CLEAR(ubmp); for (const auto& entry : vlan->getPorts()) { bcm_port_t bcmPort = portTable_->getBcmPortId(entry.first); BCM_PBMP_PORT_ADD(pbmp, bcmPort); if (!entry.second.tagged) { BCM_PBMP_PORT_ADD(ubmp, bcmPort); } } typedef BcmWarmBootCache::VlanInfo VlanInfo; // Since during warm boot all VLAN in the config will show // up as added VLANs we only need to consult the warm boot // cache here. auto vlanItr = warmBootCache_->findVlanInfo(vlan->getID()); if (vlanItr != warmBootCache_->vlan2VlanInfo_end()) { // Compare with existing vlan to determine if we need to reprogram auto equivalent = [](VlanInfo newVlan, VlanInfo existingVlan) { return BCM_PBMP_EQ(newVlan.allPorts, existingVlan.allPorts) && BCM_PBMP_EQ(newVlan.untagged, existingVlan.untagged); }; const auto& existingVlan = vlanItr->second; if (!equivalent(VlanInfo(vlan->getID(), ubmp, pbmp), existingVlan)) { XLOG(DBG1) << "updating VLAN " << vlan->getID() << " with " << vlan->getPorts().size() << " ports"; auto oldVlan = vlan->clone(); warmBootCache_->fillVlanPortInfo(oldVlan.get()); processChangedVlan(oldVlan, vlan); } else { XLOG(DBG1) << " Vlan : " << vlan->getID() << " already exists "; } warmBootCache_->programmed(vlanItr); } else { XLOG(DBG1) << "creating VLAN " << vlan->getID() << " with " << vlan->getPorts().size() << " ports"; auto rv = bcm_vlan_create(unit_, vlan->getID()); bcmCheckError(rv, "failed to add VLAN ", vlan->getID()); rv = bcm_vlan_port_add(unit_, vlan->getID(), pbmp, ubmp); bcmCheckError(rv, "failed to add members to new VLAN ", vlan->getID()); } } void BcmSwitch::preprocessRemovedVlan(const shared_ptr<Vlan>& vlan) { // Remove all ports from this VLAN at this phase. XLOG(DBG2) << "preparing to remove VLAN " << vlan->getID(); auto rv = bcm_vlan_gport_delete_all(unit_, vlan->getID()); bcmCheckError(rv, "failed to remove members from VLAN ", vlan->getID()); } void BcmSwitch::processRemovedVlan(const shared_ptr<Vlan>& vlan) { XLOG(DBG2) << "removing VLAN " << vlan->getID(); auto rv = bcm_vlan_destroy(unit_, vlan->getID()); bcmCheckError(rv, "failed to remove VLAN ", vlan->getID()); } void BcmSwitch::processChangedIntf( const shared_ptr<Interface>& oldIntf, const shared_ptr<Interface>& newIntf) { CHECK_EQ(oldIntf->getID(), newIntf->getID()); XLOG(DBG2) << "changing interface " << oldIntf->getID(); intfTable_->programIntf(newIntf); } void BcmSwitch::processAddedIntf(const shared_ptr<Interface>& intf) { XLOG(DBG2) << "adding interface " << intf->getID(); intfTable_->addIntf(intf); } void BcmSwitch::processRemovedIntf(const shared_ptr<Interface>& intf) { XLOG(DBG2) << "deleting interface " << intf->getID(); intfTable_->deleteIntf(intf); } void BcmSwitch::processQosChanges(const StateDelta& delta) { auto oldDefaultQosPolicy = delta.oldState() ? delta.oldState()->getDefaultDataPlaneQosPolicy() : nullptr; auto newDefaultQosPolicy = delta.newState() ? delta.newState()->getDefaultDataPlaneQosPolicy() : nullptr; if (oldDefaultQosPolicy && newDefaultQosPolicy) { qosPolicyTable_->processChangedDefaultDataPlaneQosPolicy( oldDefaultQosPolicy, newDefaultQosPolicy); } else if (oldDefaultQosPolicy) { qosPolicyTable_->processRemovedDefaultDataPlaneQosPolicy( oldDefaultQosPolicy); } else if (newDefaultQosPolicy) { qosPolicyTable_->processAddedDefaultDataPlaneQosPolicy(newDefaultQosPolicy); } forEachChanged( delta.getQosPoliciesDelta(), &BcmQosPolicyTable::processChangedQosPolicy, &BcmQosPolicyTable::processAddedQosPolicy, &BcmQosPolicyTable::processRemovedQosPolicy, qosPolicyTable_.get()); } void BcmSwitch::processAclChanges(const StateDelta& delta) { forEachChanged( delta.getAclsDelta(), &BcmSwitch::processChangedAcl, &BcmSwitch::processAddedAcl, &BcmSwitch::processRemovedAcl, this); } void BcmSwitch::processAggregatePortChanges(const StateDelta& delta) { forEachChanged( delta.getAggregatePortsDelta(), &BcmSwitch::processChangedAggregatePort, &BcmSwitch::processAddedAggregatePort, &BcmSwitch::processRemovedAggregatePort, this); } void BcmSwitch::processChangedSflowCollector( const std::shared_ptr<SflowCollector>& /* oldCollector */, const std::shared_ptr<SflowCollector>& /* newCollector */) { XLOG(ERR) << "sFlow collector should should only change on restarts"; } void BcmSwitch::processRemovedSflowCollector( const std::shared_ptr<SflowCollector>& collector) { if (!sFlowExporterTable_->contains(collector)) { throw FbossError("Tried to remove non-existent sFlow exporter"); } sFlowExporterTable_->removeExporter(collector->getID()); } void BcmSwitch::processAddedSflowCollector( const shared_ptr<SflowCollector>& collector) { if (sFlowExporterTable_->contains(collector)) { // Something is wrong. throw FbossError("Tried to add an existing sFlow exporter"); } sFlowExporterTable_->addExporter(collector); } void BcmSwitch::processSflowSamplingRateChanges(const StateDelta& delta) { forEachChanged( delta.getPortsDelta(), [&](const shared_ptr<Port>& oldPort, const shared_ptr<Port>& newPort) { auto oldIngressRate = oldPort->getSflowIngressRate(); auto oldEgressRate = oldPort->getSflowEgressRate(); auto newIngressRate = newPort->getSflowIngressRate(); auto newEgressRate = newPort->getSflowEgressRate(); auto sFlowChanged = (oldIngressRate != newIngressRate) || (oldEgressRate != newEgressRate); if (sFlowChanged) { auto id = newPort->getID(); sFlowExporterTable_->updateSamplingRates( id, newIngressRate, newEgressRate); } }); } void BcmSwitch::processSflowCollectorChanges(const StateDelta& delta) { forEachChanged( delta.getSflowCollectorsDelta(), &BcmSwitch::processChangedSflowCollector, &BcmSwitch::processAddedSflowCollector, &BcmSwitch::processRemovedSflowCollector, this); } template <typename NeighborEntryT> void BcmSwitch::processAddedAndChangedNeighbor( const BcmHostKey& neighborKey, const BcmIntf* intf, const NeighborEntryT* entry) { auto* neighbor = neighborTable_->getNeighbor(neighborKey); CHECK(neighbor); BcmHostTableIf* hostTable; if (getPlatform()->getAsic()->isSupported(HwAsic::Feature::HOSTTABLE)) { hostTable = hostTable_.get(); } else { hostTable = routeTable_.get(); } if (entry->isPending()) { hostTable->programHostsToCPU(neighborKey, intf->getBcmIfId()); return; } auto neighborMac = entry->getMac(); auto isTrunk = entry->getPort().isAggregatePort(); if (isTrunk) { auto trunk = entry->getPort().aggPortID(); hostTable->programHostsToTrunk( neighborKey, intf->getBcmIfId(), neighborMac, getTrunkTable()->getBcmTrunkId(trunk)); } else { auto port = entry->getPort().phyPortID(); hostTable->programHostsToPort( neighborKey, intf->getBcmIfId(), neighborMac, getPortTable()->getBcmPortId(port), entry->getClassID()); } std::for_each( writableMplsNextHopTable()->getNextHops().begin(), writableMplsNextHopTable()->getNextHops().end(), [neighborKey](const auto& entry) { auto nhop = entry.second.lock(); if (nhop->getBcmHostKey() == neighborKey) { nhop->program(neighborKey); } }); } template <typename NeighborEntryT> void BcmSwitch::processAddedNeighborEntry(const NeighborEntryT* addedEntry) { CHECK(addedEntry); if (addedEntry->isPending()) { XLOG(DBG3) << "adding pending neighbor entry to " << addedEntry->getIP().str() << (addedEntry->getClassID().has_value() ? static_cast<int>(addedEntry->getClassID().value()) : 0); } else { XLOG(DBG3) << "adding neighbor entry " << addedEntry->getIP().str() << " to " << addedEntry->getMac().toString() << (addedEntry->getClassID().has_value() ? static_cast<int>(addedEntry->getClassID().value()) : 0); } const auto* intf = getIntfTable()->getBcmIntf(addedEntry->getIntfID()); auto vrf = getBcmVrfId(intf->getInterface()->getRouterID()); auto neighborKey = BcmHostKey(vrf, IPAddress(addedEntry->getIP()), addedEntry->getIntfID()); neighborTable_->registerNeighbor(neighborKey); processAddedAndChangedNeighbor(neighborKey, intf, addedEntry); } template <typename NeighborEntryT> void BcmSwitch::processChangedNeighborEntry( const NeighborEntryT* oldEntry, const NeighborEntryT* newEntry) { CHECK(oldEntry); CHECK(newEntry); CHECK_EQ(oldEntry->getIP(), newEntry->getIP()); if (newEntry->isPending()) { XLOG(DBG3) << "changing neighbor entry " << newEntry->getIP().str() << " classID: " << (newEntry->getClassID().has_value() ? static_cast<int>(newEntry->getClassID().value()) : 0) << " to pending"; } else { XLOG(DBG3) << "changing neighbor entry " << newEntry->getIP().str() << " classID: " << (newEntry->getClassID().has_value() ? static_cast<int>(newEntry->getClassID().value()) : 0) << " to " << newEntry->getMac().toString(); } const auto* intf = getIntfTable()->getBcmIntf(newEntry->getIntfID()); auto vrf = getBcmVrfId(intf->getInterface()->getRouterID()); auto neighborKey = BcmHostKey(vrf, IPAddress(newEntry->getIP()), newEntry->getIntfID()); processAddedAndChangedNeighbor(neighborKey, intf, newEntry); } template <typename NeighborEntryT> void BcmSwitch::processRemovedNeighborEntry( const NeighborEntryT* removedEntry) { CHECK(removedEntry); XLOG(DBG3) << "deleting neighbor entry " << removedEntry->getIP().str(); const auto* intf = getIntfTable()->getBcmIntf(removedEntry->getIntfID()); auto vrf = getBcmVrfId(intf->getInterface()->getRouterID()); auto neighborKey = BcmHostKey( vrf, IPAddress(removedEntry->getIP()), removedEntry->getIntfID()); neighborTable_->unregisterNeighbor(neighborKey); if (getPlatform()->getAsic()->isSupported(HwAsic::Feature::HOSTTABLE)) { hostTable_->programHostsToCPU(neighborKey, intf->getBcmIfId()); } else { routeTable_->programHostsToCPU(neighborKey, intf->getBcmIfId()); } std::for_each( writableMplsNextHopTable()->getNextHops().begin(), writableMplsNextHopTable()->getNextHops().end(), [neighborKey](const auto& entry) { auto nhop = entry.second.lock(); if (nhop->getBcmHostKey() == neighborKey) { nhop->program(neighborKey); } }); } void BcmSwitch::processNeighborDelta( const StateDelta& delta, std::shared_ptr<SwitchState>* appliedState, DeltaType optype) { processNeighborTableDelta<folly::IPAddressV4>(delta, appliedState, optype); processNeighborTableDelta<folly::IPAddressV6>(delta, appliedState, optype); } template <typename AddrT> void BcmSwitch::processNeighborTableDelta( const StateDelta& stateDelta, std::shared_ptr<SwitchState>* appliedState, DeltaType optype) { using NeighborTableT = std::conditional_t< std::is_same<AddrT, folly::IPAddressV4>::value, ArpTable, NdpTable>; using NeighborEntryT = typename NeighborTableT::Entry; using NeighborEntryDeltaT = DeltaValue<NeighborEntryT>; std::vector<NeighborEntryDeltaT> discardedNeighborEntryDelta; for (const auto& vlanDelta : stateDelta.getVlansDelta()) { for (const auto& delta : vlanDelta.template getNeighborDelta<NeighborTableT>()) { try { const auto* oldEntry = delta.getOld().get(); const auto* newEntry = delta.getNew().get(); if (optype == ADDED && !oldEntry) { processAddedNeighborEntry(newEntry); } else if (optype == REMOVED && !newEntry) { processRemovedNeighborEntry(oldEntry); } else if (optype == CHANGED && oldEntry && newEntry) { processChangedNeighborEntry(oldEntry, newEntry); } } catch (const BcmError& error) { rethrowIfHwNotFull(error); discardedNeighborEntryDelta.push_back(delta); } } } for (const auto& delta : discardedNeighborEntryDelta) { SwitchState::revertNewNeighborEntry<NeighborEntryT, NeighborTableT>( delta.getNew(), delta.getOld(), appliedState); } } template <typename RouteT> void BcmSwitch::processChangedRoute( const RouterID& id, const shared_ptr<RouteT>& oldRoute, const shared_ptr<RouteT>& newRoute) { std::string routeMessage; folly::toAppend( "changing route entry @ vrf ", id, " from old: ", oldRoute->str(), "to new: ", newRoute->str(), &routeMessage); XLOG(DBG3) << routeMessage; // if the new route is not resolved, delete it instead of changing it if (!newRoute->isResolved()) { XLOG(DBG1) << "Non-resolved route HW programming is skipped"; processRemovedRoute(id, oldRoute); } else { routeTable_->addRoute(getBcmVrfId(id), newRoute.get()); } } template <typename RouteT> void BcmSwitch::processAddedRoute( const RouterID& id, const shared_ptr<RouteT>& route) { std::string routeMessage; folly::toAppend( "adding route entry @ vrf ", id, " ", route->str(), &routeMessage); XLOG(DBG3) << routeMessage; // if the new route is not resolved, ignore it if (!route->isResolved()) { XLOG(DBG1) << "Non-resolved route HW programming is skipped"; return; } routeTable_->addRoute(getBcmVrfId(id), route.get()); } template <typename RouteT> void BcmSwitch::processRemovedRoute( const RouterID id, const shared_ptr<RouteT>& route) { XLOG(DBG3) << "removing route entry @ vrf " << id << " " << route->str(); if (!route->isResolved()) { XLOG(DBG1) << "Non-resolved route HW programming is skipped"; return; } routeTable_->deleteRoute(getBcmVrfId(id), route.get()); } void BcmSwitch::processRemovedRoutes(const StateDelta& delta) { forEachChangedRoute( delta, [](RouterID /*id*/, const auto& /*oldRoute*/, const auto& /*newRoute*/) { }, [](RouterID /*id*/, const auto& /*addedRoute*/) {}, [this](RouterID rid, const auto& deleted) { processRemovedRoute(rid, deleted); }); } void BcmSwitch::processAddedChangedRoutes( const StateDelta& delta, std::shared_ptr<SwitchState>* appliedState) { processRouteTableDelta<folly::IPAddressV4>(delta, appliedState); processRouteTableDelta<folly::IPAddressV6>(delta, appliedState); } template <typename AddrT> void BcmSwitch::processRouteTableDelta( const StateDelta& delta, std::shared_ptr<SwitchState>* appliedState) { using RouteT = Route<AddrT>; using PrefixT = typename Route<AddrT>::Prefix; std::map<RouterID, std::vector<PrefixT>> discardedPrefixes; if (!isRouteUpdateValid<AddrT>(delta)) { // typically indicate label stack depth exceeded. throw FbossError("invalid route update"); } forEachChangedRoute<AddrT>( delta, [&](RouterID id, const shared_ptr<RouteT>& oldRoute, const shared_ptr<RouteT>& newRoute) { try { processChangedRoute(id, oldRoute, newRoute); } catch (const BcmError& e) { rethrowIfHwNotFull(e); discardedPrefixes[id].push_back(oldRoute->prefix()); } }, [&](RouterID id, const shared_ptr<RouteT>& addedRoute) { try { processAddedRoute(id, addedRoute); } catch (const BcmError& e) { rethrowIfHwNotFull(e); discardedPrefixes[id].push_back(addedRoute->prefix()); } }, [](RouterID /*rid*/, const shared_ptr<RouteT>& /*deletedRoute*/) { // do nothing }); // discard routes for (const auto& entry : discardedPrefixes) { const auto id = entry.first; for (const auto& prefix : entry.second) { const auto newRoute = findRoute<AddrT>(id, prefix.toCidrNetwork(), delta.newState()); const auto oldRoute = findRoute<AddrT>(id, prefix.toCidrNetwork(), delta.oldState()); SwitchState::revertNewRouteEntry(id, newRoute, oldRoute, appliedState); } } } void BcmSwitch::linkscanCallback( int unit, bcm_port_t bcmPort, bcm_port_info_t* info) { try { BcmUnit* unitObj = BcmAPI::getUnit(unit); BcmSwitch* hw = static_cast<BcmSwitch*>(unitObj->getCookie()); bool up = info->linkstatus == BCM_PORT_LINK_STATUS_UP; phy::LinkFaultStatus linkFault; linkFault.localFault() = info->fault & BCM_PORT_FAULT_LOCAL; linkFault.remoteFault() = info->fault & BCM_PORT_FAULT_REMOTE; hw->linkScanBottomHalfEventBase_.runInEventBaseThread( [hw, bcmPort, up, linkFault]() { hw->linkStateChangedHwNotLocked(bcmPort, up, linkFault); }); } catch (const std::exception& ex) { XLOG(ERR) << "unhandled exception while processing linkscan callback " << "for unit " << unit << " port " << bcmPort << ": " << folly::exceptionStr(ex); } } void BcmSwitch::linkStateChangedHwNotLocked( bcm_port_t bcmPortId, bool up, phy::LinkFaultStatus iPhyLinkFaultStatus) { CHECK(linkScanBottomHalfEventBase_.inRunningEventBaseThread()); if (!up) { auto trunk = trunkTable_->linkDownHwNotLocked(bcmPortId); if (trunk != BcmTrunk::INVALID) { XLOG(INFO) << "Shrinking ECMP entries egressing over trunk " << trunk; writableEgressManager()->trunkDownHwNotLocked(trunk); } writableEgressManager()->linkDownHwNotLocked(bcmPortId); } else { // For port up events we wait till ARP/NDP entries // are re resolved after port up before adding them // back. Adding them earlier leads to packet loss. } callback_->linkStateChanged( portTable_->getPortId(bcmPortId), up, iPhyLinkFaultStatus); } // The callback provided to bcm_rx_register() bcm_rx_t BcmSwitch::packetRxSdkCallback(int unit, bcm_pkt_t* pkt, void* cookie) { auto* bcmSw = static_cast<BcmSwitch*>(cookie); DCHECK_EQ(bcmSw->getUnit(), unit); DCHECK_EQ(bcmSw->getUnit(), pkt->unit); BcmPacketT bcmPacket; bcmPacket.usePktIO = false; bcmPacket.ptrUnion.pkt = pkt; return (bcm_rx_t)(bcmSw->packetReceived(bcmPacket)); } #ifdef INCLUDE_PKTIO // The callback provided to bcm_pktio_rx_register() bcm_pktio_rx_t BcmSwitch::pktioPacketRxSdkCallback( int unit, bcm_pktio_pkt_t* pkt, void* cookie) { auto* bcmSw = static_cast<BcmSwitch*>(cookie); DCHECK_EQ(bcmSw->getUnit(), unit); DCHECK_EQ(bcmSw->getUnit(), pkt->unit); BcmPacketT bcmPacket; bcmPacket.usePktIO = true; bcmPacket.ptrUnion.pktioPkt = pkt; return (bcm_pktio_rx_t)(bcmSw->packetReceived(bcmPacket)); } #endif int BcmSwitch::packetReceived(BcmPacketT& bcmPacket) noexcept { bool usePktIO = bcmPacket.usePktIO; DCHECK_EQ(usePktIO, usePKTIO()); // Log it if it's an sFlow sample if (handleSflowPacket(bcmPacket)) { // It was just here because it was an sFlow packet XLOG(DBG4) << "The packet is sample-only. Skip further procssing."; if (!usePktIO) { bcm_pkt_t* pkt = bcmPacket.ptrUnion.pkt; bcm_pkt_rx_free(pkt->unit, pkt); return BCM_RX_HANDLED_OWNED; } else { #ifdef INCLUDE_PKTIO return BCM_PKTIO_RX_HANDLED; #else // TBD should not be here. // exception is not allowed. what should be proper handling here? return BCM_RX_HANDLED_OWNED; #endif } } // Otherwise, send it to the SwSwitch unique_ptr<BcmRxPacket> bcmPkt; try { bcmPkt = createRxPacket(bcmPacket); } catch (const std::exception& ex) { XLOG(ERR) << "failed to allocate BcmRxPacket for receive handling: " << folly::exceptionStr(ex); #ifdef INCLUDE_PKTIO return usePktIO ? BCM_PKTIO_RX_NOT_HANDLED : BCM_RX_NOT_HANDLED; #else return BCM_RX_NOT_HANDLED; #endif } callback_->packetReceived(std::move(bcmPkt)); #ifdef INCLUDE_PKTIO return usePktIO ? BCM_PKTIO_RX_HANDLED : BCM_RX_HANDLED_OWNED; #else return BCM_RX_HANDLED_OWNED; #endif } bool BcmSwitch::sendPacketSwitchedAsync(unique_ptr<TxPacket> pkt) noexcept { unique_ptr<BcmTxPacket> bcmPkt( boost::polymorphic_downcast<BcmTxPacket*>(pkt.release())); return BCM_SUCCESS(BcmTxPacket::sendAsync(std::move(bcmPkt), this)); } bool BcmSwitch::sendPacketOutOfPortAsync( unique_ptr<TxPacket> pkt, PortID portID, std::optional<uint8_t> queue) noexcept { unique_ptr<BcmTxPacket> bcmPkt( boost::polymorphic_downcast<BcmTxPacket*>(pkt.release())); #ifdef INCLUDE_PKTIO bcmPkt->setSwitched(false); #endif bcmPkt->setDestModPort(getPortTable()->getBcmPortId(portID)); if (queue) { bcmPkt->setCos(*queue); } return BCM_SUCCESS(BcmTxPacket::sendAsync(std::move(bcmPkt), this)); } bool BcmSwitch::sendPacketSwitchedSync(unique_ptr<TxPacket> pkt) noexcept { unique_ptr<BcmTxPacket> bcmPkt( boost::polymorphic_downcast<BcmTxPacket*>(pkt.release())); return BCM_SUCCESS(BcmTxPacket::sendSync(std::move(bcmPkt), this)); } bool BcmSwitch::sendPacketOutOfPortSync( unique_ptr<TxPacket> pkt, PortID portID, std::optional<uint8_t> cos) noexcept { unique_ptr<BcmTxPacket> bcmPkt( boost::polymorphic_downcast<BcmTxPacket*>(pkt.release())); #ifdef INCLUDE_PKTIO bcmPkt->setSwitched(false); #endif if (cos.has_value()) { bcmPkt->setCos(cos.value()); } bcmPkt->setDestModPort(getPortTable()->getBcmPortId(portID)); return BCM_SUCCESS(BcmTxPacket::sendSync(std::move(bcmPkt), this)); } void BcmSwitch::updateStatsImpl(SwitchStats* /* switchStats */) { // Update global statistics. updateGlobalStats(); // Update cpu or host bound packet stats controlPlane_->updateQueueCounters(); } folly::F14FastMap<std::string, HwPortStats> BcmSwitch::getPortStats() const { folly::F14FastMap<std::string, HwPortStats> portStats; for (auto& bcmPortEntry : *portTable_) { auto stat = bcmPortEntry.second->getPortStats(); if (stat) { portStats.emplace(bcmPortEntry.second->getPortName(), *stat); } } return portStats; } shared_ptr<BcmSwitchEventCallback> BcmSwitch::registerSwitchEventCallback( bcm_switch_event_t eventID, shared_ptr<BcmSwitchEventCallback> callback) { return BcmSwitchEventUtils::registerSwitchEventCallback( unit_, eventID, callback); } void BcmSwitch::unregisterSwitchEventCallback(bcm_switch_event_t eventID) { return BcmSwitchEventUtils::unregisterSwitchEventCallback(unit_, eventID); } void BcmSwitch::updateGlobalStats() { portTable_->updatePortStats(); trunkTable_->updateStats(); bcmStatUpdater_->updateStats(); auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); if ((now - bstStatsUpdateTime_ >= FLAGS_update_watermark_stats_interval_s) || bstStatsMgr_->isFineGrainedBufferStatLoggingEnabled()) { bstStatsUpdateTime_ = now; bstStatsMgr_->updateStats(); } } std::map<PortID, phy::PhyInfo> BcmSwitch::updateAllPhyInfo() const { return portTable_->updateIPhyInfo(); } uint64_t BcmSwitch::getDeviceWatermarkBytes() const { return bstStatsMgr_->getDeviceWatermarkBytes(); } bcm_if_t BcmSwitch::getDropEgressId() const { return platform_->getAsic()->getDefaultDropEgressID(); } bcm_if_t BcmSwitch::getToCPUEgressId() const { if (toCPUEgress_) { return toCPUEgress_->getID(); } else { return BcmEgressBase::INVALID; } } bool BcmSwitch::getAndClearNeighborHit( RouterID /*vrf*/, folly::IPAddress& /*ip*/) { // TODO(aeckert): t20059623 This should look in the host table and // check the hit bit, but that currently requires grabbing the main // lock and opens up the possibility of bg thread getting stuck // behind update thread. For now, stub this out to return true and // work on adding a better way to communicate hit bit + stale entry // garbage collection. return true; } void BcmSwitch::exitFatal() const { utilCreateDir(platform_->getCrashInfoDir()); dumpState(platform_->getCrashHwStateFile()); callback_->exitFatal(); } void BcmSwitch::processChangedAggregatePort( const std::shared_ptr<AggregatePort>& oldAggPort, const std::shared_ptr<AggregatePort>& newAggPort) { CHECK_EQ(oldAggPort->getID(), newAggPort->getID()); XLOG(DBG2) << "reprogramming AggregatePort " << oldAggPort->getID(); trunkTable_->programTrunk(oldAggPort, newAggPort); } void BcmSwitch::processAddedAggregatePort( const std::shared_ptr<AggregatePort>& aggPort) { XLOG(DBG2) << "creating AggregatePort " << aggPort->getID() << " with " << aggPort->subportsCount() << " ports"; trunkTable_->addTrunk(aggPort); } void BcmSwitch::processRemovedAggregatePort( const std::shared_ptr<AggregatePort>& aggPort) { XLOG(DBG2) << "deleting AggregatePort " << aggPort->getID(); trunkTable_->deleteTrunk(aggPort); } void BcmSwitch::processLoadBalancerChanges(const StateDelta& delta) { forEachChanged( delta.getLoadBalancersDelta(), &BcmSwitch::processChangedLoadBalancer, &BcmSwitch::processAddedLoadBalancer, &BcmSwitch::processRemovedLoadBalancer, this); } void BcmSwitch::processChangedLoadBalancer( const std::shared_ptr<LoadBalancer>& oldLoadBalancer, const std::shared_ptr<LoadBalancer>& newLoadBalancer) { CHECK_EQ(oldLoadBalancer->getID(), newLoadBalancer->getID()); XLOG(DBG2) << "reprogramming LoadBalancer " << oldLoadBalancer->getID(); rtag7LoadBalancer_->programLoadBalancer(oldLoadBalancer, newLoadBalancer); } void BcmSwitch::processAddedLoadBalancer( const std::shared_ptr<LoadBalancer>& loadBalancer) { XLOG(DBG2) << "creating LoadBalancer " << loadBalancer->getID(); rtag7LoadBalancer_->addLoadBalancer(loadBalancer); } void BcmSwitch::processRemovedLoadBalancer( const std::shared_ptr<LoadBalancer>& loadBalancer) { XLOG(DBG2) << "deleting LoadBalancer " << loadBalancer->getID(); rtag7LoadBalancer_->deleteLoadBalancer(loadBalancer); } bool BcmSwitch::isControlPlaneQueueNameChanged( const shared_ptr<ControlPlane>& oldCPU, const shared_ptr<ControlPlane>& newCPU) { if ((oldCPU->getQueues().size() != newCPU->getQueues().size())) { return true; } for (const auto& newQueue : newCPU->getQueues()) { auto oldQueue = oldCPU->getQueues().at(newQueue->getID()); if (newQueue->getName() != oldQueue->getName()) { return true; } } return false; } void BcmSwitch::processChangedControlPlaneQueues( const shared_ptr<ControlPlane>& oldCPU, const shared_ptr<ControlPlane>& newCPU) { XLOG_IF(DBG1, oldCPU->getQueues().size() != newCPU->getQueues().size()) << "Old cpu queue size:" << oldCPU->getQueues().size() << ", but new cpu queue size:" << newCPU->getQueues().size(); // first make sure queue settings changes applied for (const auto& newQueue : newCPU->getQueues()) { if (oldCPU->getQueues().size() > newQueue->getID() && *(oldCPU->getQueues().at(newQueue->getID())) == *newQueue) { continue; } XLOG(DBG1) << "New cos queue settings on cpu queue " << static_cast<int>(newQueue->getID()); controlPlane_->setupQueue(*newQueue); } if (isControlPlaneQueueNameChanged(oldCPU, newCPU)) { controlPlane_->getQueueManager()->setupQueueCounters(newCPU->getQueues()); } } void BcmSwitch::processChangedRxReasonToQueueEntries( const shared_ptr<ControlPlane>& oldCPU, const shared_ptr<ControlPlane>& newCPU) { const auto& oldReasonToQueue = oldCPU->getRxReasonToQueue(); const auto& newReasonToQueue = newCPU->getRxReasonToQueue(); for (int index = 0; index < std::max(newReasonToQueue.size(), oldReasonToQueue.size()); ++index) { if (index >= oldReasonToQueue.size()) { // added controlPlane_->setReasonToQueueEntry(index, newReasonToQueue[index]); } else if (index >= newReasonToQueue.size()) { // deleted controlPlane_->deleteReasonToQueueEntry(index); } else if (oldReasonToQueue[index] != newReasonToQueue[index]) { // changed controlPlane_->setReasonToQueueEntry(index, newReasonToQueue[index]); } } } void BcmSwitch::processMirrorChanges(const StateDelta& delta) { forEachChanged( delta.getMirrorsDelta(), &BcmMirrorTable::processChangedMirror, &BcmMirrorTable::processAddedMirror, &BcmMirrorTable::processRemovedMirror, writableBcmMirrorTable()); } void BcmSwitch::clearPortStats( const std::unique_ptr<std::vector<int32_t>>& ports) { bcmStatUpdater_->clearPortStats(ports); } std::vector<PrbsLaneStats> BcmSwitch::getPortAsicPrbsStats(int32_t portId) { return bcmStatUpdater_->getPortAsicPrbsStats(portId); } void BcmSwitch::clearPortAsicPrbsStats(int32_t portId) { bcmStatUpdater_->clearPortAsicPrbsStats(portId); } std::vector<PrbsLaneStats> BcmSwitch::getPortGearboxPrbsStats( int32_t portId, phy::Side side) { return portTable_->getBcmPort(portId)->getPlatformPort()->getGearboxPrbsStats( side); } void BcmSwitch::clearPortGearboxPrbsStats(int32_t portId, phy::Side side) { portTable_->getBcmPort(portId)->getPlatformPort()->clearGearboxPrbsStats( side); } void BcmSwitch::dumpState(const std::string& path) const { auto stateString = gatherSdkState(); if (stateString.length() > 0) { folly::writeFile(stateString, path.c_str()); } } void BcmSwitch::processChangedLabelForwardingInformationBase( const StateDelta& delta) { forEachChanged( delta.getLabelForwardingInformationBaseDelta(), &BcmSwitch::processChangedLabelForwardingEntry, &BcmSwitch::processAddedLabelForwardingEntry, &BcmSwitch::processRemovedLabelForwardingEntry, this); } void BcmSwitch::processAddedLabelForwardingEntry( const std::shared_ptr<Route<LabelID>>& addedEntry) { writableLabelMap()->processAddedLabelSwitchAction( addedEntry->getID().value(), addedEntry->getForwardInfo()); } void BcmSwitch::processRemovedLabelForwardingEntry( const std::shared_ptr<Route<LabelID>>& deletedEntry) { writableLabelMap()->processRemovedLabelSwitchAction( deletedEntry->getID().value()); } void BcmSwitch::processChangedLabelForwardingEntry( const std::shared_ptr<Route<LabelID>>& /*oldEntry*/, const std::shared_ptr<Route<LabelID>>& newEntry) { writableLabelMap()->processChangedLabelSwitchAction( newEntry->getID().value(), newEntry->getForwardInfo()); } void BcmSwitch::l2LearningCallback( int unit, bcm_l2_addr_t* l2Addr, int operation, void* userData) { auto* bcmSw = static_cast<BcmSwitch*>(userData); DCHECK_EQ(bcmSw->getUnit(), unit); bcmSw->l2LearningUpdateReceived(l2Addr, operation); } /* * TD2 and TH * ---------- * * Typical Learning workflow: * - ASIC encounters unknown source MAC+vlan. * - ASIC creates a PENDING L2 entry for the MAC+vlan and generates callback. * - In response, wedge_agent will VALIDATE the L2 entry. * * Typical Aging workflow: * - SDK thread ages out a stale MAC+vlan entry. * - This triggers a callback of type DELETE. * - The MAC+vlan that is aged out would be VALIDATED as when the entry is * learned, in resposne to PENDING ADD callback, wedge_agent would have * VALIDATED the entry. * * The initial implementation (which used wrong BCM API to register callbacks, * and got L2 mod fifo to fill up), in addition to PENDING ADD callback, when * PENDING entry got VALIDATED, wedge_agent received DELETE for PENDING and ADD * for VALIDATED. Thus, wedge_agent had an explicit logic to ignore DELETE * callback for * PENDING and ADD callback for VALIDATED. * * However, with correct BCM API to register callbacks, we no longer get DELETE * for PENDING and ADD for VALIDATED, and thus those need not be ignored here * (Broadcom case CS9347300). * * Furthermore, there is a legitimate case where wedge_agent receives ADD for * VALIDATED, and thus wedge_agent cannot ignore this callback but must handle * it viz. Mac Move. * * Typical Mac Move workflow: * - MAC+vlan is learned on a port p1 (say) using typical Learning workflow. * - ASIC encounters the same source MAC+vlan on a different port p2 (say). * - In response, the MAC is moved from port p1 to p2 and two callbacks are * generated viz. DELETE for VALIDATED entry on p1, followed by ADD for * VALIDATED entry on p2. * * TH3 * --- * * For TH3, there is no notion of PENDING * * The entries are learned as VALIDATED and thus the first callback agent * receives is for VALIDATED ADD entry, and wedge_agent must process it. * * In response, the wedge_agent will program the already VALIDATED entry * again. This has no effect and does not generate additional callback. * * The Mac Move wofkflow for TH3 is same as that for TD2 and TH3 i.e. DELETE * on old port and ADD on new port both for VALIDATED MACs. */ void BcmSwitch::l2LearningUpdateReceived( bcm_l2_addr_t* l2Addr, int operation) noexcept { bool detailedUpdate = platform_->getAsic()->isSupported(HwAsic::Feature::DETAILED_L2_UPDATE); if (l2Addr && isL2OperationOfInterest(operation, detailedUpdate)) { callback_->l2LearningUpdateReceived( createL2Entry(l2Addr, isL2EntryPending(l2Addr)), getL2EntryUpdateType(operation, detailedUpdate)); } } void BcmSwitch::setupCos() { if (platform_->getAsic()->isSupported( HwAsic::Feature::EGRESS_QUEUE_FLEX_COUNTER)) { // Need to handle warmboot queueFlexCounterMgr_.reset(new BcmEgressQueueFlexCounterManager(this)); } cosManager_.reset(new BcmCosManager(this)); controlPlane_.reset(new BcmControlPlane(this)); // set up cpu queue stats counter controlPlane_->getQueueManager()->setupQueueCounters(); } std::unique_ptr<BcmRxPacket> BcmSwitch::createRxPacket(BcmPacketT bcmPacket) { return std::make_unique<FbBcmRxPacket>(bcmPacket, this); } bool BcmSwitch::isRxThreadRunning() const { return bcm_rx_active(unit_); } void BcmSwitch::stopLinkscanThread() { // Disable the linkscan thread auto rv = bcm_linkscan_enable_set(unit_, 0); CHECK(BCM_SUCCESS(rv)) << "failed to stop BcmSwitch linkscan thread " << bcm_errmsg(rv); if (linkScanBottomHalfThread_) { linkScanBottomHalfEventBase_.runInEventBaseThreadAndWait( [this] { linkScanBottomHalfEventBase_.terminateLoopSoon(); }); linkScanBottomHalfThread_->join(); } } void BcmSwitch::createAclGroup() { // Install the master ACL group here, whose content may change overtime createFPGroup( unit_, getAclQset(getPlatform()->getAsic()->getAsicType()), platform_->getAsic()->getDefaultACLGroupID(), FLAGS_acl_g_pri, getPlatform()->getAsic()->isSupported(HwAsic::Feature::HSDK)); } void BcmSwitch::dropDhcpPackets() { // Don't flood broadcast DHCP packets to other hosts in the vlan. We // should still trap the packet to the cpu in this case though. auto rv = bcm_switch_control_set(unit_, bcmSwitchDhcpPktDrop, 1); bcmCheckError(rv, "failed to set DHCP packet dropping"); } void BcmSwitch::setL3MtuFailPackets() { // trap packets for which L3 MTU check fails auto rv = bcm_switch_control_set(unit_, bcmSwitchL3MtuFailToCpu, 1); bcmCheckError(rv, "Failed to set L3 MTU fail to CPU"); } void BcmSwitch::initFieldProcessor() const { // We need to make sure we remove all the FlexCounter attached to acl entries // in all the field process group before we call the bcm_field_init(). // Otherwise we'll lose the mapping b/w BcmIngressFieldProcessorFlexCounter // and BcmAclEntry and won't be able to remove // attached BcmIngressFieldProcessorFlexCounter even the BcmAclEntry is // removed. if (getPlatform()->getAsic()->isSupported( HwAsic::Feature::INGRESS_FIELD_PROCESSOR_FLEX_COUNTER)) { for (auto grpId : {platform_->getAsic()->getDefaultACLGroupID()}) { BcmIngressFieldProcessorFlexCounter::removeAllCountersInFpGroup( unit_, grpId); } } auto rv = bcm_field_init(unit_); bcmCheckError(rv, "failed to set up ContentAware processing"); } void BcmSwitch::initMirrorModule() const { if (platform_->getAsic()->isSupported(HwAsic::Feature::HSDK)) { // Do not need the calls below on HSDK: T74698149. bcm_mirror_init is // optional but harmless to call to make sure we are cleaning any state auto rv = bcm_mirror_init(unit_); bcmCheckError(rv, "failed to set up Mirroring"); rv = bcm_switch_control_set( unit_, bcmSwitchSampleIngressRandomSeed, kHSDKSflowSamplingSeed); bcmCheckError(rv, "failed to set ingress mirror seed"); return; } auto rv = bcm_switch_control_set(unit_, bcmSwitchDirectedMirroring, 1); bcmCheckError(rv, "Failed to set Switch Directed Mirroring"); rv = bcm_mirror_init(unit_); bcmCheckError(rv, "failed to set up Mirroring"); rv = bcm_mirror_mode_set(unit_, BCM_MIRROR_L2); bcmCheckError(rv, "Failed to set mirror mode"); } void BcmSwitch::initMplsModule() const { auto rv = bcm_mpls_init(unit_); bcmCheckError(rv, "failed to set up label switching"); } void BcmSwitch::setupFPGroups() { for (auto grpId : {platform_->getAsic()->getDefaultACLGroupID()}) { auto gid = static_cast<bcm_field_group_t>(grpId); XLOG(DBG1) << "Setting up FP group : " << gid; if (gid == platform_->getAsic()->getDefaultACLGroupID()) { createAclGroup(); } else { throw FbossError("Unknown group id : ", gid); } } } bool BcmSwitch::haveMissingOrQSetChangedFPGroups() const { std::map<std::pair<int32_t, int32_t>, bcm_field_qset_t> grp2Qset = { {{platform_->getAsic()->getDefaultACLGroupID(), FLAGS_acl_g_pri}, getAclQset(getPlatform()->getAsic()->getAsicType())}, }; for (const auto& grpAndQset : grp2Qset) { auto gid = static_cast<bcm_field_group_t>(grpAndQset.first.first); auto qset = grpAndQset.second; if (!fpGroupExists(unit_, gid)) { XLOG(DBG1) << " FP group : " << gid << " does not exist"; return true; } else if (!FPGroupDesiredQsetCmp(this, gid, qset).hasDesiredQset()) { XLOG(INFO) << " FP group : " << gid << " has a changed QSET"; return true; } } XLOG(DBG1) << " All FP groups have desired QSETs "; return false; } bcm_gport_t BcmSwitch::getCpuGPort() const { return BCM_GPORT_LOCAL_CPU; } // TODO(petr): this is here only because bcmRxReasonSampleSource and the like // are not yet open sourced. bool BcmSwitch::handleSflowPacket(BcmPacketT& bcmPacket) noexcept { bool ingressSample = false, egressSample = false, sampleOnly; uint32 src_port, dest_port, vlan, pkt_len; uint8* pkt_data; if (!bcmPacket.usePktIO) { bcm_pkt_t* pkt = bcmPacket.ptrUnion.pkt; ingressSample = _SHR_RX_REASON_GET(pkt->rx_reasons, bcmRxReasonSampleSource); egressSample = _SHR_RX_REASON_GET(pkt->rx_reasons, bcmRxReasonSampleDest); src_port = pkt->src_port; dest_port = pkt->dest_port; vlan = pkt->vlan; pkt_data = pkt->pkt_data->data; pkt_len = pkt->pkt_data->len; sampleOnly = ((pkt->rx_reason ^ bcmRxReasonSampleSource ^ bcmRxReasonSampleDest) == 0); } else { #ifdef INCLUDE_PKTIO bcm_pktio_pkt_t* pkt = bcmPacket.ptrUnion.pktioPkt; bcm_pktio_reasons_t reasons; uint32 val; auto rv = bcm_pktio_pmd_reasons_get(pkt->unit, pkt, &reasons); bcmCheckError(rv, "failed get PKTIO RX reason"); ingressSample = BCM_PKTIO_REASON_GET( reasons.rx_reasons, BCMPKT_RX_REASON_CPU_SFLOW_SRC); egressSample = BCM_PKTIO_REASON_GET( reasons.rx_reasons, BCMPKT_RX_REASON_CPU_SFLOW_DST); // Getting src_port, dst_port, vlan is done here and also in RxPacket // constructor, which is after slow handling. TBD -- should RxPacket be // constructed before sflow handling to avoid duplication? Why is SFLOW // handling not part of general RX handling but singled out as a // pre-processing step before RxPacket is constructed and processed? rv = bcm_pktio_pmd_field_get( unit_, pkt, bcmPktioPmdTypeHigig2, BCM_PKTIO_HG2_SRC_PORT, &val); bcmCheckError(rv, "failed to get pktio SRC_PORT_NUM"); src_port = val; /* The query of (bcmPktioPmdTypeHigig2, BCM_PKTIO_HG2_DST_PORT_MGIDL) * depends on MCST. * When MCST=0, it is destination port ID. * When MCST=1, it is LSBs of MGID. * MCST can be obtained from a query of * (bcmPktioPmdTypeHigig2, BCM_PKTIO_HG2_MCST) */ rv = bcm_pktio_pmd_field_get( unit_, pkt, bcmPktioPmdTypeHigig2, BCM_PKTIO_HG2_DST_PORT_MGIDL, &val); bcmCheckError(rv, "failed to get pktio BCM_PKTIO_HG2_DST_PORT_MGIDL"); dest_port = val; rv = bcm_pktio_pmd_field_get( unit_, pkt, bcmPktioPmdTypeRx, BCMPKT_RXPMD_OUTER_VID, &val); bcmCheckError(rv, "failed to get pktio OUTER_VID"); vlan = val; rv = bcm_pktio_pkt_data_get(unit_, pkt, (void**)&pkt_data, &pkt_len); bcmCheckError(rv, "failed to get pktio packet data"); BCM_PKTIO_REASON_CLEAR(reasons.rx_reasons, BCMPKT_RX_REASON_CPU_SFLOW_SRC); BCM_PKTIO_REASON_CLEAR(reasons.rx_reasons, BCMPKT_RX_REASON_CPU_SFLOW_DST); sampleOnly = BCM_PKTIO_REASON_IS_NULL(reasons.rx_reasons); #endif } if (!ingressSample && !egressSample) { return false; } // Assemble packet metadata SflowPacketInfo info; auto currentTime = std::chrono::high_resolution_clock::now(); auto duration = duration_cast<std::chrono::nanoseconds>(currentTime.time_since_epoch()); *info.timestamp()->seconds() = duration_cast<std::chrono::seconds>(duration).count(); *info.timestamp()->nanoseconds() = duration_cast<std::chrono::nanoseconds>( duration % std::chrono::seconds(1)) .count(); *info.ingressSampled() = ingressSample; *info.egressSampled() = egressSample; info.srcPort() = src_port; info.dstPort() = dest_port; info.vlan() = vlan; auto snapLen = std::min(kMaxSflowSnapLen, (unsigned int)(pkt_len)); XLOG(DBG6) << "sFlow captured packet of size " << pkt_len; XLOG(DBG6) << "Packet dump following (max 128 bytes):\n " << folly::hexDump(pkt_data, snapLen); { std::string packetData(pkt_data, pkt_data + snapLen); *info.packetData() = std::move(packetData); } // Print it for debugging XLOG(DBG6) << "sFlowSample: (" << *info.timestamp()->seconds() << ',' << *info.timestamp()->nanoseconds() << ',' << *info.ingressSampled() << ',' << *info.egressSampled() << ',' << *info.srcPort() << ',' << *info.dstPort() << ',' << *info.vlan() << ',' << info.packetData()->length() << ")\n"; sFlowExporterTable_->sendToAll(info); // If it is only here because of sFlow, we're done if (sampleOnly) { return true; } return false; } std::string BcmSwitch::gatherSdkState() const { if (!platform_->isBcmShellSupported()) { XLOG(INFO) << "Cannot dump SDK state since the platform does not support " "bcm shell"; return ""; } BcmLogBuffer logBuffer; printDiagCmd("portstat"); printDiagCmd("l2 show"); printDiagCmd("l3 l3table show"); printDiagCmd("l3 ip6host show"); printDiagCmd("l3 defip show"); printDiagCmd("l3 ip6route show"); printDiagCmd("l3 intf show"); printDiagCmd("l3 egress show"); printDiagCmd("l3 multipath show"); printDiagCmd("trunk show"); return logBuffer.getBuffer()->moveToFbString().toStdString(); } /* * BroadView is currently only supported in certain BCM SDK version. We will * remove this (and move to Bcm) once all 6.3.x BCM SDK is removed. */ std::unique_ptr<PacketTraceInfo> BcmSwitch::getPacketTrace( std::unique_ptr<MockRxPacket> pkt) const { // Check if the port actually exists. if (!getPortTable()->portExists(pkt->getSrcPort())) { XLOG(ERR) << "Cannot getPacketTrace from non-existing port"; return nullptr; } // Construct the arguments for bcm_switch_pkt_trace_info_get() unsigned int options = 0; // Unused now auto port = getPortTable()->getBcmPortId(pkt->getSrcPort()); auto packetLength = pkt->getLength(); auto packetData = pkt->buf()->writableData(); bcm_switch_pkt_trace_info_t bcmPacketTraceInfo; auto rv = bcm_switch_pkt_trace_info_get( unit_, options, port, packetLength, packetData, &bcmPacketTraceInfo); bcmCheckError(rv, "Failed to get packet trace info (no support on Trident2)"); // Parse bcmPacketTraceInfo std::unique_ptr<PacketTraceInfo> packetTraceInfo = std::make_unique<PacketTraceInfo>(); transformPacketTraceInfo( this, bcmPacketTraceInfo, /* bcm_switch_pkt_trace_info_t */ *packetTraceInfo /* fboss::PacketTraceInfo */ ); return packetTraceInfo; } static int _addL2Entry(int /*unit*/, bcm_l2_addr_t* l2addr, void* user_data) { L2EntryThrift entry; auto* cookie = static_cast<std::pair<BcmSwitch*, std::vector<L2EntryThrift>*>*>( user_data); auto [hw, l2Table] = *cookie; *entry.mac() = folly::sformat( "{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}", l2addr->mac[0], l2addr->mac[1], l2addr->mac[2], l2addr->mac[3], l2addr->mac[4], l2addr->mac[5]); *entry.vlanID() = l2addr->vid; *entry.port() = 0; if (l2addr->flags & BCM_L2_TRUNK_MEMBER) { entry.trunk() = hw->getTrunkTable()->getAggregatePortId(l2addr->tgid); XLOG(DBG6) << "L2 entry: Mac:" << *entry.mac() << " Vid:" << *entry.vlanID() << " Trunk: " << entry.trunk().value_or({}); } else { *entry.port() = l2addr->port; XLOG(DBG6) << "L2 entry: Mac:" << *entry.mac() << " Vid:" << *entry.vlanID() << " Port: " << *entry.port(); } *entry.l2EntryType() = (l2addr->flags & BCM_L2_PENDING) ? L2EntryType::L2_ENTRY_TYPE_PENDING : L2EntryType::L2_ENTRY_TYPE_VALIDATED; // If classID is not set, this field is set to 0 by default. // Set in L2EntryThrift only if set to non-default if (l2addr->group != 0) { entry.classID() = l2addr->group; } l2Table->push_back(entry); return 0; } void BcmSwitch::fetchL2Table(std::vector<L2EntryThrift>* l2Table) const { auto cookie = std::make_pair(this, l2Table); int rv = bcm_l2_traverse(unit_, _addL2Entry, &cookie); bcmCheckError(rv, "bcm_l2_traverse failed"); } void BcmSwitch::processChangedAcl( const std::shared_ptr<AclEntry>& oldAcl, const std::shared_ptr<AclEntry>& newAcl) { // Unfortunately, we cannot modify a field entry due to BCM limitation XLOG(DBG3) << "processChangedAcl, ACL=" << oldAcl->getID(); processRemovedAcl(oldAcl); processAddedAcl(newAcl); } void BcmSwitch::processRemovedAcl(const std::shared_ptr<AclEntry>& acl) { XLOG(DBG3) << "processRemovedAcl, ACL=" << acl->getID(); aclTable_->processRemovedAcl(acl); } void BcmSwitch::processAddedAcl(const std::shared_ptr<AclEntry>& acl) { XLOG(DBG3) << "processAddedAcl, ACL=" << acl->getID(); aclTable_->processAddedAcl(platform_->getAsic()->getDefaultACLGroupID(), acl); } bool BcmSwitch::hasValidAclMatcher(const std::shared_ptr<AclEntry>& acl) const { /* * Broadcom SDK provides single API to configure lookupClassNeighbor and * lookupClassRoute. Thus, if both are specified for an ACL, they must have * same value. */ if ((acl->getLookupClassNeighbor() && acl->getLookupClassRoute()) && acl->getLookupClassNeighbor().value() != acl->getLookupClassRoute().value()) { return false; } return true; } void BcmSwitch::forceLinkscanOn(bcm_pbmp_t ports) { if (!(flags_ & LINKSCAN_REGISTERED)) { XLOG(INFO) << "Linkscan not registered, skipping force scan"; return; } // will immediately scan ports in the passed in port bitmap. Useful // for guaranteeing link status is updated during initialization. auto rv = bcm_linkscan_update(unit_, ports); bcmCheckError(rv, "failed initial scan of link status"); } bool BcmSwitch::isValidLabelForwardingEntry(const Route<LabelID>* entry) const { if (!isValidLabeledNextHopSet( platform_, entry->getForwardInfo().getNextHopSet())) { return false; } for (auto client : AllClientIDs()) { const auto* entryForClient = entry->getEntryForClient(client); if (!entryForClient) { continue; } if (!isValidLabeledNextHopSet(platform_, entryForClient->getNextHopSet())) { return false; } } return true; } void BcmSwitch::processControlPlaneChanges(const StateDelta& delta) { const auto controlPlaneDelta = delta.getControlPlaneDelta(); const auto& oldCPU = controlPlaneDelta.getOld(); const auto& newCPU = controlPlaneDelta.getNew(); processChangedControlPlaneQueues(oldCPU, newCPU); if (oldCPU->getQosPolicy() != newCPU->getQosPolicy()) { controlPlane_->setupIngressQosPolicy(newCPU->getQosPolicy()); } processChangedRxReasonToQueueEntries(oldCPU, newCPU); // COPP ACL will be handled just as regular ACL in processAclChanges() } void BcmSwitch::disableHotSwap() const { if (getPlatform()->getAsic()->isSupported(HwAsic::Feature::HOT_SWAP)) { switch (getPlatform()->getAsic()->getAsicType()) { case HwAsic::AsicType::ASIC_TYPE_FAKE: case HwAsic::AsicType::ASIC_TYPE_TRIDENT2: case HwAsic::AsicType::ASIC_TYPE_TOMAHAWK: // For TD2 and TH, we patch the SDK to disable hot swap, break; case HwAsic::AsicType::ASIC_TYPE_TOMAHAWK3: case HwAsic::AsicType::ASIC_TYPE_TOMAHAWK4: { auto rv = bcm_switch_control_set(unit_, bcmSwitchPcieHotSwapDisable, 1); bcmCheckError(rv, "Failed to disable hotswap"); } break; case HwAsic::AsicType::ASIC_TYPE_EBRO: case HwAsic::AsicType::ASIC_TYPE_GARONNE: case HwAsic::AsicType::ASIC_TYPE_MOCK: case HwAsic::AsicType::ASIC_TYPE_ELBERT_8DD: CHECK(0) << " Invalid ASIC type"; } } } bool BcmSwitch::isL2EntryPending(const bcm_l2_addr_t* l2Addr) { return l2Addr->flags & BCM_L2_PENDING; } bool BcmSwitch::isValidPortQueueUpdate( const std::vector<std::shared_ptr<PortQueue>>& /*oldPortQueueConfig*/, const std::vector<std::shared_ptr<PortQueue>>& newPortQueueConfig) const { if (newPortQueueConfig.empty()) { return true; } std::map<TrafficClass, int> tcToQueue; // validate that a traffic class has only one associated queue for (const auto& portQueue : newPortQueueConfig) { auto trafficClass = portQueue->getTrafficClass() ? portQueue->getTrafficClass().value() : static_cast<TrafficClass>(portQueue->getID()); if (trafficClass > BCM_PRIO_MAX || trafficClass < BCM_PRIO_MIN) { return false; } if (tcToQueue.find(trafficClass) != tcToQueue.end()) { // this traffic class has already been mapped to some queue return false; } tcToQueue.emplace(trafficClass, portQueue->getID()); // validate pfc priority if it exists on the queue if (const auto pfcPrioritySet = portQueue->getPfcPrioritySet()) { // anything more than 7 is illegal (3 bits) for (const auto& pfcPri : *pfcPrioritySet) { if (pfcPri > cfg::switch_config_constants::PFC_PRIORITY_VALUE_MAX()) { return false; } } } } return true; } bool BcmSwitch::isValidPortQosPolicyUpdate( const std::shared_ptr<Port>& /*oldPort*/, const std::shared_ptr<Port>& newPort, const std::shared_ptr<SwitchState>& newState) const { auto portQosPolicyName = newPort->getQosPolicy(); if (!portQosPolicyName.has_value() || portQosPolicyName.value() == newState->getDefaultDataPlaneQosPolicy()->getName()) { // either no policy or global default poliicy is provided return true; } auto qosPolicy = newState->getQosPolicy(portQosPolicyName.value()); // qos policy for port must not have exp, since this is not supported return qosPolicy->getExpMap().to().empty() && qosPolicy->getExpMap().from().empty(); } std::string BcmSwitch::listObjects( const std::vector<HwObjectType>& /*types*/, bool /*cached*/) const { throw FbossError("Listing hw objects not supported. Use Bcm shell"); } bool BcmSwitch::useHSDK() const { return BcmAPI::isHwUsingHSDK(); } bool BcmSwitch::usePKTIO() const { return unitObject_->usePKTIO(); } uint32_t BcmSwitch::generateDeterministicSeed( LoadBalancerID loadBalancerID, folly::MacAddress platformMac) const { // To avoid changing the seed across graceful restarts, the seed is generated // deterministically using the local MAC address. auto mac64 = platformMac.u64HBO(); uint32_t mac32 = static_cast<uint32_t>(mac64 & 0xFFFFFFFF); uint32_t seed = 0; switch (loadBalancerID) { case LoadBalancerID::ECMP: seed = folly::hash::jenkins_rev_mix32(mac32); break; case LoadBalancerID::AGGREGATE_PORT: seed = folly::hash::twang_32from64(mac64); break; } return seed; } } // namespace facebook::fboss

fboss/agent/hw/bcm/BcmSwitch.cpp (2,684 lines of code) (raw):