in syncd/Syncd.cpp [5462:5714]
void Syncd::run()
{
SWSS_LOG_ENTER();
WarmRestartTable warmRestartTable("STATE_DB"); // TODO from config
syncd_restart_type_t shutdownType = SYNCD_RESTART_TYPE_COLD;
volatile bool runMainLoop = true;
std::shared_ptr<swss::Select> s = std::make_shared<swss::Select>();
try
{
onSyncdStart(m_commandLineOptions->m_startType == SAI_START_TYPE_WARM_BOOT);
// create notifications processing thread after we create_switch to
// make sure, we have switch_id translated to VID before we start
// processing possible quick fdb notifications, and pointer for
// notification queue is created before we create switch
m_processor->startNotificationsProcessingThread();
for (auto& sw: m_switches)
{
m_mdioIpcServer->setSwitchId(sw.second->getRid());
}
m_mdioIpcServer->startMdioThread();
SWSS_LOG_NOTICE("syncd listening for events");
s->addSelectable(m_selectableChannel.get());
s->addSelectable(m_restartQuery.get());
s->addSelectable(m_flexCounter.get());
s->addSelectable(m_flexCounterGroup.get());
SWSS_LOG_NOTICE("starting main loop");
}
catch(const std::exception &e)
{
SWSS_LOG_ERROR("Runtime error during syncd init: %s", e.what());
sendShutdownRequestAfterException();
s = std::make_shared<swss::Select>();
s->addSelectable(m_restartQuery.get());
SWSS_LOG_NOTICE("starting main loop, ONLY restart query");
if (m_commandLineOptions->m_disableExitSleep)
runMainLoop = false;
}
m_timerWatchdog.setCallback(timerWatchdogCallback);
while (runMainLoop)
{
try
{
swss::Selectable *sel = NULL;
int result = s->select(&sel);
if (sel == m_restartQuery.get())
{
/*
* This is actual a bad design, since selectable may pick up
* multiple events from the queue, and after restart those
* events will be forgotten since they were consumed already and
* this may lead to forget populate object table which will
* lead to unable to find some objects.
*/
SWSS_LOG_NOTICE("is asic queue empty: %d", m_selectableChannel->empty());
while (!m_selectableChannel->empty())
{
processEvent(*m_selectableChannel.get());
}
SWSS_LOG_NOTICE("drained queue");
WatchdogScope ws(m_timerWatchdog, "restart query");
shutdownType = handleRestartQuery(*m_restartQuery);
if (shutdownType != SYNCD_RESTART_TYPE_PRE_SHUTDOWN && shutdownType != SYNCD_RESTART_TYPE_PRE_EXPRESS_SHUTDOWN)
{
// break out the event handling loop to shutdown syncd
runMainLoop = false;
break;
}
// Handle switch pre-shutdown and wait for the final shutdown
// event
SWSS_LOG_TIMER("%s pre-shutdown", (shutdownType == SYNCD_RESTART_TYPE_PRE_SHUTDOWN) ? "warm" : "express");
m_manager->removeAllCounters();
sai_status_t status = setRestartWarmOnAllSwitches(true);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s for pre-shutdown",
sai_serialize_status(status).c_str());
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable.setFlagFailed();
continue;
}
if (shutdownType == SYNCD_RESTART_TYPE_PRE_EXPRESS_SHUTDOWN)
{
SWSS_LOG_NOTICE("express boot, enable fast API pre-shutdown");
status = setFastAPIEnableOnAllSwitches();
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_FAST_API_ENABLE=true: %s for express pre-shutdown. Fall back to cold restart",
sai_serialize_status(status).c_str());
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable.setFlagFailed();
continue;
}
}
status = setPreShutdownOnAllSwitches();
if (status == SAI_STATUS_SUCCESS)
{
warmRestartTable.setPreShutdown(true);
s = std::make_shared<swss::Select>(); // make sure previous select is destroyed
s->addSelectable(m_restartQuery.get());
SWSS_LOG_NOTICE("switched to PRE_SHUTDOWN, from now on accepting only shutdown requests");
}
else
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_PRE_SHUTDOWN=true: %s",
sai_serialize_status(status).c_str());
warmRestartTable.setPreShutdown(false);
// Restore cold shutdown.
setRestartWarmOnAllSwitches(false);
}
}
else if (sel == m_flexCounter.get())
{
processFlexCounterEvent(*(swss::ConsumerTable*)sel);
}
else if (sel == m_flexCounterGroup.get())
{
processFlexCounterGroupEvent(*(swss::ConsumerTable*)sel);
}
else if (sel == m_selectableChannel.get())
{
processEvent(*m_selectableChannel.get());
}
else
{
SWSS_LOG_ERROR("select failed: %d", result);
}
}
catch(const std::exception &e)
{
SWSS_LOG_ERROR("Runtime error: %s", e.what());
sendShutdownRequestAfterException();
s = std::make_shared<swss::Select>();
s->addSelectable(m_restartQuery.get());
if (m_commandLineOptions->m_disableExitSleep)
runMainLoop = false;
// make sure that if second exception will arise, then we break the loop
m_commandLineOptions->m_disableExitSleep = true;
}
}
WatchdogScope ws(m_timerWatchdog, "shutting down syncd");
if (shutdownType == SYNCD_RESTART_TYPE_WARM)
{
const char *warmBootWriteFile = profileGetValue(0, SAI_KEY_WARM_BOOT_WRITE_FILE);
SWSS_LOG_NOTICE("using warmBootWriteFile: '%s'", warmBootWriteFile);
if (warmBootWriteFile == NULL)
{
SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown");
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable.setWarmShutdown(false);
}
else
{
SWSS_LOG_NOTICE("Warm Reboot requested, keeping data plane running");
sai_status_t status = setRestartWarmOnAllSwitches(true);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s, fall back to cold restart",
sai_serialize_status(status).c_str());
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable.setFlagFailed();
}
}
}
if (shutdownType == SYNCD_RESTART_TYPE_FAST || shutdownType == SYNCD_RESTART_TYPE_WARM || shutdownType == SYNCD_RESTART_TYPE_EXPRESS)
{
setUninitDataPlaneOnRemovalOnAllSwitches();
}
m_manager->removeAllCounters();
m_mdioIpcServer->stopMdioThread();
sai_status_t status = removeAllSwitches();
// Stop notification thread after removing switch
m_processor->stopNotificationsProcessingThread();
if (shutdownType == SYNCD_RESTART_TYPE_WARM || shutdownType == SYNCD_RESTART_TYPE_EXPRESS)
{
warmRestartTable.setWarmShutdown(status == SAI_STATUS_SUCCESS);
}
SWSS_LOG_NOTICE("calling api uninitialize");
status = m_vendorSai->apiUninitialize();
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("failed to uninitialize api: %s", sai_serialize_status(status).c_str());
}
SWSS_LOG_NOTICE("uninitialize finished");
}