void Syncd::run()

in syncd/Syncd.cpp [5462:5714]


void Syncd::run()
{
    SWSS_LOG_ENTER();

    WarmRestartTable warmRestartTable("STATE_DB"); // TODO from config

    syncd_restart_type_t shutdownType = SYNCD_RESTART_TYPE_COLD;

    volatile bool runMainLoop = true;

    std::shared_ptr<swss::Select> s = std::make_shared<swss::Select>();

    try
    {
        onSyncdStart(m_commandLineOptions->m_startType == SAI_START_TYPE_WARM_BOOT);

        // create notifications processing thread after we create_switch to
        // make sure, we have switch_id translated to VID before we start
        // processing possible quick fdb notifications, and pointer for
        // notification queue is created before we create switch
        m_processor->startNotificationsProcessingThread();

        for (auto& sw: m_switches)
        {
            m_mdioIpcServer->setSwitchId(sw.second->getRid());
        }

        m_mdioIpcServer->startMdioThread();

        SWSS_LOG_NOTICE("syncd listening for events");

        s->addSelectable(m_selectableChannel.get());
        s->addSelectable(m_restartQuery.get());
        s->addSelectable(m_flexCounter.get());
        s->addSelectable(m_flexCounterGroup.get());

        SWSS_LOG_NOTICE("starting main loop");
    }
    catch(const std::exception &e)
    {
        SWSS_LOG_ERROR("Runtime error during syncd init: %s", e.what());

        sendShutdownRequestAfterException();

        s = std::make_shared<swss::Select>();

        s->addSelectable(m_restartQuery.get());

        SWSS_LOG_NOTICE("starting main loop, ONLY restart query");

        if (m_commandLineOptions->m_disableExitSleep)
            runMainLoop = false;
    }

    m_timerWatchdog.setCallback(timerWatchdogCallback);

    while (runMainLoop)
    {
        try
        {
            swss::Selectable *sel = NULL;

            int result = s->select(&sel);

            if (sel == m_restartQuery.get())
            {
                /*
                 * This is actual a bad design, since selectable may pick up
                 * multiple events from the queue, and after restart those
                 * events will be forgotten since they were consumed already and
                 * this may lead to forget populate object table which will
                 * lead to unable to find some objects.
                 */

                SWSS_LOG_NOTICE("is asic queue empty: %d", m_selectableChannel->empty());

                while (!m_selectableChannel->empty())
                {
                    processEvent(*m_selectableChannel.get());
                }

                SWSS_LOG_NOTICE("drained queue");

                WatchdogScope ws(m_timerWatchdog, "restart query");

                shutdownType = handleRestartQuery(*m_restartQuery);

                if (shutdownType != SYNCD_RESTART_TYPE_PRE_SHUTDOWN && shutdownType != SYNCD_RESTART_TYPE_PRE_EXPRESS_SHUTDOWN)
                {
                    // break out the event handling loop to shutdown syncd
                    runMainLoop = false;
                    break;
                }

                // Handle switch pre-shutdown and wait for the final shutdown
                // event

                SWSS_LOG_TIMER("%s pre-shutdown", (shutdownType == SYNCD_RESTART_TYPE_PRE_SHUTDOWN) ? "warm" : "express");

                m_manager->removeAllCounters();

                sai_status_t status = setRestartWarmOnAllSwitches(true);

                if (status != SAI_STATUS_SUCCESS)
                {
                    SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s for pre-shutdown",
                            sai_serialize_status(status).c_str());

                    shutdownType = SYNCD_RESTART_TYPE_COLD;

                    warmRestartTable.setFlagFailed();
                    continue;
                }

                if (shutdownType == SYNCD_RESTART_TYPE_PRE_EXPRESS_SHUTDOWN)
                {
                    SWSS_LOG_NOTICE("express boot, enable fast API pre-shutdown");
                    status = setFastAPIEnableOnAllSwitches();

                    if (status != SAI_STATUS_SUCCESS)
                    {
                        SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_FAST_API_ENABLE=true: %s for express pre-shutdown. Fall back to cold restart",
				       sai_serialize_status(status).c_str());

                        shutdownType = SYNCD_RESTART_TYPE_COLD;

                        warmRestartTable.setFlagFailed();
                        continue;
                    }
                }

                status = setPreShutdownOnAllSwitches();

                if (status == SAI_STATUS_SUCCESS)
                {
                    warmRestartTable.setPreShutdown(true);

                    s = std::make_shared<swss::Select>(); // make sure previous select is destroyed

                    s->addSelectable(m_restartQuery.get());

                    SWSS_LOG_NOTICE("switched to PRE_SHUTDOWN, from now on accepting only shutdown requests");
                }
                else
                {
                    SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_PRE_SHUTDOWN=true: %s",
                            sai_serialize_status(status).c_str());

                    warmRestartTable.setPreShutdown(false);

                    // Restore cold shutdown.

                    setRestartWarmOnAllSwitches(false);
                }
            }
            else if (sel == m_flexCounter.get())
            {
                processFlexCounterEvent(*(swss::ConsumerTable*)sel);
            }
            else if (sel == m_flexCounterGroup.get())
            {
                processFlexCounterGroupEvent(*(swss::ConsumerTable*)sel);
            }
            else if (sel == m_selectableChannel.get())
            {
                processEvent(*m_selectableChannel.get());
            }
            else
            {
                SWSS_LOG_ERROR("select failed: %d", result);
            }
        }
        catch(const std::exception &e)
        {
            SWSS_LOG_ERROR("Runtime error: %s", e.what());

            sendShutdownRequestAfterException();

            s = std::make_shared<swss::Select>();

            s->addSelectable(m_restartQuery.get());

            if (m_commandLineOptions->m_disableExitSleep)
                runMainLoop = false;

            // make sure that if second exception will arise, then we break the loop
            m_commandLineOptions->m_disableExitSleep = true;
        }
    }

    WatchdogScope ws(m_timerWatchdog, "shutting down syncd");

    if (shutdownType == SYNCD_RESTART_TYPE_WARM)
    {
        const char *warmBootWriteFile = profileGetValue(0, SAI_KEY_WARM_BOOT_WRITE_FILE);

        SWSS_LOG_NOTICE("using warmBootWriteFile: '%s'", warmBootWriteFile);

        if (warmBootWriteFile == NULL)
        {
            SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown");

            shutdownType = SYNCD_RESTART_TYPE_COLD;
            warmRestartTable.setWarmShutdown(false);
        }
        else
        {
            SWSS_LOG_NOTICE("Warm Reboot requested, keeping data plane running");

            sai_status_t status = setRestartWarmOnAllSwitches(true);

            if (status != SAI_STATUS_SUCCESS)
            {
                SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s, fall back to cold restart",
                        sai_serialize_status(status).c_str());

                shutdownType = SYNCD_RESTART_TYPE_COLD;

                warmRestartTable.setFlagFailed();
            }
        }
    }

    if (shutdownType == SYNCD_RESTART_TYPE_FAST || shutdownType == SYNCD_RESTART_TYPE_WARM || shutdownType == SYNCD_RESTART_TYPE_EXPRESS)
    {
        setUninitDataPlaneOnRemovalOnAllSwitches();
    }

    m_manager->removeAllCounters();

    m_mdioIpcServer->stopMdioThread();

    sai_status_t status = removeAllSwitches();

    // Stop notification thread after removing switch
    m_processor->stopNotificationsProcessingThread();

    if (shutdownType == SYNCD_RESTART_TYPE_WARM || shutdownType == SYNCD_RESTART_TYPE_EXPRESS)
    {
        warmRestartTable.setWarmShutdown(status == SAI_STATUS_SUCCESS);
    }

    SWSS_LOG_NOTICE("calling api uninitialize");

    status = m_vendorSai->apiUninitialize();

    if (status != SAI_STATUS_SUCCESS)
    {
        SWSS_LOG_ERROR("failed to uninitialize api: %s", sai_serialize_status(status).c_str());
    }

    SWSS_LOG_NOTICE("uninitialize finished");
}