void StateUpdateTask::CheckFailoverUnit()

in src/prod/src/Reliability/Failover/fm/StateUpdateTask.cpp [18:262]


// Periodic state-maintenance pass over a single FailoverUnit.
//
// Inspects the FailoverUnit and each of its replicas and adjusts flags and
// persistence state where the recorded state no longer matches the node,
// upgrade, deactivation or service state visible here. Every mutation is
// preceded by lockedFailoverUnit.EnableUpdate().
//
// Parameters:
//   lockedFailoverUnit - the locked FailoverUnit to examine and (possibly) update.
//   (unnamed vector)   - action list; not used by this function.
void StateUpdateTask::CheckFailoverUnit(
    LockedFailoverUnitPtr & lockedFailoverUnit,
    vector<StateMachineActionUPtr> &)
{
    StopwatchTime now = Stopwatch::Now();
    FailoverConfig const& config = FailoverConfig::GetConfig();
    FailoverUnit & failoverUnit = *lockedFailoverUnit;
    ApplicationInfoSPtr applicationInfo = failoverUnit.ServiceInfoObj->ServiceType->Application;

    // A pending (lazy) persistence that has waited longer than
    // LazyPersistWaitDuration is marked to be written out.
    if (lockedFailoverUnit->IsPersistencePending &&
        now.ToDateTime() - lockedFailoverUnit->LastUpdated > config.LazyPersistWaitDuration)
    {
        lockedFailoverUnit.EnableUpdate();
        lockedFailoverUnit->PersistenceState = PersistenceState::ToBeUpdated;
    }

    for (auto it = failoverUnit.BeginIterator; it != failoverUnit.EndIterator; ++it)
    {
        Replica & replica = *it;

        // ---- Replica liveness / cleanup (at most one of these branches applies) ----
        if (replica.FederationNodeInstance != replica.NodeInfoObj->NodeInstance)
        {
            // Only act when the replica's recorded node instance compares less
            // than the node's current instance.
            if (replica.FederationNodeInstance < replica.NodeInfoObj->NodeInstance)
            {
                if (replica.NodeInfoObj->IsReplicaUploaded && !replica.IsDeleted)
                {
                    // The node has uploaded its replicas and this one is not yet
                    // marked deleted: treat it as dropped and deleted.
                    lockedFailoverUnit.EnableUpdate();
                    failoverUnit.OnReplicaDropped(replica);
                    replica.IsDeleted = true;
                }
                else if (replica.IsUp)
                {
                    lockedFailoverUnit.EnableUpdate();
                    failoverUnit.OnReplicaDown(replica, !failoverUnit.HasPersistedState);
                }
            }
        }
        else if (!replica.IsNodeUp && replica.IsUp)
        {
            // Node is down but the replica is still recorded as up.
            lockedFailoverUnit.EnableUpdate();
            failoverUnit.OnReplicaDown(replica, !failoverUnit.HasPersistedState);
        }
        else if (replica.IsUp && replica.IsStandBy &&
            !replica.IsInConfiguration &&
            !replica.IsToBeDropped && !failoverUnit.IsToBeDeleted &&
            now.ToDateTime() - replica.LastUpTime > replica.FailoverUnitObj.ServiceInfoObj->ServiceDescription.StandByReplicaKeepDuration)
        {
            // StandBy replica outside the configuration has exceeded the
            // service's StandByReplicaKeepDuration.
            lockedFailoverUnit.EnableUpdate();
            replica.IsToBeDroppedByFM = true;
        }
        else if (replica.IsInBuild &&
            !replica.IsInConfiguration &&
            failoverUnit.ServiceInfoObj->ServiceType->IsServiceTypeDisabled(replica.FederationNodeId))
        {
            // InBuild replica outside the configuration on a node where the
            // service type is disabled.
            lockedFailoverUnit.EnableUpdate();
            replica.IsToBeDroppedByFM = true;
        }
        else if (replica.IsMoveInProgress &&
            failoverUnit.InBuildReplicaCount == 0u &&
            failoverUnit.AvailableReplicaCount <= failoverUnit.TargetReplicaSetSize)
        {
            // No replica is being built and the available count does not
            // exceed the target: clear the move-in-progress marker.
            lockedFailoverUnit.EnableUpdate();
            replica.IsMoveInProgress = false;
        }
        else if (replica.IsOffline && !replica.IsNodeUp && !replica.IsInConfiguration &&
            !replica.IsPendingRemove && !failoverUnit.IsToBeDeleted &&
            replica.GetUpdateTime() + config.OfflineReplicaKeepDuration < now)
        {
            // Offline replica on a down node has outlived OfflineReplicaKeepDuration.
            lockedFailoverUnit.EnableUpdate();
            replica.PersistenceState = PersistenceState::ToBeDeleted;
        }

        // ---- Expire dropped/deleted replicas that nothing else still refers to ----
        if (replica.IsDropped &&
            !replica.IsInConfiguration &&
            !replica.IsPendingRemove &&
            !replica.IsPreferredPrimaryLocation &&
            !replica.IsPreferredReplicaLocation &&
            !replica.NodeInfoObj->IsPendingUpgradeOrDeactivateNode())
        {
            TimeSpan keepDuration = replica.IsDeleted ? config.DeletedReplicaKeepDuration : config.DroppedReplicaKeepDuration;

            if (replica.GetUpdateTime() + keepDuration < now)
            {
                lockedFailoverUnit.EnableUpdate();
                replica.PersistenceState = PersistenceState::ToBeDeleted;
            }
        }

        if (replica.IsToBeDropped &&
            replica.IsCurrentConfigurationPrimary &&
            !failoverUnit.IsToBeDeleted &&
            !failoverUnit.ToBePromotedReplicaExists)
        {
            // If the Primary is marked as ToBeDroppedByPLB and there is no replica that is marked as ToBePromoted,
            // we clear the ToBeDropped flags on the primary. This can happen if the ToBePromoted secondary
            // failed during a SwapPrimary or MovePrimary movement.
            lockedFailoverUnit.EnableUpdate();
            replica.IsToBeDroppedByFM = false;
            replica.IsToBeDroppedByPLB = false;
        }

        // ---- Application upgrade bookkeeping ----
        // versionInstance starts as the service's package version and is passed to
        // GetUpgradeVersionForServiceType; presumably it is overwritten with the
        // upgrade target version when that call returns true - TODO confirm.
        ServiceModel::ServicePackageVersionInstance versionInstance = failoverUnit.ServiceInfoObj->ServiceDescription.PackageVersionInstance;
        if (applicationInfo->GetUpgradeVersionForServiceType(failoverUnit.ServiceInfoObj->ServiceType->Type, versionInstance))
        {
            if (applicationInfo->Upgrade->IsUpgradeCompletedOnNode(*replica.NodeInfoObj))
            {
                // Bring an up, non-creating replica with a lower version to versionInstance.
                if (replica.VersionInstance < versionInstance && replica.IsUp && !replica.IsCreating)
                {
                    lockedFailoverUnit.EnableUpdate();
                    replica.VersionInstance = versionInstance;
                }
            }
        }

        TryClearUpgradeFlags(applicationInfo, lockedFailoverUnit, replica);

        // ---- Swap the primary out when IsSwapPrimaryNeeded says so ----
        if (replica.IsUp &&
            replica.IsCurrentConfigurationPrimary &&
            !replica.IsPrimaryToBeSwappedOut &&
            !failoverUnit.ToBePromotedReplicaExists &&
            (replica.IsPreferredPrimaryLocation || !failoverUnit.PreferredPrimaryLocationExists) &&
            IsSwapPrimaryNeeded(applicationInfo, *lockedFailoverUnit, replica))
        {
            if (failoverUnit.TargetReplicaSetSize == 1)
            {
                // A singleton replica is only swapped out when configuration allows it.
                if (FailoverConfig::GetConfig().IsSingletonReplicaMoveAllowedDuringUpgrade)
                {
                    lockedFailoverUnit.EnableUpdate();
                    replica.IsPrimaryToBeSwappedOut = true;
                }
            }
            else
            {
                lockedFailoverUnit.EnableUpdate();
                replica.IsPrimaryToBeSwappedOut = true;
            }

            // Remember this location as the preferred primary location when
            // RestoreReplicaLocationAfterUpgrade is set and the node is not deactivated.
            if (FailoverConfig::GetConfig().RestoreReplicaLocationAfterUpgrade &&
                !replica.NodeInfoObj->DeactivationInfo.IsDeactivated)
            {
                lockedFailoverUnit.EnableUpdate();
                replica.IsPreferredPrimaryLocation = true;
            }
        }

        // ---- Place the primary/replica back on its preferred location ----
        // The disjunction is parenthesized explicitly so that the node-up and
        // not-offline guards apply to BOTH alternatives. Without the outer
        // parentheses, '&&' binds tighter than '||' and the preferred-replica
        // alternative would bypass those guards.
        if (replica.IsNodeUp &&
            !replica.IsOffline &&
            ((replica.IsPreferredPrimaryLocation && !replica.IsPrimaryToBePlaced) ||
             (replica.IsPreferredReplicaLocation && !replica.IsReplicaToBePlaced)))
        {
            bool isOkToPlacePrimary = true;

            if (fm_.FabricUpgradeManager.Upgrade &&
                fm_.FabricUpgradeManager.Upgrade->IsDomainStarted(replica.NodeInfoObj->ActualUpgradeDomainId) &&
                replica.NodeInfoObj->VersionInstance.Version != fm_.FabricUpgradeManager.Upgrade->Description.Version)
            {
                // Fabric upgrade is going on and the node has not been upgraded
                isOkToPlacePrimary = false;
            }

            if (applicationInfo->Upgrade &&
                applicationInfo->Upgrade->IsDomainStarted(replica.NodeInfoObj->ActualUpgradeDomainId) &&
                (!applicationInfo->Upgrade->IsUpgradeCompletedOnNode(*replica.NodeInfoObj) ||
                (!replica.IsDropped && replica.VersionInstance.Version != versionInstance.Version)))
            {
                // Application upgrade is going on and the replica has not been upgraded
                isOkToPlacePrimary = false;
            }

            if (isOkToPlacePrimary)
            {
                TryPlacePrimary(lockedFailoverUnit, replica);
            }
        }

        // ---- Node deactivation with intent "remove" ----
        if (replica.NodeInfoObj->DeactivationInfo.IsRemove)
        {
            if (replica.IsUp)
            {
                if (replica.IsCurrentConfigurationPrimary &&
                    failoverUnit.InBuildReplicaCount == 0 &&
                    failoverUnit.TargetReplicaSetSize > 1 &&
                    !failoverUnit.IsToBeDeleted)
                {
                    // Swap the primary out unless a promotion is already pending.
                    if (!failoverUnit.ToBePromotedReplicaExists)
                    {
                        lockedFailoverUnit.EnableUpdate();
                        replica.IsPrimaryToBeSwappedOut = true;
                    }
                }
                else
                {
                    // if the RemoveNodeOrDataCloseStatelessInstanceAfterSafetyCheckComplete is set we dont want to mark these replicas as to be dropped
                    // in order to avoid dropping them while safety checks are ongoing
                    // RA will close them once the deactivation message is sent by FM
                    if (failoverUnit.IsStateful || !FailoverConfig::GetConfig().RemoveNodeOrDataCloseStatelessInstanceAfterSafetyCheckComplete)
                    {
                        lockedFailoverUnit.EnableUpdate();
                        replica.IsToBeDroppedForNodeDeactivation = true;
                    }
                }
            }
        }
        else if (replica.IsToBeDroppedForNodeDeactivation)
        {
            // The node is no longer being removed: clear the stale flag.
            lockedFailoverUnit.EnableUpdate();
            replica.IsToBeDroppedForNodeDeactivation = false;
        }

        // ---- Service description update propagation ----
        // When a service update is pending and this replica's node is listed in
        // UpdatedNodes, record the service's UpdateVersion on the replica.
        if (failoverUnit.ServiceInfoObj->IsServiceUpdateNeeded &&
            failoverUnit.ServiceInfoObj->UpdatedNodes.find(replica.FederationNodeId) != failoverUnit.ServiceInfoObj->UpdatedNodes.end())
        {
            lockedFailoverUnit.EnableUpdate();
            replica.ServiceUpdateVersion = failoverUnit.ServiceInfoObj->ServiceDescription.UpdateVersion;
        }
    }

    // A swap-primary cannot proceed if the previous-configuration primary is
    // no longer available.
    if (failoverUnit.IsSwappingPrimary && !failoverUnit.PreviousConfiguration.Primary->IsAvailable)
    {
        lockedFailoverUnit.EnableUpdate();
        failoverUnit.IsSwappingPrimary = false;
    }

    // Mark the FailoverUnit for deletion when its service is being deleted, or
    // when a "Remove" repartition lists this partition as removed.
    if (failoverUnit.ServiceInfoObj->IsToBeDeleted &&
        !failoverUnit.IsToBeDeleted)
    {
        lockedFailoverUnit.EnableUpdate();
        failoverUnit.SetToBeDeleted();
    }
    else if (failoverUnit.ServiceInfoObj->RepartitionInfo &&
        failoverUnit.ServiceInfoObj->RepartitionInfo->RepartitionType == RepartitionType::Remove &&
        failoverUnit.ServiceInfoObj->RepartitionInfo->IsRemoved(failoverUnit.Id) &&
        !failoverUnit.IsToBeDeleted)
    {
        lockedFailoverUnit.EnableUpdate();
        failoverUnit.SetToBeDeleted();
    }

    // Keep the FailoverUnit's IsUpgrading flag in sync with whether the
    // application currently has an upgrade object.
    bool isUpgrading = (applicationInfo->Upgrade != nullptr);
    if (failoverUnit.IsUpgrading != isUpgrading)
    {
        lockedFailoverUnit.EnableUpdate();
        failoverUnit.IsUpgrading = isUpgrading;
    }
}