def _try_update_goal_state()

in azurelinuxagent/ga/update.py [0:0]


    def _try_update_goal_state(self, protocol):
        """
        Refreshes the goal state through the given protocol and stores the result in self._goal_state.

        Returns True if the goal state was fetched, or False if any exception was raised while doing so. Failures are reported
        as telemetry events: the first consecutive failures (up to 3) are reported immediately; after that, at most one event
        is reported every 6 hours.
        """
        error_log_limit = 3

        try:
            #
            # A Fast Track goal state is only usable if the tenant certificate is part of the goal state.
            #
            # The Fabric goal state can change without its incarnation changing. For example, on hibernation/resume the tenant
            # certificate is re-generated when the VM is restarted, but the incarnation may stay the same (e.g. it is 1 before the
            # hibernation and it is set to 1 again on resume, even though the goal state carries a new certificate). Extensions in
            # a Fast Track goal state that arrives afterwards need that new certificate.
            #
            # So, for each new Fast Track goal state, the certificates are verified and, if they are inconsistent, the entire goal
            # state is fetched again (update_goal_state(force_update=True)). This is done up to 2 times: first immediately (covers
            # scenarios like hibernation), then after a delay (covers the HGAP and the WireServer being temporarily out of sync).
            #
            for attempt in range(3):
                is_retry = attempt > 0

                refresh = protocol.client.update_goal_state
                refresh(force_update=is_retry, silent=self._update_goal_state_error_count >= error_log_limit, save_to_history=True)

                fetched = protocol.get_goal_state()

                # Only a goal state that is both new and Fast Track needs the certificate check
                current = self._goal_state
                is_new = current is None or current.extensions_goal_state.id != fetched.extensions_goal_state.id
                if not is_new:
                    break
                if fetched.extensions_goal_state.source != GoalStateSource.FastTrack:
                    break

                if self._check_certificates(fetched):
                    if is_retry:
                        event.info(WALAEventOperation.FetchGoalState, "The extensions goal state is now in sync with the tenant cert.")
                    break

                # The certificates are inconsistent: retry immediately, then retry after a delay, then give up.
                if attempt == 0:
                    event.info(WALAEventOperation.FetchGoalState, "The extensions are out of sync with the tenant cert. Will refresh the goal state.")
                    continue

                if attempt == 1:
                    event.info(WALAEventOperation.FetchGoalState, "The extensions are still out of sync with the tenant cert. Will refresh the goal state one more time after a short delay.")
                    time.sleep(conf.get_goal_state_period())
                    continue

                event.warn(WALAEventOperation.FetchGoalState, "The extensions are still out of sync with the tenant cert. Will continue execution, but some extensions may fail.")
                break

            self._goal_state = protocol.get_goal_state()

            # A successful fetch ends any streak of errors; report the recovery once and reset the counter
            if self._update_goal_state_error_count > 0:
                event.info(WALAEventOperation.FetchGoalState, "Fetching the goal state recovered from previous errors. Fetched {0} (certificates: {1})", self._goal_state.extensions_goal_state.id, self._goal_state.certs.summary)
                self._update_goal_state_error_count = 0

            try:
                fast_track_supported = conf.get_enable_fast_track() and protocol.client.get_host_plugin().check_vm_settings_support()
            except VmSettingsNotSupported:
                fast_track_supported = False
            self._supports_fast_track = fast_track_supported

            return True

        except Exception as e:
            self._update_goal_state_error_count += 1
            self._heartbeat_update_goal_state_error_count += 1

            if self._update_goal_state_error_count <= error_log_limit:
                # The first 'error_log_limit' consecutive errors are reported right away
                self._update_goal_state_next_error_report = datetime.now()
                event.error(WALAEventOperation.FetchGoalState, "Error fetching the goal state: {0}", textutil.format_exception(e))
            elif datetime.now() >= self._update_goal_state_next_error_report:
                # Beyond that, report a single error once every 6 hours
                self._update_goal_state_next_error_report = datetime.now() + timedelta(hours=6)
                event.error(WALAEventOperation.FetchGoalState, "Fetching the goal state is still failing: {0}", textutil.format_exception(e))

            return False