def _check_processes_in_agent_cgroup()

in azurelinuxagent/ga/cgroupconfigurator.py [0:0]


        def _check_processes_in_agent_cgroup(self, report_immediately):
            """
            Verifies that the agent's cgroup includes only the current process, its parent, commands started using shellutil and instances of systemd-run
            (those processes correspond, respectively, to the extension handler, the daemon, commands started by the extension handler, and the systemd-run
            commands used to start extensions on their own cgroup).
            Other processes started by the agent (e.g. extensions) and processes not started by the agent (e.g. services installed by extensions) are reported
            as unexpected, since they should belong to their own cgroup.

            Args:
                report_immediately: when True, switches to the old behavior and reports every unexpected process found by this check. When False, a
                    process is reported only if it was also recorded as unexpected by the previous check (self._unexpected_processes); at most a small
                    sample (5) is reported, and self._unexpected_processes is then replaced with the processes found by this check.

            Raises:
                CGroupsException: if there is at least one process to report. Errors raised while collecting or classifying the processes are logged
                    as a cgroup warning and are not propagated.

            Note: The process check was added as a conservative approach before the cgroups feature was stable. It now produces noise due to races, e.g. an
            extra process seen before systemd moves it to its new cgroup, or a process that is about to die. For that reason an issue is raised only when
            the same unexpected process was also seen on the previous check. The check will be removed later if no issues are reported.
            """
            current_unexpected = {}
            agent_cgroup_proc_names = []
            report = []

            try:
                daemon = os.getppid()
                extension_handler = os.getpid()
                agent_commands = set()
                agent_commands.update(shellutil.get_running_commands())
                systemd_run_commands = set()
                systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
                agent_cgroup_processes = self._agent_cgroup.get_processes()
                # get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup;
                agent_commands.update(shellutil.get_running_commands())
                systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())

                for process in agent_cgroup_processes:
                    agent_cgroup_proc_names.append(self._format_process(process))
                    # Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
                    if process in (daemon, extension_handler) or process in systemd_run_commands:
                        continue
                    # check shell systemd_run process if above process check didn't catch it
                    if self._check_systemd_run_process(process):
                        continue
                    # systemd_run_commands contains the shell that started systemd-run, so we also need to check for the parent
                    if self._get_parent(process) in systemd_run_commands and self._get_command(process) == 'systemd-run':
                        continue
                    # check if the process is a command started by the agent or a descendant of one of those commands:
                    # walk up the chain of parents until we hit one of the agent's commands or the parent id becomes 0
                    current = process
                    while current != 0 and current not in agent_commands:
                        current = self._get_parent(current)
                    # Verify if Process started by agent based on the marker found in process environment or process is in Zombie state.
                    # If so, consider it as valid process in agent cgroup.
                    if current == 0 and not (self._is_process_descendant_of_the_agent(process) or self._is_zombie_process(process)):
                        current_unexpected[process] = self._format_process(process)

                if report_immediately:
                    # Materialize the values: on Python 3 dict.values() is a view, which would be formatted as "dict_values([...])" in the
                    # exception message below and would hand a non-list to _report_agent_cgroups_procs.
                    report = list(current_unexpected.values())
                else:
                    for process in current_unexpected:
                        if process in self._unexpected_processes:
                            report.append(current_unexpected[process])
                        if len(report) >= 5:  # collect just a small sample
                            break
                    # remember what we saw on this check so that the next check can compare against it
                    self._unexpected_processes = current_unexpected
            except Exception as exception:
                # best effort: a failure while inspecting the processes must not be treated as an unexpected process
                log_cgroup_warning("Error checking the processes in the agent's cgroup: {0}".format(ustr(exception)))

            if report:
                self._report_agent_cgroups_procs(agent_cgroup_proc_names, report)
                raise CGroupsException("The agent's cgroup includes unexpected processes: {0}".format(report))