# azurelinuxagent/ga/env.py (167 lines of code) (raw):
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import datetime
import re
import socket
import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.dhcp import get_dhcp_handler
from azurelinuxagent.common import event
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.ga.firewall_manager import FirewallManager, FirewallStateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.version import AGENT_NAME
from azurelinuxagent.ga.periodic_operation import PeriodicOperation
# File-name patterns, matched case-insensitively. Each one captures three
# groups: an arbitrary prefix, a run of digits, and a fixed suffix
# ("agentsManifest", "manifest.xml" or "xml").
CACHE_PATTERNS = [
    re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", flags=re.IGNORECASE),
    re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", flags=re.IGNORECASE),
    re.compile(r"^(.*)\.(\d+)\.(xml)$", flags=re.IGNORECASE),
]

# NOTE(review): not referenced anywhere in this module; presumably the maximum
# number of cached files to keep -- confirm against the code that uses it.
MAXIMUM_CACHED_FILES = 50
def get_env_handler():
    """
    Factory for the environment handler; returns a new EnvHandler instance on every call.
    """
    handler = EnvHandler()
    return handler
class RemovePersistentNetworkRules(PeriodicOperation):
    """
    Periodic operation that asks the OS utility to remove its rules files.

    The period is taken from conf.get_remove_persistent_net_rules_period().
    """
    def __init__(self, osutil):
        """
        :param osutil: OS utility object; must provide remove_rules_files()
        """
        period = conf.get_remove_persistent_net_rules_period()
        super(RemovePersistentNetworkRules, self).__init__(period)
        self.osutil = osutil

    def _operation(self):
        # NOTE(review): judging by the class name these are the persistent network rules; confirm in osutil
        self.osutil.remove_rules_files()
class MonitorDhcpClientRestart(PeriodicOperation):
    """
    Periodic operation that tracks the PIDs of the DHCP client and re-applies the routes
    (dhcp_handler.conf_routes()) when the client appears to have been restarted.

    The period is taken from conf.get_monitor_dhcp_client_restart_period().
    """
    def __init__(self, osutil):
        """
        :param osutil: OS utility object; must provide get_dhcp_pid() and check_pid_alive(pid)
        """
        super(MonitorDhcpClientRestart, self).__init__(conf.get_monitor_dhcp_client_restart_period())
        self.osutil = osutil
        self.dhcp_handler = get_dhcp_handler()
        # The routes are configured once up-front, before any monitoring happens
        self.dhcp_handler.conf_routes()
        # When False, messages about a missing DHCP client are suppressed (see _get_dhcp_client_pid)
        self.dhcp_warning_enabled = True
        # Sorted PIDs of the DHCP client as of the last successful lookup
        self.dhcp_id_list = []

    def _operation(self):
        # No PIDs recorded yet: just try to record them, there is nothing to compare against
        if not self.dhcp_id_list:
            self.dhcp_id_list = self._get_dhcp_client_pid()
            return

        # Nothing to do while every recorded PID is still alive
        found_dead_pid = any(not self.osutil.check_pid_alive(pid) for pid in self.dhcp_id_list)
        if not found_dead_pid:
            return

        current_pids = self._get_dhcp_client_pid()
        if len(current_pids) == 0 or current_pids == self.dhcp_id_list:
            return

        logger.info("EnvMonitor: Detected dhcp client restart. Restoring routing table.")
        self.dhcp_handler.conf_routes()
        self.dhcp_id_list = current_pids

    def _get_dhcp_client_pid(self):
        """
        Return the sorted list of PIDs reported by osutil.get_dhcp_pid(), or an empty list if
        there are none or the lookup fails.

        A warning/error is logged only while dhcp_warning_enabled is set; the flag is cleared
        whenever the result is empty and set again once PIDs are found, so that the same
        condition is not logged on every run.
        """
        pids = []

        try:
            # The list is sorted because _operation compares the previous value with the new one,
            # and that comparison must not depend on the order of the items
            pids = sorted(self.osutil.get_dhcp_pid())

            if self.dhcp_warning_enabled and len(pids) == 0:
                logger.warn("Dhcp client is not running.")
        except Exception as exception:
            if self.dhcp_warning_enabled:
                logger.error("Failed to get the PID of the DHCP client: {0}", ustr(exception))

        self.dhcp_warning_enabled = len(pids) != 0

        return pids
class EnableFirewall(PeriodicOperation):
    """
    Periodic operation that verifies the firewall using a FirewallManager and sets it up
    (or resets it) when the check fails.

    The period is taken from conf.get_enable_firewall_period().
    """
    def __init__(self, wire_server_address):
        """
        :param wire_server_address: address handed to FirewallManager.create()
        """
        super(EnableFirewall, self).__init__(conf.get_enable_firewall_period())
        self._wire_server_address = wire_server_address
        self._firewall_manager = None  # created lazily, on the first run of _operation
        self._message_count = 0  # messages counted by _report in the current reporting window
        self._report_after = datetime.datetime.now()  # _report stays silent until this time

    def _operation(self):
        try:
            manager = self._firewall_manager
            if manager is None:
                manager = self._firewall_manager = FirewallManager.create(self._wire_server_address)

            try:
                if manager.check():
                    # The firewall is configured correctly
                    return
                self._report(event.warn, "The firewall has not been setup. Will set it up.")
            except FirewallStateError as e:
                # The check found an inconsistent state: remove what is there before setting it up again
                self._report(event.warn, "The firewall is not configured correctly. {0}. Will reset it. Current state:\n{1}", ustr(e), manager.get_state())
                manager.remove()

            manager.setup()
            self._report(event.info, "The firewall was setup successfully:\n{0}", manager.get_state())
        except Exception as e:
            self._report(event.warn, "An error occurred while setting up the firewall: {0}", ustr(e))

    def _report(self, report_function, message, *args):
        """
        Rate-limited reporting: forward the first 3 messages to report_function (invoked with
        WALAEventOperation.ResetFirewall, the message and its arguments); the next message is
        dropped and starts a 12-hour period during which nothing is reported.
        """
        if datetime.datetime.now() < self._report_after:
            return

        self._message_count += 1
        if self._message_count <= 3:
            report_function(WALAEventOperation.ResetFirewall, message, *args)
            return

        # Limit exceeded: go quiet for 12 hours and start counting from scratch
        self._report_after = datetime.datetime.now() + datetime.timedelta(hours=12)
        self._message_count = 0
class SetRootDeviceScsiTimeout(PeriodicOperation):
    """
    Periodic operation that applies the configured SCSI timeout through the OS utility.

    The period is taken from conf.get_root_device_scsi_timeout_period().
    """
    def __init__(self, osutil):
        """
        :param osutil: OS utility object; must provide set_scsi_disks_timeout(timeout)
        """
        period = conf.get_root_device_scsi_timeout_period()
        super(SetRootDeviceScsiTimeout, self).__init__(period)
        self._osutil = osutil

    def _operation(self):
        # The timeout is read from the configuration on every run
        timeout = conf.get_root_device_scsi_timeout()
        self._osutil.set_scsi_disks_timeout(timeout)
class MonitorHostNameChanges(PeriodicOperation):
    """
    Periodic operation that compares socket.gethostname() with the last known hostname and,
    when they differ, sets and publishes the new hostname through the OS utility.

    The period is taken from conf.get_monitor_hostname_period().
    """
    def __init__(self, osutil):
        """
        :param osutil: OS utility object; must provide get_hostname_record(), set_hostname()
                       and publish_hostname()
        """
        super(MonitorHostNameChanges, self).__init__(conf.get_monitor_hostname_period())
        self._osutil = osutil
        # Last known hostname; initialized from the record kept by the OS utility
        self._hostname = self._osutil.get_hostname_record()

    def _operation(self):
        current = socket.gethostname()
        if current == self._hostname:
            return

        logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
                    self._hostname,
                    current)
        self._osutil.set_hostname(current)
        try:
            self._osutil.publish_hostname(current, recover_nic=True)
        except Exception as e:
            # A failure to publish is reported as an event; the new hostname is still recorded below
            msg = "Error while publishing the hostname: {0}".format(e)
            add_event(AGENT_NAME, op=WALAEventOperation.HostnamePublishing, is_success=False, message=msg, log_event=False)
        self._hostname = current
class EnvHandler(ThreadHandlerInterface):
    """
    Runs the environment-monitoring periodic operations on a dedicated daemon thread.

    These two operations always run:
      - RemovePersistentNetworkRules
      - MonitorDhcpClientRestart (re-applies the routes when the dhcp client is restarted)

    These run only when enabled in the configuration:
      - EnableFirewall
      - SetRootDeviceScsiTimeout
      - MonitorHostNameChanges
    """
    _THREAD_NAME = "EnvHandler"

    @staticmethod
    def get_thread_name():
        """
        Return the name given to the monitoring thread.
        """
        return EnvHandler._THREAD_NAME

    def __init__(self):
        self.stopped = True  # the loop in daemon() runs until this becomes True
        self.hostname = None
        self.env_thread = None  # created by start()

    def run(self):
        """
        (Re)start the monitoring thread; if it is already running, it is stopped first.
        """
        if not self.stopped:
            logger.info("Stop existing env monitor service.")
            self.stop()

        self.stopped = False
        logger.info("Starting env monitor service.")
        self.start()

    def is_alive(self):
        """
        Return True if the monitoring thread exists and is alive.

        Returns False (instead of raising AttributeError) when the thread has not been
        created yet, i.e. when start() has not been called.
        """
        thread = self.env_thread
        return thread is not None and thread.is_alive()

    def start(self):
        """
        Create the daemon thread that executes daemon() and start it.
        """
        self.env_thread = threading.Thread(target=self.daemon)
        self.env_thread.daemon = True
        self.env_thread.name = self.get_thread_name()
        self.env_thread.start()

    def daemon(self):
        """
        Body of the monitoring thread: create the periodic operations and run them in a loop
        until the handler is stopped.

        An error in one iteration is logged and the loop continues; an error during
        initialization is logged and ends the thread.
        """
        try:
            # The initialization of the protocol needs to be done within the environment thread itself rather
            # than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each
            # thread would now have its own ProtocolUtil object as per the SingletonPerThread model.
            protocol_util = get_protocol_util()
            protocol = protocol_util.get_protocol()
            osutil = get_osutil()

            periodic_operations = [
                RemovePersistentNetworkRules(osutil),
                MonitorDhcpClientRestart(osutil),
            ]

            # The remaining operations are optional and depend on the configuration
            if conf.enable_firewall():
                periodic_operations.append(EnableFirewall(protocol.get_endpoint()))
            if conf.get_root_device_scsi_timeout() is not None:
                periodic_operations.append(SetRootDeviceScsiTimeout(osutil))
            if conf.get_monitor_hostname():
                periodic_operations.append(MonitorHostNameChanges(osutil))

            while not self.stopped:
                try:
                    for op in periodic_operations:
                        op.run()
                except Exception as e:
                    logger.error("An error occurred in the environment thread main loop; will skip the current iteration.\n{0}", ustr(e))
                finally:
                    # Sleep even after a failure, so that a persistent error does not turn into a busy loop
                    PeriodicOperation.sleep_until_next_operation(periodic_operations)
        except Exception as e:
            logger.error("An error occurred in the environment thread; will exit the thread.\n{0}", ustr(e))

    def stop(self):
        """
        Signal the monitoring loop to stop and, if the thread was created, wait for it to finish.
        """
        self.stopped = True
        if self.env_thread is not None:
            self.env_thread.join()