azurelinuxagent/ga/guestagent.py (298 lines of code) (raw):
import json
import os
import shutil
import time
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common import logger, conf
from azurelinuxagent.common.exception import UpdateError
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME
from azurelinuxagent.ga.exthandlers import HandlerManifest
# Files kept inside each agent's own directory (see the GuestAgent.get_agent_*_file/path helpers)
AGENT_ERROR_FILE = "error.json"  # File name for the agent error record (written by GuestAgentError)
AGENT_MANIFEST_FILE = "HandlerManifest.json"  # Manifest from which the agent's enable command is read
MAX_FAILURE = 3  # An agent is considered blacklisted once its failure count reaches this value
AGENT_UPDATE_COUNT_FILE = "update_attempt.json"  # File for tracking the agent update attempt count
# Marker files created directly under the lib dir (see GuestAgentUpdateUtil); they are written empty and
# only their existence is checked
RSM_UPDATE_STATE_FILE = "waagent_rsm_update"  # Exists when the last update was done with RSM
INITIAL_UPDATE_STATE_FILE = "waagent_initial_update"  # Exists once the initial update has been attempted
class GuestAgent(object):
    """
    One version of the agent, identified either by its installation directory or by a package description.

    The instance keeps the agent's error record (GuestAgentError) and its update attempt counter
    (GuestAgentUpdateAttempt), and exposes the paths of the files that belong to that version.
    """

    def __init__(self, path, pkg):
        """
        When 'path' is given, the version is parsed from that installation directory.
        Otherwise the version is taken from 'pkg'; UpdateError is raised when neither yields a version.

        A failure while loading the manifest does NOT propagate: the agent directory is removed
        (best effort) and an Install failure event is reported instead.

        NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly
        """
        self.pkg = pkg

        if path is not None:
            dir_match = AGENT_DIR_PATTERN.match(path)
            if dir_match is None:
                raise UpdateError(u"Illegal agent directory: {0}".format(path))
            agent_version = dir_match.group(1)
        elif pkg is not None:
            agent_version = pkg.version
        else:
            agent_version = None

        if agent_version is None:
            raise UpdateError(u"Illegal agent version: {0}".format(agent_version))
        self.version = FlexibleVersion(agent_version)

        source = u"package" if path is None else u"disk"
        logger.verbose(u"Loading Agent {0} from {1}", self.name, source)

        self.error = GuestAgentError(self.get_agent_error_file())
        self.error.load()

        self.update_attempt_data = GuestAgentUpdateAttempt(self.get_agent_update_count_file())
        self.update_attempt_data.load()

        try:
            self._ensure_loaded()
        except Exception as load_error:
            # The agent could not be loaded; get rid of whatever is left of its directory
            try:
                agent_dir = self.get_agent_dir()
                if os.path.isdir(agent_dir):
                    shutil.rmtree(agent_dir, ignore_errors=True)
            except Exception as cleanup_error:
                logger.warn("Unable to delete Agent files: {0}".format(cleanup_error))

            summary = u"Agent {0} install failed with exception:".format(self.name)
            event_message = '{0} {1}'.format(summary, textutil.format_exception(load_error))
            add_event(
                AGENT_NAME,
                version=self.version,
                op=WALAEventOperation.Install,
                is_success=False,
                message=event_message)

    @staticmethod
    def from_installed_agent(path):
        """
        Builds a GuestAgent for the agent installed in the given 'path'.
        """
        return GuestAgent(path=path, pkg=None)

    @staticmethod
    def from_agent_package(package):
        """
        Builds a GuestAgent from the information provided in the given 'package'.
        """
        return GuestAgent(path=None, pkg=package)

    @property
    def name(self):
        """Agent name followed by its version, e.g. '<AGENT_NAME>-<version>'."""
        return "{0}-{1}".format(AGENT_NAME, self.version)

    def get_agent_cmd(self):
        """Returns the enable command declared in the loaded manifest."""
        return self.manifest.get_enable_command()

    def get_agent_dir(self):
        """Directory of this agent version under the lib dir."""
        lib_dir = conf.get_lib_dir()
        return os.path.join(lib_dir, self.name)

    def get_agent_error_file(self):
        """Path of the error record inside the agent directory."""
        lib_dir = conf.get_lib_dir()
        return os.path.join(lib_dir, self.name, AGENT_ERROR_FILE)

    def get_agent_update_count_file(self):
        """Path of the update attempt counter inside the agent directory."""
        lib_dir = conf.get_lib_dir()
        return os.path.join(lib_dir, self.name, AGENT_UPDATE_COUNT_FILE)

    def get_agent_manifest_path(self):
        """Path of the manifest inside the agent directory."""
        agent_dir = self.get_agent_dir()
        return os.path.join(agent_dir, AGENT_MANIFEST_FILE)

    def get_agent_pkg_path(self):
        """Path of the agent's zip package: the agent directory path with a '.zip' suffix."""
        return os.path.join(conf.get_lib_dir(), self.name) + ".zip"

    def clear_error(self):
        """Resets the error record and persists the cleared state."""
        error_record = self.error
        error_record.clear()
        error_record.save()

    @property
    def is_available(self):
        """True when the agent is downloaded and is not blacklisted."""
        return self.is_downloaded and not self.is_blacklisted

    @property
    def is_blacklisted(self):
        """True when an error record exists and it reports the agent as blacklisted."""
        return self.error is not None and self.error.is_blacklisted

    @property
    def is_downloaded(self):
        """True when the agent is blacklisted or its manifest file exists on disk."""
        return self.is_blacklisted or os.path.isfile(self.get_agent_manifest_path())

    def mark_failure(self, is_fatal=False, reason=''):
        """
        Records a failure in the error record (creating the agent directory if needed) and, when the
        agent becomes blacklisted, logs and reports it. Any exception is logged and swallowed.
        """
        try:
            agent_dir = self.get_agent_dir()
            if not os.path.isdir(agent_dir):
                os.makedirs(agent_dir)

            self.error.mark_failure(is_fatal=is_fatal, reason=reason)
            self.error.save()

            if not self.error.is_blacklisted:
                return

            blacklisted_msg = u"Agent {0} is permanently blacklisted".format(self.name)
            logger.warn(blacklisted_msg)
            add_event(
                op=WALAEventOperation.AgentBlacklisted,
                is_success=False,
                message=blacklisted_msg,
                log_event=False,
                version=self.version)
        except Exception as e:
            logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e))

    def inc_update_attempt_count(self):
        """Increments and persists the update attempt counter; any exception is logged and swallowed."""
        try:
            attempts = self.update_attempt_data
            attempts.inc_count()
            attempts.save()
        except Exception as e:
            logger.warn(u"Agent {0} failed recording update attempt: {1}", self.name, ustr(e))

    def get_update_attempt_count(self):
        """Returns the current update attempt count."""
        return self.update_attempt_data.count

    def _ensure_loaded(self):
        """Loads the manifest first (may raise UpdateError), then the error record."""
        self._load_manifest()
        self._load_error()

    def _load_error(self):
        """Re-creates the error record from disk; a failure to do so is only logged."""
        try:
            self.error = GuestAgentError(self.get_agent_error_file())
            self.error.load()
            logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
        except Exception as e:
            logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))

    def _load_manifest(self):
        """
        Reads the manifest file into self.manifest.

        Raises UpdateError when the file is missing, is not valid JSON, is an empty list, or does not
        produce a manifest with an enable command.
        """
        manifest_path = self.get_agent_manifest_path()
        if not os.path.isfile(manifest_path):
            raise UpdateError(u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE))

        with open(manifest_path, "r") as manifest_file:
            try:
                parsed = json.load(manifest_file)
            except Exception as e:
                raise UpdateError(
                    u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)))

        # The file holds either a single manifest or a list of them; only the first entry of a list is used
        if not isinstance(parsed, list):
            manifest_data = parsed
        elif parsed:
            manifest_data = parsed[0]
        else:
            raise UpdateError(u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE))

        try:
            self.manifest = HandlerManifest(manifest_data)  # pylint: disable=W0201
            if len(self.manifest.get_enable_command()) < 1:
                raise Exception(u"Manifest is missing the enable command")
        except Exception as e:
            raise UpdateError(
                u"Agent {0} has an illegal {1}: {2}".format(self.name, AGENT_MANIFEST_FILE, ustr(e)))

        logger.verbose(u"Agent {0} loaded manifest from {1}", self.name, manifest_path)
        logger.verbose(
            u"Successfully loaded Agent {0} {1}: {2}",
            self.name,
            AGENT_MANIFEST_FILE,
            ustr(self.manifest.data))
class GuestAgentError(object):
    """
    Failure record of an agent (last failure time, failure count, fatal flag and reason), stored as
    JSON in the file given by 'path'.
    """

    def __init__(self, path):
        """Creates an empty record bound to 'path'; raises UpdateError when 'path' is None."""
        if path is None:
            raise UpdateError(u"GuestAgentError requires a path")
        self.path = path
        self.last_failure = 0.0
        self.failure_count = 0
        self.was_fatal = False
        self.reason = ''
        self.clear()

    def mark_failure(self, is_fatal=False, reason=''):
        """Registers one more failure, stamping it with the current time."""
        self.failure_count += 1
        self.last_failure = time.time()
        self.reason = reason
        self.was_fatal = is_fatal

    def clear(self):
        """Resets every field to its initial (no failure) value."""
        self.reason = ''
        self.was_fatal = False
        self.failure_count = 0
        self.last_failure = 0.0

    @property
    def is_blacklisted(self):
        """Blacklisted means: the last failure was fatal, or the failure count reached MAX_FAILURE."""
        return self.was_fatal or self.failure_count >= MAX_FAILURE

    def load(self):
        """
        Merges the state stored in the file, if it exists, into this record.
        A file that cannot be read or parsed is deleted (best effort) so the error state starts clean.
        """
        if self.path is None or not os.path.isfile(self.path):
            return

        try:
            with open(self.path, 'r') as error_file:
                stored = json.load(error_file)
                self.from_json(stored)
        except Exception as error:
            # Only the agent is supposed to write error.json, so a malformed file is simply discarded
            # to reset the state of the errors.
            logger.warn(
                "Ran into error when trying to load error file {0}, deleting it to clean state. Error: {1}".format(
                    self.path, textutil.format_exception(error)))
            try:
                os.remove(self.path)
            except Exception:
                # Deleting the file is best effort; nothing else to do if it fails
                pass

    def save(self):
        """Writes the record to the file; does nothing when the parent directory does not exist."""
        parent_dir = os.path.dirname(self.path)
        if not os.path.isdir(parent_dir):
            return
        with open(self.path, 'w') as error_file:
            json.dump(self.to_json(), error_file)

    def from_json(self, data):
        """
        Merges 'data' into the record, never weakening the current state: the larger timestamp and
        count win, the fatal flag is OR-ed, and an empty reason does not overwrite the current one.
        """
        self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0))
        self.failure_count = max(self.failure_count, data.get(u"failure_count", 0))
        self.was_fatal = self.was_fatal or data.get(u"was_fatal", False)
        stored_reason = data.get(u"reason", '')
        if stored_reason != '':
            self.reason = stored_reason

    def to_json(self):
        """Returns the record as a dict suitable for json.dump."""
        return {
            u"last_failure": self.last_failure,
            u"failure_count": self.failure_count,
            u"was_fatal": self.was_fatal,
            u"reason": ustr(self.reason)
        }

    def __str__(self):
        template = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}"
        return template.format(self.last_failure, self.failure_count, self.was_fatal, self.reason)
class GuestAgentUpdateAttempt(object):
    """
    Counter of the update attempts made for an agent, stored as JSON ({"count": <int>}) in the file
    given by 'path'.
    """

    def __init__(self, path):
        """Creates a counter at 0 bound to 'path'; raises UpdateError when 'path' is None."""
        self.count = 0
        if path is None:
            raise UpdateError(u"GuestAgentUpdateAttempt requires a path")
        self.path = path
        self.clear()

    def inc_count(self):
        """Adds one attempt to the counter (in memory only; call save() to persist it)."""
        self.count += 1

    def clear(self):
        """Resets the counter to 0."""
        self.count = 0

    def load(self):
        """
        Reads the counter from the file, if it exists.
        A file that cannot be read, parsed or validated is deleted (best effort) and the counter keeps
        its current value.
        """
        if self.path is not None and os.path.isfile(self.path):
            try:
                with open(self.path, 'r') as f:
                    self.from_json(json.load(f))
            except Exception as error:
                # The update_attempt.json file is only supposed to be written by the agent.
                # If for whatever reason the file is malformed, just delete it to reset the attempt count.
                logger.warn(
                    "Ran into error when trying to load update attempt file {0}, deleting it to clean state. Error: {1}".format(
                        self.path, textutil.format_exception(error)))
                try:
                    os.remove(self.path)
                except Exception:
                    # We try best case efforts to delete the file, ignore error if we're unable to do so
                    pass

    def save(self):
        """Writes the counter to the file; does nothing when the parent directory does not exist."""
        if os.path.isdir(os.path.dirname(self.path)):
            with open(self.path, 'w') as f:
                json.dump(self.to_json(), f)

    def from_json(self, data):
        """
        Sets the counter from 'data' (a dict); a missing "count" key means 0.

        Raises ValueError when "count" is not a non-negative integer. Without this check a corrupted
        value (null, string, float, ...) would be stored as-is and every later inc_count() or
        comparison would fail; raising lets load() discard the malformed file instead.
        """
        count = data.get(u"count", 0)
        # bool is a subclass of int, but true/false is not a valid attempt count
        if isinstance(count, bool) or not isinstance(count, int) or count < 0:
            raise ValueError(u"Invalid update attempt count: {0!r}".format(count))
        self.count = count

    def to_json(self):
        """Returns the counter as a dict suitable for json.dump."""
        data = {
            u"count": self.count
        }
        return data
class GuestAgentUpdateUtil(object):
    """
    Helpers for the two marker files kept in the lib dir: one recording that the initial update was
    attempted, and one recording that the last update was an RSM update. Only the existence of the
    files is significant.
    """

    @staticmethod
    def get_initial_update_state_file():
        """
        Path of the file that tracks whether the initial update attempt has been made
        """
        lib_dir = conf.get_lib_dir()
        return os.path.join(lib_dir, INITIAL_UPDATE_STATE_FILE)

    @staticmethod
    def save_initial_update_state_file():
        """
        Creates the (empty) state file to record that the agent attempted the initial update.
        A failure to create it is logged and reported, not raised.
        """
        try:
            open(GuestAgentUpdateUtil.get_initial_update_state_file(), "w").close()
        except Exception as e:
            state_file = GuestAgentUpdateUtil.get_initial_update_state_file()
            warning = "Error creating the initial update state file ({0}): {1}".format(state_file, ustr(e))
            logger.warn(warning)
            add_event(op=WALAEventOperation.AgentUpgrade, message=warning, log_event=False)

    @staticmethod
    def is_initial_update():
        """
        True while the state file is absent; once it exists, the initial update has already been attempted
        """
        already_attempted = os.path.exists(GuestAgentUpdateUtil.get_initial_update_state_file())
        return not already_attempted

    @staticmethod
    def get_rsm_update_state_file():
        """
        Path of the file that tracks whether the last attempted update was an RSM update
        """
        lib_dir = conf.get_lib_dir()
        return os.path.join(lib_dir, RSM_UPDATE_STATE_FILE)

    @staticmethod
    def save_rsm_update_state_file():
        """
        Creates the (empty) RSM state file when switching to RSM.
        A failure to create it is logged and reported, not raised.
        """
        try:
            open(GuestAgentUpdateUtil.get_rsm_update_state_file(), "w").close()
        except Exception as e:
            state_file = GuestAgentUpdateUtil.get_rsm_update_state_file()
            warning = "Error creating the RSM state file ({0}): {1}".format(state_file, ustr(e))
            logger.warn(warning)
            add_event(op=WALAEventOperation.AgentUpgrade, message=warning, log_event=False)

    @staticmethod
    def remove_rsm_update_state_file():
        """
        Deletes the RSM state file, if present, when switching to self-update.
        A failure to delete it is logged and reported, not raised.
        """
        try:
            rsm_file = GuestAgentUpdateUtil.get_rsm_update_state_file()
            if os.path.exists(rsm_file):
                os.remove(rsm_file)
        except Exception as e:
            state_file = GuestAgentUpdateUtil.get_rsm_update_state_file()
            warning = "Error removing the RSM state file ({0}): {1}".format(state_file, ustr(e))
            logger.warn(warning)
            add_event(op=WALAEventOperation.AgentUpgrade, message=warning, log_event=False)

    @staticmethod
    def is_last_update_with_rsm():
        """
        True when the RSM state file exists, which indicates that the last update was with RSM
        """
        rsm_file = GuestAgentUpdateUtil.get_rsm_update_state_file()
        return os.path.exists(rsm_file)