OmsAgent/omsagent.py (1,435 lines of code) (raw):
#!/usr/bin/env python
#
# OmsAgentForLinux Extension
#
# Copyright 2015 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import os
import os.path
import signal
import pwd
import grp
import re
import traceback
import time
import platform
import subprocess
import json
import base64
import inspect
import watcherutil
import shutil
from threading import Thread
try:
from Utils.WAAgentUtil import waagent
import Utils.HandlerUtil as HUtil
except Exception as e:
# These utils have checks around the use of them; this is not an exit case
print('Importing utils failed with error: {0}'.format(e))
if sys.version_info[0] == 3:
import urllib.request as urllib
from urllib.parse import urlparse
import urllib.error as urlerror
elif sys.version_info[0] == 2:
import urllib2 as urllib
from urlparse import urlparse
import urllib2 as urlerror
# This monkey patch duplicates the one made in the waagent import above.
# It is necessary because on 2.6, the waagent monkey patch appears to be overridden
# by the python-future subprocess.check_output backport.
# Python 2.6 compatibility shim: 2.6's subprocess module lacks check_output
# and the output-carrying CalledProcessError, so provide both and patch them
# onto the subprocess module.
if sys.version_info < (2,7):
    def check_output(*popenargs, **kwargs):
        r"""Backport from subprocess module from python 2.7"""
        # Caller must not redirect stdout; we capture it ourselves.
        if 'stdout' in kwargs:
            raise ValueError('stdout argument not allowed, it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
        output, unused_err = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            # Raise the patched CalledProcessError below so callers can read .output
            raise subprocess.CalledProcessError(retcode, cmd, output=output)
        return output

    # Exception classes used by this module.
    class CalledProcessError(Exception):
        # returncode: process exit status; cmd: command that failed;
        # output: captured stdout, if any.
        def __init__(self, returncode, cmd, output=None):
            self.returncode = returncode
            self.cmd = cmd
            self.output = output
        def __str__(self):
            return "Command '%s' returned non-zero exit status %d" % (self.cmd, self.returncode)

    subprocess.check_output = check_output
    subprocess.CalledProcessError = CalledProcessError
# Global Variables
ProceedOnSigningVerificationFailure = True
PackagesDirectory = 'packages'
keysDirectory = 'keys'
# Below file version will be replaced during OMS-Build time.
BundleFileName = 'omsagent-0.0.0-0.universal.x64.sh'
GUIDRegex = r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
GUIDOnlyRegex = r'^' + GUIDRegex + '$'
SCOMCertIssuerRegex = r'^[\s]*Issuer:[\s]*CN=SCX-Certificate/title=SCX' + GUIDRegex + ', DC=.*$'
SCOMPort = 1270
PostOnboardingSleepSeconds = 5
InitialRetrySleepSeconds = 30
IsUpgrade = False
# Paths
OMSAdminPath = '/opt/microsoft/omsagent/bin/omsadmin.sh'
OMSAgentServiceScript = '/opt/microsoft/omsagent/bin/service_control'
OMIConfigEditorPath = '/opt/omi/bin/omiconfigeditor'
OMIServerConfPath = '/etc/opt/omi/conf/omiserver.conf'
EtcOMSAgentPath = '/etc/opt/microsoft/omsagent/'
VarOMSAgentPath = '/var/opt/microsoft/omsagent/'
SCOMCertPath = '/etc/opt/microsoft/scx/ssl/scx.pem'
ExtensionStateSubdirectory = 'state'
# Commands
# Always use upgrade - will handle install if scx, omi are not installed or upgrade if they are.
InstallCommandTemplate = '{0} --upgrade {1}'
UninstallCommandTemplate = '{0} --remove'
WorkspaceCheckCommand = '{0} -l'.format(OMSAdminPath)
OnboardCommandWithOptionalParams = '{0} -w {1} -s {2} {3}'
RestartOMSAgentServiceCommand = '{0} restart'.format(OMSAgentServiceScript)
DisableOMSAgentServiceCommand = '{0} disable'.format(OMSAgentServiceScript)
InstallExtraPackageCommandApt = 'apt-get -y update && apt-get -y install {0}'
SkipDigestCmdTemplate = '{0} --noDigest'
# Cloud Environments
PublicCloudName = "AzurePublicCloud"
FairfaxCloudName = "AzureUSGovernmentCloud"
MooncakeCloudName = "AzureChinaCloud"
USNatCloudName = "USNat" # EX
USSecCloudName = "USSec" # RX
DefaultCloudName = PublicCloudName # Fallback
CloudDomainMap = {
PublicCloudName: "opinsights.azure.com",
FairfaxCloudName: "opinsights.azure.us",
MooncakeCloudName: "opinsights.azure.cn",
USNatCloudName: "opinsights.azure.eaglex.ic.gov",
USSecCloudName: "opinsights.azure.microsoft.scloud"
}
# Error codes
DPKGLockedErrorCode = 55 #56, temporary as it excludes from SLA
InstallErrorCurlNotInstalled = 55 #64, temporary as it excludes from SLA
EnableErrorOMSReturned403 = 5
EnableErrorOMSReturnedNon200 = 6
EnableErrorResolvingHost = 7
EnableErrorOnboarding = 8
EnableCalledBeforeSuccessfulInstall = 52 # since install is a missing dependency
UnsupportedOpenSSL = 55 #60, temporary as it excludes from SLA
UnsupportedGpg = 55
# OneClick error codes
OneClickErrorCode = 40
ManagedIdentityExtMissingErrorCode = 41
ManagedIdentityExtErrorCode = 42
MetadataAPIErrorCode = 43
OMSServiceOneClickErrorCode = 44
MissingorInvalidParameterErrorCode = 11
UnwantedMultipleConnectionsErrorCode = 10
CannotConnectToOMSErrorCode = 55
UnsupportedOperatingSystem = 51
# Configuration
HUtilObject = None
SettingsSequenceNumber = None
HandlerEnvironment = None
SettingsDict = None
# OneClick Constants
ManagedIdentityExtListeningURLPath = '/var/lib/waagent/ManagedIdentity-Settings'
GUIDRegex = '[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
OAuthTokenResource = 'https://management.core.windows.net/'
OMSServiceValidationEndpoint = 'https://global.oms.opinsights.azure.com/ManagedIdentityService.svc/Validate'
AutoManagedWorkspaceCreationSleepSeconds = 20
# agent permissions
AgentUser='omsagent'
AgentGroup='omiusers'
"""
What need to be packaged to make the signing work:
keys
dscgpgkey.asc
msgpgkey.asc
packages
omsagent-*.universal.x64.asc
omsagent-*.universal.x64.sha256sums
"""
def verifyShellBundleSigningAndChecksum():
    """
    Validate the shell bundle before it is run: import the DSC GPG key,
    verify the bundle's .sha256sums file against its detached .asc
    signature using the packaged keyring, then run sha256sum -c to
    confirm the bundle matches the signed checksums.

    Raises Exception on any missing artifact or failed verification step.
    """
    cert_directory = os.path.join(os.getcwd(), PackagesDirectory)
    keys_directory = os.path.join(os.getcwd(), keysDirectory)

    def require_file(path, description):
        # Every verification artifact is mandatory; a missing file aborts.
        if not os.path.isfile(path):
            raise Exception("Unable to find the " + description + " at " + path)

    # Import the GPG key shipped with the extension
    dscGPGKeyFilePath = os.path.join(keys_directory, 'dscgpgkey.asc')
    require_file(dscGPGKeyFilePath, "dscgpgkey.asc file")
    importGPGKeyCommand = "sh ImportGPGkey.sh " + dscGPGKeyFilePath
    exit_code, output = run_command_with_retries_output(importGPGKeyCommand, retries = 0, retry_check = retry_skip, check_error = False)

    # Locate the keyring, detached signature and checksum list
    keyringFilePath = os.path.join(keys_directory, 'keyring.gpg')
    require_file(keyringFilePath, "Extension keyring file")

    bundle_base, _ = os.path.splitext(BundleFileName)
    ascFilePath = os.path.join(cert_directory, bundle_base + ".asc")
    require_file(ascFilePath, "OMS shell bundle asc file")

    sha256SumsFilePath = os.path.join(cert_directory, bundle_base + ".sha256sums")
    require_file(sha256SumsFilePath, "OMS shell bundle SHA256 sums file")

    # Verify the SHA256 sums file with the keyring and asc files; HOME is
    # pointed at the keys directory so gpg uses the packaged state.
    verifySha256SumsCommand = "HOME=" + keysDirectory + " gpg --no-default-keyring --keyring " + keyringFilePath + " --verify " + ascFilePath + " " + sha256SumsFilePath
    exit_code, output = run_command_with_retries_output(verifySha256SumsCommand, retries = 0, retry_check = retry_skip, check_error = False)
    if exit_code != 0:
        raise Exception("Failed to verify SHA256 sums file at " + sha256SumsFilePath)

    # Perform SHA256 sums to verify the shell bundle itself
    hutil_log_info("Perform SHA256 sums to verify shell bundle")
    performSha256SumsCommand = "cd %s; sha256sum -c %s" % (cert_directory, sha256SumsFilePath)
    exit_code, output = run_command_with_retries_output(performSha256SumsCommand, retries = 0, retry_check = retry_skip, check_error = False)
    if exit_code != 0:
        raise Exception("Failed to verify shell bundle with the SHA256 sums file at " + sha256SumsFilePath)
def main():
    """
    Main entry point for the extension handler.

    Parse the requested operation from sys.argv, perform pre-flight work
    (status-file cleanup, disk-space and gpg checks), invoke the operation,
    and exit via log_and_exit with an appropriate status message.
    """
    init_waagent_logger()
    waagent_log_info('OmsAgentForLinux started to handle.')
    global IsUpgrade

    # Determine the operation being executed
    operation = None
    try:
        option = sys.argv[1]
        if re.match('^([-/]*)(disable)', option):
            operation = 'Disable'
        elif re.match('^([-/]*)(uninstall)', option):
            operation = 'Uninstall'
        elif re.match('^([-/]*)(install)', option):
            operation = 'Install'
        elif re.match('^([-/]*)(enable)', option):
            operation = 'Enable'
        elif re.match('^([-/]*)(update)', option):
            operation = 'Update'
            IsUpgrade = True
        elif re.match('^([-/]*)(telemetry)', option):
            operation = 'Telemetry'
    except Exception as e:
        waagent_log_error(str(e))

    if operation is None:
        log_and_exit('Unknown', 1, 'No valid operation provided')

    # Set up for exit code and any error messages
    exit_code = 0
    message = '{0} succeeded'.format(operation)

    # Clean status files to mitigate diskspace issues on small VMs
    status_files = [
        "/var/opt/microsoft/omsconfig/status/dscperformconsistency",
        "/var/opt/microsoft/omsconfig/status/dscperforminventory",
        "/var/opt/microsoft/omsconfig/status/dscsetlcm",
        "/var/opt/microsoft/omsconfig/status/omsconfighost"
    ]
    for sf in status_files:
        if os.path.isfile(sf):
            # Safety guard: never delete anything outside the omsconfig status dir
            if sf.startswith("/var/opt/microsoft/omsconfig/status"):
                try:
                    os.remove(sf)
                except Exception:
                    hutil_log_info('Error removing telemetry status file before installation: {0}'.format(sf))
                    hutil_log_info('Exception info: {0}'.format(traceback.format_exc()))

    exit_code = check_disk_space_availability()
    if exit_code != 0:
        message = '{0} failed due to low disk space'.format(operation)
        log_and_exit(operation, exit_code, message)

    exit_if_gpg_unavailable(operation)

    # Invoke operation
    try:
        global HUtilObject
        HUtilObject = parse_context(operation)

        # Verify shell bundle signing
        try:
            hutil_log_info("Start signing verification")
            verifyShellBundleSigningAndChecksum()
            hutil_log_info("ShellBundle signing verification succeeded")
        except Exception as ex:
            # Use str(ex), not ex.message: exceptions have no 'message'
            # attribute on Python 3, so the old code raised AttributeError
            # here instead of reporting the real verification failure.
            errmsg = "ShellBundle signing verification failed with '%s'" % str(ex)
            if ProceedOnSigningVerificationFailure:
                hutil_log_error(errmsg)
            else:
                # log_and_exit takes (operation, exit_code, message); the
                # previous 2-argument call would have raised a TypeError.
                log_and_exit(operation, 1, errmsg)

        # invoke operation
        exit_code, output = operations[operation]()

        # Exit code 1 indicates a general problem that doesn't have a more
        # specific error code; it often indicates a missing dependency
        if exit_code == 1 and operation == 'Install':
            message = 'Install failed with exit code 1. Please check that ' \
                      'dependencies are installed. For details, check logs ' \
                      'in /var/log/azure/Microsoft.EnterpriseCloud.' \
                      'Monitoring.OmsAgentForLinux'
        elif exit_code == 127 and operation == 'Install':
            # happens if shell bundle couldn't be extracted due to low space or missing dependency
            exit_code = 52 # since it is a missing dependency
            message = 'Install failed with exit code 127. Please check that ' \
                      'dependencies are installed. For details, check logs ' \
                      'in /var/log/azure/Microsoft.EnterpriseCloud.' \
                      'Monitoring.OmsAgentForLinux'
        elif exit_code == DPKGLockedErrorCode and operation == 'Install':
            # '==' instead of 'is': identity comparison between ints only
            # works by the CPython small-int caching accident.
            message = 'Install failed with exit code {0} because the ' \
                      'package manager on the VM is currently locked: ' \
                      'please wait and try again'.format(DPKGLockedErrorCode)
        elif exit_code != 0:
            message = '{0} failed with exit code {1} {2}'.format(operation,
                                                                 exit_code, output)

    except OmsAgentForLinuxException as e:
        exit_code = e.error_code
        message = e.get_error_message(operation)
    except Exception as e:
        exit_code = 1
        message = '{0} failed with error: {1}\n' \
                  'Stacktrace: {2}'.format(operation, e,
                                           traceback.format_exc())

    # Finish up and log messages
    log_and_exit(operation, exit_code, message)
def check_disk_space_availability():
    """
    Return 52 (the missing-dependency exit code) when any of /var, /etc
    or /opt has less than 500 MB free, 0 otherwise.

    A failure of the probe itself returns 0 so that a disk-usage query
    problem never blocks the operation.
    """
    try:
        # 52 is the exit code for missing dependency i.e. disk space
        # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
        if get_free_space_mb("/var") < 500 or get_free_space_mb("/etc") < 500 or get_free_space_mb("/opt") < 500:
            return 52
        else:
            return 0
    except Exception:
        # Narrowed from a bare except (which also swallows SystemExit and
        # KeyboardInterrupt); the best-effort behavior is preserved.
        print('Failed to check disk usage.')
        return 0
def get_free_space_mb(dirname):
    """
    Return the free disk space, in whole megabytes, on the filesystem
    containing dirname (as available to unprivileged callers).
    """
    stats = os.statvfs(dirname)
    free_bytes = stats.f_bavail * stats.f_frsize
    return free_bytes // (1024 * 1024)
def stop_telemetry_process():
    """
    Kill any running telemetry watcher recorded in omstelemetry.pid and
    remove the pid file. Each recorded pid is validated against
    /proc/<pid>/cmdline so only a genuine 'omsagent.py -telemetry'
    process is killed.
    """
    pids_filepath = os.path.join(os.getcwd(), 'omstelemetry.pid')
    if not os.path.exists(pids_filepath):
        return
    with open(pids_filepath, "r") as f:
        recorded_pids = f.readlines()
    for pid in recorded_pids:
        # Verify the pid still belongs to the telemetry watcher before killing.
        cmd_file = os.path.join("/proc", str(pid.strip("\n")), "cmdline")
        if not os.path.exists(cmd_file):
            continue
        with open(cmd_file, "r") as pidf:
            cmdline = pidf.readlines()
        if "omsagent.py" in cmdline[0] and "-telemetry" in cmdline[0]:
            run_command_and_log("kill " + pid)
    run_command_and_log("rm " + pids_filepath)
def start_telemetry_process():
    """
    Launch the telemetry watcher as a detached child process, replacing
    any watcher that is already running.
    :return: None
    """
    stop_telemetry_process()

    # Re-invoke this script with -telemetry under the same python major version
    watcher_script = os.path.join(os.getcwd(), 'omsagent.py')
    interpreter = 'python{0}'.format(sys.version_info[0])
    args = [interpreter, watcher_script, '-telemetry']
    log = open(os.path.join(os.getcwd(), 'daemon.log'), 'w')
    hutil_log_info('start watcher process ' + str(args))
    subprocess.Popen(args, stdout=log, stderr=log)
def telemetry():
    """
    Handler for the '-telemetry' operation: record this process id in
    omstelemetry.pid, then run the watcher and health-monitor threads
    until both finish.

    Returns (0, "").
    """
    pids_filepath = os.path.join(os.getcwd(), 'omstelemetry.pid')
    with open(pids_filepath, 'w') as f:
        f.write(str(os.getpid()) + '\n')

    if HUtilObject is not None:
        watcher = watcherutil.Watcher(HUtilObject.error, HUtilObject.log)
        workers = [Thread(target=watcher.watch), Thread(target=watcher.monitor_health)]
        # Start both threads, then block until both complete.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    return 0, ""
def prepare_update():
    """
    Back up the workspace configuration directory ahead of an upgrade.

    If a backup for the workspace already exists (created by a previous
    update), nothing is moved. Returns (0, "").
    """
    public_settings, _ = get_settings()
    workspaceId = public_settings.get('workspaceId')
    source_dir = os.path.join(EtcOMSAgentPath, workspaceId)
    backup_dir = os.path.join(EtcOMSAgentPath, ExtensionStateSubdirectory, workspaceId)
    # Only move when the backup has not been created yet.
    if not os.path.isdir(backup_dir):
        shutil.move(source_dir, backup_dir)
    return 0, ""
def restore_state(workspaceId):
    """
    Move a previously backed-up workspace state directory back to its
    expected location.

    Does nothing when there is no backup or the target already exists;
    failures are logged, never raised.
    """
    try:
        backup_dir = os.path.join(EtcOMSAgentPath, ExtensionStateSubdirectory, workspaceId)
        target_dir = os.path.join(EtcOMSAgentPath, workspaceId)
        restorable = os.path.isdir(backup_dir) and not os.path.isdir(target_dir)
        if restorable:
            shutil.move(backup_dir, target_dir)
    except Exception:
        hutil_log_error("Error while restoring the state. Exception : " + traceback.format_exc())
def install():
    """
    Ensure that this VM distro and version are supported.
    Install the OMSAgent shell bundle, using retries.
    Note: install operation times out from WAAgent at 15 minutes, so do not
    wait longer.
    """
    exit_if_vm_not_supported('Install')

    public_settings, protected_settings = get_settings()
    if public_settings is None:
        raise ParameterMissingException('Public configuration must be ' \
                                        'provided')
    workspaceId = public_settings.get('workspaceId')
    check_workspace_id(workspaceId)

    # Restore any state previously backed up for this workspace (upgrade path).
    restore_state(workspaceId)

    # In the case where a SCOM connection is already present, we should not
    # create conflicts by installing the OMSAgent packages
    stopOnMultipleConnections = public_settings.get('stopOnMultipleConnections')
    if (stopOnMultipleConnections is not None
            and stopOnMultipleConnections is True):
        detect_multiple_connections(workspaceId)

    package_directory = os.path.join(os.getcwd(), PackagesDirectory)
    bundle_path = os.path.join(package_directory, BundleFileName)

    # NOTE(review): 100 here is decimal (mode 0o144), not octal 0o100 —
    # presumably intended to make the bundle executable; confirm intent.
    os.chmod(bundle_path, 100)

    # Optional install switches from public settings
    skipDockerProviderInstall = public_settings.get(
        'skipDockerProviderInstall')
    if (skipDockerProviderInstall is not None
            and skipDockerProviderInstall is True):
        cmd = InstallCommandTemplate.format(
            bundle_path, '--skip-docker-provider-install')
    else:
        cmd = InstallCommandTemplate.format(bundle_path, '')

    noDigest = public_settings.get(
        'noDigest')
    if (noDigest is not None
            and noDigest is True):
        cmd = SkipDigestCmdTemplate.format(cmd)

    hutil_log_info('Running command "{0}"'.format(cmd))

    # Retry, since install can fail due to concurrent package operations
    exit_code, output = run_command_with_retries_output(cmd, retries = 10,
                                                       retry_check = retry_if_dpkg_locked_or_curl_is_not_found,
                                                       final_check = final_check_if_dpkg_locked)

    return exit_code, output
def check_kill_process(pstring):
    """
    Send SIGKILL to every process whose 'ps ax' listing matches pstring
    (excluding the grep itself).
    """
    ps_cmd = "ps ax | grep " + pstring + " | grep -v grep"
    for entry in os.popen(ps_cmd):
        pid = entry.split()[0]
        os.kill(int(pid), signal.SIGKILL)
def uninstall():
    """
    Uninstall the OMSAgent shell bundle.
    This is a somewhat soft uninstall. It is not a purge.
    Note: uninstall operation times out from WAAgent at 5 minutes

    Returns (exit_code, output). On an upgrade ('Update' preceded this
    call), the workspace configuration is preserved; otherwise it is
    removed.
    """
    package_directory = os.path.join(os.getcwd(), PackagesDirectory)
    bundle_path = os.path.join(package_directory, BundleFileName)
    global IsUpgrade

    os.chmod(bundle_path, 100)
    cmd = UninstallCommandTemplate.format(bundle_path)
    hutil_log_info('Running command "{0}"'.format(cmd))

    # Initialize both results so the exception path below cannot hit an
    # UnboundLocalError at the final return (the old code left 'output'
    # unassigned when run_command_with_retries_output raised).
    exit_code = 0
    output = ''

    # Retry, since uninstall can fail due to concurrent package operations
    try:
        exit_code, output = run_command_with_retries_output(cmd, retries = 5,
                                                            retry_check = retry_if_dpkg_locked_or_curl_is_not_found,
                                                            final_check = final_check_if_dpkg_locked)
    except Exception:
        # try to force clean the installation
        try:
            check_kill_process("omsagent")
            exit_code = 0
        except Exception as ex:
            exit_code = 1
            # Return the failure details instead of silently dropping them
            # (the old code built this message but never used it).
            output = 'Uninstall failed with error: {0}\n' \
                     'Stacktrace: {1}'.format(ex, traceback.format_exc())

    if IsUpgrade:
        # waagent calls 'remove' on the old version before 'update' on the
        # new one; keep the workspace configuration in that case.
        IsUpgrade = False
    else:
        remove_workspace_configuration()

    return exit_code, output
def enable():
    """
    Onboard the OMSAgent to the specified OMS workspace.
    This includes enabling the OMS process on the VM.
    This call will return non-zero or throw an exception if
    the settings provided are incomplete or incorrect.
    Note: enable operation times out from WAAgent at 5 minutes

    Returns (exit_code, output) from the onboarding command, possibly
    overridden to 52 when the extension marker file cannot be created.
    """
    exit_if_vm_not_supported('Enable')

    public_settings, protected_settings = get_settings()

    if public_settings is None:
        raise ParameterMissingException('Public configuration must be ' \
                                        'provided')
    if protected_settings is None:
        raise ParameterMissingException('Private configuration must be ' \
                                        'provided')

    vmResourceId = protected_settings.get('vmResourceId')

    # If vmResourceId is not provided in private settings, get it from metadata API
    if vmResourceId is None or not vmResourceId:
        vmResourceId = get_vmresourceid_from_metadata()
        hutil_log_info('vmResourceId from Metadata API is {0}'.format(vmResourceId))

    # A None resource id after the IMDS query indicates a classic VM
    if vmResourceId is None:
        hutil_log_info('This may be a classic VM')

    enableAutomaticManagement = public_settings.get('enableAutomaticManagement')

    if (enableAutomaticManagement is not None
            and enableAutomaticManagement is True):
        # OneClick path: the OMS service determines the workspace.
        hutil_log_info('enableAutomaticManagement is set to true; the ' \
                       'workspace ID and key will be determined by the OMS ' \
                       'service.')

        workspaceInfo = retrieve_managed_workspace(vmResourceId)
        if (workspaceInfo is None or 'WorkspaceId' not in workspaceInfo
                or 'WorkspaceKey' not in workspaceInfo):
            raise OneClickException('Workspace info was not determined')
        else:
            # Note: do NOT log workspace keys!
            hutil_log_info('Managed workspaceInfo has been retrieved')
            workspaceId = workspaceInfo['WorkspaceId']
            workspaceKey = workspaceInfo['WorkspaceKey']
            try:
                check_workspace_id_and_key(workspaceId, workspaceKey)
            except InvalidParameterError as e:
                raise OMSServiceOneClickException('Received invalid ' \
                                                  'workspace info: ' \
                                                  '{0}'.format(e))
    else:
        # Standard path: workspace id/key come directly from settings.
        workspaceId = public_settings.get('workspaceId')
        workspaceKey = protected_settings.get('workspaceKey')
        check_workspace_id_and_key(workspaceId, workspaceKey)

    # Check if omsadmin script is available
    if not os.path.exists(OMSAdminPath):
        log_and_exit('Enable', EnableCalledBeforeSuccessfulInstall,
                     'OMSAgent onboarding script {0} does not exist. Enable ' \
                     'cannot be called before install.'.format(OMSAdminPath))

    vmResourceIdParam = '-a {0}'.format(vmResourceId)

    proxy = protected_settings.get('proxy')
    proxyParam = ''
    if proxy is not None:
        proxyParam = '-p {0}'.format(proxy)

    # get domain from protected settings
    domain = protected_settings.get('domain')
    if domain is None:
        # detect opinsights domain using IMDS
        domain = get_azure_cloud_domain()
    else:
        hutil_log_info("Domain retrieved from protected settings '{0}'".format(domain))
    domainParam = ''
    if domain:
        domainParam = '-d {0}'.format(domain)

    optionalParams = '{0} {1} {2}'.format(domainParam, proxyParam, vmResourceIdParam)
    onboard_cmd = OnboardCommandWithOptionalParams.format(OMSAdminPath,
                                                          workspaceId,
                                                          workspaceKey,
                                                          optionalParams)

    hutil_log_info('Handler initiating onboarding.')
    # log_cmd = False keeps the workspace key out of the logs
    exit_code, output = run_command_with_retries_output(onboard_cmd, retries = 5,
                                                        retry_check = retry_onboarding,
                                                        final_check = raise_if_no_internet,
                                                        check_error = True, log_cmd = False)

    # now ensure the permissions and ownership is set recursively
    # (directories 750, files 640, owned by AgentUser:AgentGroup)
    try:
        workspaceId = public_settings.get('workspaceId')
        etc_final_path = os.path.join(EtcOMSAgentPath, workspaceId)
        if (os.path.isdir(etc_final_path)):
            uid = pwd.getpwnam(AgentUser).pw_uid
            gid = grp.getgrnam(AgentGroup).gr_gid
            os.chown(etc_final_path, uid, gid)
            os.system('chmod {1} {0}'.format(etc_final_path, 750))
            for root, dirs, files in os.walk(etc_final_path):
                for d in dirs:
                    os.chown(os.path.join(root, d), uid, gid)
                    os.system('chmod {1} {0}'.format(os.path.join(root, d), 750))
                for f in files:
                    os.chown(os.path.join(root, f), uid, gid)
                    os.system('chmod {1} {0}'.format(os.path.join(root, f), 640))
    except:
        # Best-effort: a permissions failure must not fail onboarding.
        hutil_log_info('Failed to set permissions for OMS directories, could potentially have issues uploading.')

    if exit_code == 0:
        # Create a marker file to denote the workspace that was
        # onboarded using the extension. This will allow supporting
        # multi-homing through the extension like Windows does
        extension_marker_path = os.path.join(EtcOMSAgentPath, workspaceId,
                                             'conf/.azure_extension_marker')
        if os.path.exists(extension_marker_path):
            hutil_log_info('Extension marker file {0} already ' \
                           'created'.format(extension_marker_path))
        else:
            try:
                open(extension_marker_path, 'w').close()
                hutil_log_info('Created extension marker file ' \
                               '{0}'.format(extension_marker_path))
            except IOError as e:
                # Retry once with 'w+' before giving up.
                try:
                    open(extension_marker_path, 'w+').close()
                    hutil_log_info('Created extension marker file ' \
                                   '{0}'.format(extension_marker_path))
                except IOError as ex:
                    hutil_log_error('Error creating {0} with error: ' \
                                    '{1}'.format(extension_marker_path, ex))
                    # we are having some kind of permissions issue creating the marker file
                    output = "Couldn't create marker file"
                    exit_code = 52 # since it is a missing dependency

        # Sleep to prevent bombarding the processes, then restart all processes
        # to resolve any issues with auto-started processes from --upgrade
        time.sleep(PostOnboardingSleepSeconds)
        if HUtilObject and HUtilObject.is_seq_smaller():
            log_output = "Current sequence number {0} is smaller than or egual to the sequence number of the most recent executed configuration, skipping omsagent process restart.".format(HUtilObject._context._seq_no)
            hutil_log_info(log_output)
        else:
            hutil_log_info('Restart omsagent service via service_control script.')
            run_command_and_log(RestartOMSAgentServiceCommand)
            # start telemetry process if enable is successful
            start_telemetry_process()

        # save sequence number
        # NOTE(review): reached even when HUtilObject is None (the guard
        # above only protects the restart branch) -- would raise
        # AttributeError; confirm HUtilObject is always set before Enable.
        HUtilObject.save_seq()

    return exit_code, output
def remove_workspace_configuration():
    """
    Delete the per-workspace state under both the etc and var trees.

    This distinguishes a true extension removal from the waagent upgrade
    routine, which calls 'remove' on the old version before 'update' on
    the new one: in the upgrade path the configuration must persist,
    while here everything for the workspace is removed.
    """
    public_settings, _ = get_settings()
    workspaceId = public_settings.get('workspaceId')
    for base_path in (EtcOMSAgentPath, VarOMSAgentPath):
        # ignore_errors=True: a missing directory is not a failure here
        shutil.rmtree(os.path.join(base_path, workspaceId), True)
    hutil_log_info('Moved oms etc configuration directory and cleaned up var directory')
def is_arc_installed():
    """
    Check if the system is an Azure Arc machine.

    Arc only supports VMs that have systemd, so probing the himdsd unit
    via systemctl is sufficient.
    """
    status = os.system('systemctl status himdsd 1>/dev/null 2>&1')
    return status == 0
def get_arc_endpoint():
    """
    Read the Arc Hybrid IMDS endpoint from the azcmagent systemd drop-in.

    Returns the endpoint URL, or '' when the file is missing or does not
    contain the expected IMDS_ENDPOINT entry.
    """
    endpoint_filepath = '/lib/systemd/system.conf.d/azcmagent.conf'
    endpoint = ''
    try:
        with open(endpoint_filepath, 'r') as f:
            data = f.read()
        # The conf holds a quoted IMDS_ENDPOINT=<url> assignment.
        endpoint = data.split("\"IMDS_ENDPOINT=")[1].split("\"\n")[0]
    except Exception:
        # Narrowed from a bare except (which also swallows SystemExit and
        # KeyboardInterrupt); any read/parse failure falls back to ''.
        hutil_log_error('Unable to load Arc IMDS endpoint from {0}'.format(endpoint_filepath))
    return endpoint
def get_imds_endpoint():
    """
    Select the instance-metadata endpoint to query: the Arc-specific one
    on Arc machines (falling back to the Azure endpoint when it cannot
    be read), otherwise the standard Azure IMDS endpoint.
    """
    azure_imds_endpoint = 'http://169.254.169.254/metadata/instance?api-version=2018-10-01'
    imds_endpoint = azure_imds_endpoint
    if is_arc_installed():
        hutil_log_info('Arc is installed, loading Arc-specific IMDS endpoint')
        arc_endpoint = get_arc_endpoint()
        if arc_endpoint:
            imds_endpoint = arc_endpoint + '/metadata/instance?api-version=2019-08-15'
        else:
            # Fall back to the traditional IMDS endpoint; the cloud domain and VM
            # resource id detection logic are resilient to failed queries to IMDS
            hutil_log_info('Falling back to default Azure IMDS endpoint')
    hutil_log_info('Using IMDS endpoint "{0}"'.format(imds_endpoint))
    return imds_endpoint
def get_vmresourceid_from_metadata():
    """
    Query IMDS for this VM's ARM resource id.

    Returns the resource id string (using the VMSS form when the VM is a
    scale-set instance), or None for classic VMs or when the Metadata
    service cannot be reached.
    """
    imds_endpoint = get_imds_endpoint()
    req = urllib.Request(imds_endpoint)
    req.add_header('Metadata', 'True')

    try:
        response = json.loads(urllib.urlopen(req).read())

        if ('compute' not in response or response['compute'] is None):
            return None # classic vm

        if response['compute']['vmScaleSetName']:
            return '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachineScaleSets/{2}/virtualMachines/{3}'.format(response['compute']['subscriptionId'],response['compute']['resourceGroupName'],response['compute']['vmScaleSetName'],response['compute']['name'])
        else:
            return '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}'.format(response['compute']['subscriptionId'],response['compute']['resourceGroupName'],response['compute']['name'])
    except urlerror.HTTPError as e:
        hutil_log_error('Request to Metadata service URL ' \
                        'failed with an HTTPError: {0}'.format(e))
        hutil_log_info('Response from Metadata service: ' \
                       '{0}'.format(e.read()))
        return None
    except Exception:
        # Narrowed from a bare except (which also swallows SystemExit and
        # KeyboardInterrupt); any other failure is treated as "no id".
        hutil_log_error('Unexpected error from Metadata service')
        return None
def get_azure_environment_from_imds():
    """
    Query IMDS for the azEnvironment (cloud) name of this VM.

    Returns the environment string, or None for classic VMs, responses
    without an azEnvironment field, or when the Metadata service cannot
    be reached.
    """
    imds_endpoint = get_imds_endpoint()
    req = urllib.Request(imds_endpoint)
    req.add_header('Metadata', 'True')

    try:
        response = json.loads(urllib.urlopen(req).read())

        if ('compute' not in response or response['compute'] is None):
            return None # classic vm

        if ('azEnvironment' not in response['compute'] or response['compute']['azEnvironment'] is None):
            return None # classic vm

        return response['compute']['azEnvironment']
    except urlerror.HTTPError as e:
        hutil_log_error('Request to Metadata service URL ' \
                        'failed with an HTTPError: {0}'.format(e))
        hutil_log_info('Response from Metadata service: ' \
                       '{0}'.format(e.read()))
        return None
    except Exception:
        # Narrowed from a bare except (which also swallows SystemExit and
        # KeyboardInterrupt); any other failure means "environment unknown".
        hutil_log_error('Unexpected error from Metadata service')
        return None
def get_azure_cloud_domain():
    """
    Map the IMDS-reported cloud environment to its opinsights DNS domain.

    Falls back to the public-cloud domain when the environment is
    unknown or detection fails.
    """
    try:
        environment = get_azure_environment_from_imds()
        if environment:
            normalized = environment.lower()
            # Case-insensitive match against the known cloud names.
            for cloud, domain in CloudDomainMap.items():
                if normalized == cloud.lower():
                    hutil_log_info('Detected cloud environment "{0}" via IMDS. The domain "{1}" will be used.'.format(cloud, domain))
                    return domain
            hutil_log_info('Unknown cloud environment "{0}"'.format(environment))
    except Exception as e:
        hutil_log_error('Failed to detect cloud environment: {0}'.format(e))

    hutil_log_info('Falling back to default domain "{0}"'.format(CloudDomainMap[DefaultCloudName]))
    return CloudDomainMap[DefaultCloudName]
def retrieve_managed_workspace(vm_resource_id):
    """
    OneClick path: ask the OMS service which workspace to onboard to.

    Requires the ManagedIdentity extension to be installed, a tenant ID
    resolvable through the Metadata API, and an OAuth token from the
    ManagedIdentity extension. Returns the workspace info from the OMS
    service, or None when the tenant ID or token cannot be obtained.
    Raises ManagedIdentityExtMissingException when the ManagedIdentity
    extension is not present.
    """
    # OneClick cannot proceed without the ManagedIdentity extension.
    if not os.path.exists(ManagedIdentityExtListeningURLPath):
        raise ManagedIdentityExtMissingException

    # Determine the Tenant ID using the Metadata API
    tenant_id = get_tenant_id_from_metadata_api(vm_resource_id)
    if tenant_id is None:
        return None
    hutil_log_info('Tenant ID from Metadata API is {0}'.format(tenant_id))

    # Retrieve an OAuth token using the ManagedIdentity extension
    access_token = get_access_token(tenant_id, OAuthTokenResource)
    if access_token is None:
        return None

    # Query OMS service for the workspace info for onboarding
    return get_workspace_info_from_oms(vm_resource_id, tenant_id, access_token)
def disable():
    """
    Disable all OMS workspace processes on the VM and stop the telemetry
    watcher.
    Note: disable operation times out from WAAgent at 15 minutes

    Returns (exit_code, output) from the service_control 'disable' call.
    """
    # stop the telemetry process
    stop_telemetry_process()

    # Check if the service control script is available
    if not os.path.exists(OMSAgentServiceScript):
        # Message fix: the adjacent string literals previously joined as
        # "doesnot exist" because the first fragment lacked a trailing space.
        log_and_exit('Disable', 1, 'OMSAgent service control script {0} does ' \
                                   'not exist. Disable cannot be called ' \
                                   'before install.'.format(OMSAgentServiceScript))
        # Defensive: log_and_exit normally terminates; return a tuple so
        # main()'s "exit_code, output = ..." unpacking cannot fail if it
        # ever returns (the old bare "return 1" would have broken it).
        return 1, ''

    exit_code, output = run_command_and_log(DisableOMSAgentServiceCommand)
    return exit_code, output
# Dictionary of operations strings to methods
# Each handler returns an (exit_code, output) tuple or terminates via
# log_and_exit; main() unpacks the tuple after dispatch.
operations = {'Disable' : disable,
              'Uninstall' : uninstall,
              'Install' : install,
              'Enable' : enable,
              # For update call we will only prepare the update by taking some backup of the state
              # since omsagent.py->install() will be called
              # everytime upgrade is done due to upgradeMode =
              # "UpgradeWithInstall" set in HandlerManifest
              'Update' : prepare_update,
              'Telemetry' : telemetry
}
def parse_context(operation):
    """
    Build a HandlerUtility object for this operation.

    Returns None when the Azure handler utility modules could not be
    imported at startup. Raises ParameterMissingException when the
    settings JSON lacks a required key.
    """
    hutil = None
    utils_loaded = ('Utils.WAAgentUtil' in sys.modules
                    and 'Utils.HandlerUtil' in sys.modules)
    if utils_loaded:
        try:
            # The telemetry watcher is long-lived and logs to its own file.
            logFileName = 'watcher.log' if operation == 'Telemetry' else 'extension.log'
            hutil = HUtil.HandlerUtility(waagent.Log, waagent.Error, logFileName=logFileName)
            hutil.do_parse_context(operation)
        except KeyError as e:
            # do_parse_context raises KeyError when a required JSON key is
            # missing from the settings.
            waagent_log_error('Unable to parse context with error: ' \
                              '{0}'.format(e))
            raise ParameterMissingException
    return hutil
def is_vm_supported_for_extension():
    """
    Check if the VM this extension is running on is supported by OMSAgent.

    Returns a tuple (vm_supported, vm_dist, vm_ver). Returns from
    platform.linux_distribution() vary widely in format, such as
    '7.3.1611' returned for a VM with CentOS 7, so only the leading
    digits given in the supported list must match. The supported distros
    of the OMSAgent-for-Linux are allowed to utilize this VM extension;
    all other distros get error code 51 (handled by the caller).
    """
    supported_dists = {'redhat' : ['7', '8', '9'], 'red hat' : ['7', '8', '9'], 'rhel' : ['7', '8', '9'], # Red Hat
                       'centos' : ['7', '8'], # CentOS
                       'oracle' : ['7', '8'], 'ol': ['7', '8'], # Oracle
                       'debian' : ['8', '9', '10', '11'], # Debian
                       'ubuntu' : ['14.04', '16.04', '18.04', '20.04', '22.04'], # Ubuntu
                       'suse' : ['12', '15'], 'sles' : ['12', '15'], # SLES
                       'opensuse' : ['15'], # openSUSE
                       'rocky' : ['8', '9'], # Rocky
                       'alma' : ['8', '9'], # Alma
                       'amzn' : ['2'] # AWS
                      }

    vm_dist, vm_ver, vm_supported = '', '', False
    parse_manually = False

    # platform.linux_distribution()/platform.dist() were removed after
    # Python 3.7, so parse /etc/os-release manually on newer versions
    if sys.version_info < (3, 7):
        try:
            vm_dist, vm_ver, vm_id = platform.linux_distribution()
        except AttributeError:
            try:
                vm_dist, vm_ver, vm_id = platform.dist()
            except AttributeError:
                hutil_log_info("Falling back to /etc/os-release distribution parsing")
        # Some locally-built pythons (ex 3.5) give string responses
        # (ex. 'bullseye/sid') from platform.dist(); switch to manual
        # parsing when the version is not a plain integer prefix
        try:
            int(vm_ver.split('.')[0])
        except Exception:
            parse_manually = True
    else:
        parse_manually = True

    # Fallback if either of the above platform commands fail, or we switch
    # to manual parsing
    if (not vm_dist and not vm_ver) or parse_manually:
        try:
            with open('/etc/os-release', 'r') as fp:
                for line in fp:
                    if line.startswith('ID='):
                        vm_dist = line.split('=')[1]
                        vm_dist = vm_dist.split('-')[0]
                        vm_dist = vm_dist.replace('\"', '').replace('\n', '')
                    elif line.startswith('VERSION_ID='):
                        vm_ver = line.split('=')[1]
                        vm_ver = vm_ver.replace('\"', '').replace('\n', '')
        except Exception:
            return vm_supported, 'Indeterminate operating system', ''

    # Find this VM distribution in the supported list
    for supported_dist in supported_dists:
        if not vm_dist.lower().startswith(supported_dist):
            continue

        # Check if this VM distribution version is supported
        vm_ver_split = vm_ver.split('.')
        for supported_ver in supported_dists[supported_dist]:
            supported_ver_split = supported_ver.split('.')
            # If vm_ver is at least as precise (at least as many digits) as
            # supported_ver and matches all the supported_ver digits, then
            # this VM is guaranteed to be supported
            vm_ver_match = True
            for idx, supported_ver_num in enumerate(supported_ver_split):
                try:
                    supported_ver_num = int(supported_ver_num)
                    vm_ver_num = int(vm_ver_split[idx])
                except (IndexError, ValueError):
                    # BUG FIX: non-numeric version components raised an
                    # uncaught ValueError before; treat them as a mismatch
                    vm_ver_match = False
                    break
                # BUG FIX: compare ints by value, not identity; 'is not'
                # only happens to work for CPython's cached small ints
                if vm_ver_num != supported_ver_num:
                    vm_ver_match = False
                    break
            if vm_ver_match:
                vm_supported = True
                break

        if vm_supported:
            break

    return vm_supported, vm_dist, vm_ver
def exit_if_vm_not_supported(operation):
    """
    Check if this VM distro and version are supported by the OMSAgent.
    If this VM is not supported, log the proper error code and exit.
    """
    supported, dist, ver = is_vm_supported_for_extension()
    if not supported:
        log_and_exit(operation, UnsupportedOperatingSystem,
                     'Unsupported operating system: ' \
                     '{0} {1}'.format(dist, ver))
    return 0
def exit_if_openssl_unavailable(operation):
    """
    Check if the openssl commandline interface is available to use
    If not, throw error to return UnsupportedOpenSSL error code
    """
    which_code, _ = run_get_output('which openssl', True, False)
    if which_code != 0:
        log_and_exit(operation, UnsupportedOpenSSL, 'OpenSSL is not available')
    return 0
def exit_if_gpg_unavailable(operation):
    """
    Check if gpg is available to use; if not, attempt to install it.
    If install fails, throw error to return UnsupportedGpg error code.
    """
    # Only Debian needs this check (Debian 10 doesn't ship with gpg)
    vm_supp, vm_dist, _ = is_vm_supported_for_extension()
    if not (vm_supp and vm_dist.lower().startswith('debian')):
        return 0

    # Check if GPG already on VM
    which_code, _ = run_get_output('which gpg', True, False)
    if which_code == 0:
        hutil_log_info('GPG already present on VM')
        return 0

    # GPG not on VM, attempt to install
    hutil_log_info('GPG not found, attempting to install')
    install_code, install_output = run_get_output(InstallExtraPackageCommandApt.format('gpg'))
    if install_code != 0:
        log_and_exit(operation, UnsupportedGpg, 'GPG could not be installed: {0}'.format(install_output))
    else:
        hutil_log_info('GPG successfully installed')
    return 0
def check_workspace_id_and_key(workspace_id, workspace_key):
    """
    Validate formats of workspace_id and workspace_key.

    Raises ParameterMissingException when the key is absent and
    InvalidParameterError when it is not valid base64.
    """
    check_workspace_id(workspace_id)

    # Validate that workspace_key is of the correct format (base64-encoded)
    if workspace_key is None:
        raise ParameterMissingException('Workspace key must be provided')

    try:
        encoded_key = base64.b64encode(base64.b64decode(workspace_key))
        if sys.version_info >= (3,):
            # in python 3, base64.b64encode will return bytes, so decode
            # to str for comparison
            encoded_key = encoded_key.decode()
        if encoded_key != workspace_key:
            raise InvalidParameterError('Workspace key is invalid')
    except (TypeError, ValueError):
        # BUG FIX: Python 3 raises binascii.Error (a ValueError subclass)
        # for malformed base64, not TypeError as Python 2 does; catching
        # only TypeError let invalid keys crash instead of reporting
        # InvalidParameterError
        raise InvalidParameterError('Workspace key is invalid')
def check_workspace_id(workspace_id):
    """
    Validate that workspace_id matches the GUID regex
    """
    if workspace_id is None:
        raise ParameterMissingException('Workspace ID must be provided')
    guid_pattern = re.compile(GUIDOnlyRegex, re.M)
    if guid_pattern.match(workspace_id) is None:
        raise InvalidParameterError('Workspace ID is invalid')
def detect_multiple_connections(workspace_id):
    """
    If the VM already has a workspace/SCOM configured, then we should
    disallow a new connection when stopOnMultipleConnections is used

    Throw an exception in these cases:
    - The workspace with the given workspace_id has not been onboarded
      to the VM, but at least one other workspace has been
    - The workspace with the given workspace_id has not been onboarded
      to the VM, and the VM is connected to SCOM

    If the extension operation is connecting to an already-configured
    workspace, it is not a stopping case
    """
    other_connection_exists = False
    if os.path.exists(OMSAdminPath):
        # Preferred path: ask omsadmin.sh which workspaces are configured
        exit_code, utfoutput = run_get_output(WorkspaceCheckCommand,
                                              chk_err = False)

        # output may contain unicode characters not supported by ascii
        # for e.g., generates the following error if used without conversion: UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 18: ordinal not in range(128)
        # default encoding in python is ascii in python < 3
        if sys.version_info < (3,):
            output = utfoutput.decode('utf8').encode('utf8')
        else:
            output = utfoutput

        if output.strip().lower() != 'no workspace':
            # One line per configured workspace; look for ours
            for line in output.split('\n'):
                if workspace_id in line:
                    hutil_log_info('The workspace to be enabled has already ' \
                                   'been configured on the VM before; ' \
                                   'continuing despite ' \
                                   'stopOnMultipleConnections flag')
                    return
                else:
                    # Note: if scom workspace dir is created, a line containing
                    # "Workspace(SCOM Workspace): scom" will be here
                    # If any other line is here, it may start sending data later
                    other_connection_exists = True
    else:
        # omsadmin.sh is not installed yet; inspect the on-disk workspace
        # directories under EtcOMSAgentPath instead
        for dir_name, sub_dirs, files in os.walk(EtcOMSAgentPath):
            for sub_dir in sub_dirs:
                sub_dir_name = os.path.basename(sub_dir)
                workspace_search = re.compile(GUIDOnlyRegex, re.M)
                if sub_dir_name == workspace_id:
                    hutil_log_info('The workspace to be enabled has already ' \
                                   'been configured on the VM before; ' \
                                   'continuing despite ' \
                                   'stopOnMultipleConnections flag')
                    return
                elif (workspace_search.match(sub_dir_name)
                        or sub_dir_name == 'scom'):
                    # A GUID-named dir is another workspace; 'scom' marks a
                    # SCOM configuration directory
                    other_connection_exists = True

    if other_connection_exists:
        err_msg = ('This machine is already connected to some other Log ' \
                   'Analytics workspace, please set ' \
                   'stopOnMultipleConnections to false in public ' \
                   'settings or remove this property, so this machine ' \
                   'can connect to new workspaces, also it means this ' \
                   'machine will get billed multiple times for each ' \
                   'workspace it report to. ' \
                   '(LINUXOMSAGENTEXTENSION_ERROR_MULTIPLECONNECTIONS)')
        # This exception will get caught by the main method
        raise UnwantedMultipleConnectionsException(err_msg)
    else:
        # No other workspace found; still need to rule out a SCOM connection
        detect_scom_connection()
def detect_scom_connection():
    """
    If these two conditions are met, then we can assume the VM is monitored
    by SCOM:
    1. SCOMPort is open and omiserver is listening on it
    2. scx certificate is signed by SCOM server

    To determine it check for existence of below two conditions:
    1. SCOMPort is open and omiserver is listening on it:
       /etc/omi/conf/omiserver.conf can be parsed to determine it.
    2. scx certificate is signed by SCOM server: scom cert is present
       @ /etc/opt/omi/ssl/omi-host-<hostname>.pem
       (/etc/opt/microsoft/scx/ssl/scx.pem is a softlink to this). If
       the VM is monitored by SCOM then issuer field of the certificate
       will have a value like CN=SCX-Certificate/title=<GUID>,
       DC=<SCOM server hostname>
       (e.g CN=SCX-Certificate/title=SCX94a1f46d-2ced-4739-9b6a-1f06156ca4ac,
       DC=NEB-OM-1502733)

    Otherwise, if a scom configuration directory has been created, we
    assume SCOM is in use

    Raises UnwantedMultipleConnectionsException when both conditions hold.
    """
    # Tri-state: None means "could not determine"; only a definitive False
    # short-circuits the check
    scom_port_open = None # return when determine this is false
    cert_signed_by_scom = False

    # First detection method: omsadmin.sh -o (requires packages installed)
    if os.path.exists(OMSAdminPath):
        scom_port_open = detect_scom_using_omsadmin()
        if scom_port_open is False:
            return

    # If omsadmin.sh option is not available, use omiconfigeditor
    if (scom_port_open is None and os.path.exists(OMIConfigEditorPath)
            and os.path.exists(OMIServerConfPath)):
        scom_port_open = detect_scom_using_omiconfigeditor()
        if scom_port_open is False:
            return

    # If omiconfigeditor option is not available, directly parse omiserver.conf
    if scom_port_open is None and os.path.exists(OMIServerConfPath):
        scom_port_open = detect_scom_using_omiserver_conf()
        if scom_port_open is False:
            return

    if scom_port_open is None:
        hutil_log_info('SCOM port could not be determined to be open')
        return

    # Parse the certificate to determine if SCOM issued it
    if os.path.exists(SCOMCertPath):
        exit_if_openssl_unavailable('Install')
        cert_cmd = 'openssl x509 -in {0} -noout -text'.format(SCOMCertPath)
        cert_exit_code, cert_output = run_get_output(cert_cmd, chk_err = False,
                                                     log_cmd = False)
        if cert_exit_code == 0:
            issuer_re = re.compile(SCOMCertIssuerRegex, re.M)
            if issuer_re.search(cert_output):
                hutil_log_info('SCOM cert exists and is signed by SCOM server')
                cert_signed_by_scom = True
            else:
                hutil_log_info('SCOM cert exists but is not signed by SCOM ' \
                               'server')
        else:
            hutil_log_error('Error reading SCOM cert; cert could not be ' \
                            'determined to be signed by SCOM server')
    else:
        hutil_log_info('SCOM cert does not exist')

    # Both conditions met: port open AND certificate issued by SCOM
    if scom_port_open and cert_signed_by_scom:
        err_msg = ('This machine may already be connected to a System ' \
                   'Center Operations Manager server. Please set ' \
                   'stopOnMultipleConnections to false in public settings ' \
                   'or remove this property to allow connection to the Log ' \
                   'Analytics workspace. ' \
                   '(LINUXOMSAGENTEXTENSION_ERROR_MULTIPLECONNECTIONS)')
        raise UnwantedMultipleConnectionsException(err_msg)
def detect_scom_using_omsadmin():
    """
    This method assumes that OMSAdminPath exists; if packages have not
    been installed yet, this may not exist.

    Returns True when omsadmin.sh reports the SCOM port open, False when
    it reports it closed, and None when it cannot tell (e.g. older
    omsadmin.sh versions without the -o option).
    """
    omsadmin_cmd = '{0} -o'.format(OMSAdminPath)
    exit_code, output = run_get_output(omsadmin_cmd, False, False)

    # Guard against older omsadmin.sh versions that reject -o
    lowered = output.lower()
    if 'illegal option' in lowered or 'unknown option' in lowered:
        return None

    if exit_code == 0:
        hutil_log_info('According to {0}, SCOM port is ' \
                       'open'.format(omsadmin_cmd))
        return True
    if exit_code == 1:
        hutil_log_info('According to {0}, SCOM port is not ' \
                       'open'.format(omsadmin_cmd))
        return False
    return None
def detect_scom_using_omiconfigeditor():
    """
    This method assumes that the relevant files exist.

    Returns True when omiconfigeditor reports the SCOM port open, False
    when it reports it closed, and None when it cannot tell (e.g. older
    omiconfigeditor versions without the -q option).
    """
    omi_cmd = '{0} httpsport -q {1} < {2}'.format(OMIConfigEditorPath,
                                                  SCOMPort, OMIServerConfPath)
    exit_code, output = run_get_output(omi_cmd, False, False)

    # Guard against older omiconfigeditor versions that reject the option
    lowered = output.lower()
    if 'illegal option' in lowered or 'unknown option' in lowered:
        return None

    if exit_code == 0:
        hutil_log_info('According to {0}, SCOM port is ' \
                       'open'.format(omi_cmd))
        return True
    if exit_code == 1:
        hutil_log_info('According to {0}, SCOM port is not ' \
                       'open'.format(omi_cmd))
        return False
    return None
def detect_scom_using_omiserver_conf():
    """
    This method assumes that the relevant files exist.
    Returns True if omiserver.conf indicates that SCOM port is open.
    """
    with open(OMIServerConfPath, 'r') as omiserver_file:
        omiserver_txt = omiserver_file.read()

    match = re.search(r'^[\s]*httpsport[\s]*=(.*)$', omiserver_txt, re.M)
    if match is not None and match.group(1) is not None:
        # The httpsport setting may list several ports separated by
        # commas and/or spaces
        ports_list = match.group(1).replace(',', ' ').split(' ')
        if str(SCOMPort) in ports_list:
            hutil_log_info('SCOM port is listed in ' \
                           '{0}'.format(OMIServerConfPath))
            return True

    hutil_log_info('SCOM port is not listed in ' \
                   '{0}'.format(OMIServerConfPath))
    return False
def run_command_and_log(cmd, check_error = True, log_cmd = True):
    """
    Run the provided shell command and log its output, including stdout and
    stderr.
    The output should not contain any PII, but the command might. In this case,
    log_cmd should be set to False.

    Returns (exit_code, output). Several well-known failure signatures in
    the raw output are remapped below to the marketplace extension error
    codes 52 (missing dependency / environment problem) or 53
    (configuration error), with a friendlier replacement message.
    """
    exit_code, output = run_get_output(cmd, check_error, log_cmd)
    if log_cmd:
        hutil_log_info('Output of command "{0}": \n{1}'.format(cmd.rstrip(), output))
    else:
        hutil_log_info('Output: \n{0}'.format(output))

    # For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log
    # Exit code 17: bundle/package installation failure
    if exit_code == 17:
        if "Failed dependencies:" in output:
            # 52 is the exit code for missing dependency
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "Installation failed due to missing dependencies. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        elif "waiting for transaction lock" in output or "dpkg: error processing package systemd" in output or "dpkg-deb" in output or "dpkg:" in output:
            # 52 is the exit code for missing dependency
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "There seems to be an issue in your package manager dpkg or rpm being in lock state. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        elif "Errors were encountered while processing:" in output:
            # 52 is the exit code for missing dependency
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "There seems to be an issue while processing triggers in systemd. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        elif "Cannot allocate memory" in output:
            # 52 is the exit code for missing dependency
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "There seems to be insufficient memory for the installation. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
    # Exit code 19: OMI install failure (first package installed)
    elif exit_code == 19:
        if "rpmdb" in output or "cannot open Packages database" in output or "dpkg (subprocess): cannot set security execution context for maintainer script" in output or "is locked by another process" in output:
            # OMI (19) happens to be the first package we install and if we get rpmdb failures, its a system issue
            # 52 is the exit code for missing dependency i.e. rpmdb, libc6 or libpam-runtime
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "There seems to be an issue in your package manager dpkg or rpm being in lock state. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        elif "libc6 is not installed" in output or "libpam-runtime is not installed" in output or "exited with status 52" in output or "/bin/sh is needed" in output:
            # OMI (19) happens to be the first package we install and if we get rpmdb failures, its a system issue
            # 52 is the exit code for missing dependency i.e. rpmdb, libc6 or libpam-runtime
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "Installation failed due to missing dependencies. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
    # Exit code 33: DSC metaconfig generation failure
    elif exit_code == 33:
        if "Permission denied" in output:
            # Enable failures
            # 52 is the exit code for missing dependency.
            # DSC metaconfig generation failure due to permissions.
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "Installation failed due to insufficient permissions. Please ensure omsagent user is part of the sudoer file and has sufficient permissions, and omsconfig MetaConfig.mof can be generated. For details, check logs in /var/opt/microsoft/omsconfig/omsconfig.log and /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
    # Exit code 18: omsconfig install failure (message replaced, code kept)
    elif exit_code == 18:
        # Install failures
        # DSC install failure
        # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
        output = "Installation failed due to omsconfig package not being able to install. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
    # Exit code 5: onboarding rejected by the service
    elif exit_code == 5:
        if "Reason: InvalidWorkspaceKey" in output or "Reason: MissingHeader" in output:
            # Enable failures
            # 53 is the exit code for configuration errors
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 53
            output = "Installation failed due to incorrect workspace key. Please check if the workspace key is correct. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
    # Exit code 8: onboarding curl failure
    elif exit_code == 8:
        if "Check the correctness of the workspace ID and shared key" in output or "internet connectivity" in output:
            # Enable failures
            # 53 is the exit code for configuration errors
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 53
            output = "Installation failed due to curl error while onboarding. Please check the internet connectivity or the workspace key. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"

    # Catch-all remapping for any other non-zero exit code not already
    # remapped above; later matches deliberately override earlier ones
    if exit_code != 0 and exit_code != 52:
        if "dpkg:" in output or "dpkg :" in output or "rpmdb:" in output or "rpm.lock" in output or "locked by another process" in output:
            # If we get rpmdb failures, its a system issue.
            # 52 is the exit code for missing dependency i.e. rpmdb, libc6 or libpam-runtime
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "There seems to be an issue in your package manager dpkg or rpm being in lock state when installing omsagent bundle for one of the dependencies. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        if "conflicts with file from package" in output or "Failed dependencies:" in output or "Please install curl" in output or "is needed by" in output or "check_version_installable" in output or "Error: curl was not installed" in output or "Please install the ctypes package" in output or "gpg is not installed" in output:
            # If we get rpmdb failures, its a system issue
            # 52 is the exit code for missing dependency i.e. rpmdb, libc6 or libpam-runtime
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "Installation failed due to missing dependencies. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"
        if "Permission denied" in output:
            # Install/Enable failures
            # 52 is the exit code for missing dependency.
            # https://github.com/Azure/azure-marketplace/wiki/Extension-Build-Notes-Best-Practices#error-codes-and-messages-output-to-stderr
            exit_code = 52
            output = "Installation failed due to insufficient permissions. Please ensure omsagent user is part of the sudoer file and has sufficient permissions to install and onboard. For details, check logs in /var/log/azure/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux/extension.log"

    return exit_code, output
def run_command_with_retries(cmd, retries, retry_check, final_check = None,
                             check_error = True, log_cmd = True,
                             initial_sleep_time = InitialRetrySleepSeconds,
                             sleep_increase_factor = 1):
    """
    Run cmd, retrying on failure, and return only the exit code.

    Caller provides a method, retry_check, to use to determine if a retry
    should be performed. This must be a function with two parameters:
    exit_code and output.
    The final_check can be provided as a method to perform a final check
    after retries have been exhausted.
    Logic used: will retry up to retries times with initial_sleep_time in
    between tries.
    If the retry_check returns True for retry_verbosely, we will try cmd
    with the standard -v verbose flag added.

    CONSISTENCY FIX: this body was a line-for-line duplicate of
    run_command_with_retries_output; delegate to it and discard the
    output so the retry logic lives in one place.
    """
    exit_code, _ = run_command_with_retries_output(cmd, retries, retry_check,
                                                   final_check, check_error,
                                                   log_cmd, initial_sleep_time,
                                                   sleep_increase_factor)
    return exit_code
def run_command_with_retries_output(cmd, retries, retry_check, final_check = None,
                                    check_error = True, log_cmd = True,
                                    initial_sleep_time = InitialRetrySleepSeconds,
                                    sleep_increase_factor = 1):
    """
    Run cmd and return (exit_code, output), retrying on failure.

    Caller provides a method, retry_check, to use to determine if a retry
    should be performed. This must be a function with two parameters:
    exit_code and output.
    The final_check can be provided as a method to perform a final check
    after retries have been exhausted.
    Logic used: will retry up to retries times with initial_sleep_time in
    between tries, multiplied by sleep_increase_factor each round.
    If the retry_check returns True for retry_verbosely, we will try cmd
    with the standard -v verbose flag added.
    """
    sleep_time = initial_sleep_time
    run_verbosely = False

    # At most retries + 1 attempts in total
    for attempt in range(retries + 1):
        run_cmd = cmd + ' -v' if run_verbosely else cmd
        exit_code, output = run_command_and_log(run_cmd, check_error, log_cmd)
        should_retry, retry_message, run_verbosely = retry_check(exit_code,
                                                                 output)
        if not should_retry:
            break
        hutil_log_info(retry_message)
        time.sleep(sleep_time)
        sleep_time *= sleep_increase_factor

    if final_check is not None:
        exit_code = final_check(exit_code, output)

    return exit_code, output
def is_dpkg_locked(exit_code, output):
    """
    Report whether a failed command's output indicates the dpkg status
    database is locked by another process.
    """
    # A successful command cannot indicate a lock
    if exit_code == 0:
        return False
    # Locked dpkg produces a line mentioning 'dpkg' followed by 'lock'
    return re.search(r'^.*dpkg.+lock.*$', output, re.M) is not None
def was_curl_found(exit_code, output):
    """
    Returns False if exit_code indicates that curl was not installed; this can
    occur when package lists need to be updated, or when some archives are
    out-of-date
    """
    # BUG FIX: compare exit codes by value rather than identity; 'is' on
    # ints only works for CPython's cached small integers
    return exit_code != InstallErrorCurlNotInstalled
def retry_skip(exit_code, output):
    """
    A retry_check that never requests a retry, regardless of the
    command's exit code or output.
    """
    no_retry = (False, '', False)
    return no_retry
def retry_if_dpkg_locked_or_curl_is_not_found(exit_code, output):
    """
    Some commands fail because the package manager is locked (apt-get/dpkg
    only); this will allow retries on failing commands.
    Sometimes curl's dependencies (i.e. libcurl) are not installed; if this
    is the case on a VM with apt-get, 'apt-get -f install' should be run.
    Sometimes curl is not installed and is also not found in the package
    list; if this is the case on a VM with apt-get, update the package list.

    Returns (should_retry, retry_message, retry_verbosely).
    """
    retry_verbosely = False
    dpkg_locked = is_dpkg_locked(exit_code, output)
    curl_found = was_curl_found(exit_code, output)
    apt_get_exit_code, _ = run_get_output('which apt-get',
                                          chk_err = False,
                                          log_cmd = False)
    if dpkg_locked:
        return True, 'Retrying command because package manager is locked.', \
               retry_verbosely
    elif (not curl_found and apt_get_exit_code == 0 and
            ('apt-get -f install' in output
             # BUG FIX: needle must be lowercase since the haystack is
             # lowercased; 'Unmet dependencies' could never match
             or 'unmet dependencies' in output.lower())):
        hutil_log_info('Installing all dependencies of curl:')
        run_command_and_log('apt-get -f install')
        return True, 'Retrying command because curl and its dependencies ' \
                     'needed to be installed', retry_verbosely
    elif not curl_found and apt_get_exit_code == 0:
        hutil_log_info('Updating package lists to make curl available')
        run_command_and_log('apt-get update')
        return True, 'Retrying command because package lists needed to be ' \
                     'updated', retry_verbosely
    else:
        return False, '', False
def final_check_if_dpkg_locked(exit_code, output):
    """
    Map a still-locked dpkg failure to the dedicated DPKGLockedErrorCode
    after all retries have been exhausted.
    """
    if is_dpkg_locked(exit_code, output):
        return DPKGLockedErrorCode
    return exit_code
def retry_onboarding(exit_code, output):
    """
    Retry under any of these conditions:
    - If the onboarding request returns 403: this may indicate that the agent
      GUID and certificate should be re-generated
    - If the onboarding request returns a different non-200 code: the OMS
      service may be temporarily unavailable
    - If the onboarding curl command returns an unaccounted-for error code,
      we should retry with verbose logging

    Returns (should_retry, retry_message, retry_verbosely).
    """
    # BUG FIX: compare exit codes with '==' rather than 'is'; identity
    # comparison of ints only works for CPython's cached small integers
    retry_verbosely = False
    if exit_code == EnableErrorOMSReturned403:
        return True, 'Retrying the onboarding command to attempt generating ' \
                     'a new agent ID and certificate.', retry_verbosely
    elif exit_code == EnableErrorOMSReturnedNon200:
        return True, 'Retrying; the OMS service may be temporarily ' \
                     'unavailable.', retry_verbosely
    elif exit_code == EnableErrorOnboarding:
        return True, 'Retrying with verbose logging.', True
    return False, '', False
def raise_if_no_internet(exit_code, output):
    """
    Raise the CannotConnectToOMSException exception if the onboarding
    script returns the error code to indicate that the OMS service can't be
    resolved
    """
    # BUG FIX: use '==' rather than 'is' for the int comparison; identity
    # only holds for CPython's cached small integers
    if exit_code == EnableErrorResolvingHost:
        raise CannotConnectToOMSException
    return exit_code
def get_settings():
    """
    Retrieve the configuration for this extension operation.

    Returns (public_settings, protected_settings); either may be None when
    the settings cannot be loaded. Results are cached in SettingsDict.
    """
    global SettingsDict
    public_settings = None
    protected_settings = None

    if HUtilObject is not None:
        # hutil has already parsed and decrypted the settings
        public_settings = HUtilObject.get_public_settings()
        protected_settings = HUtilObject.get_protected_settings()
    elif SettingsDict is not None:
        # Settings were cached by a previous call
        public_settings = SettingsDict['public_settings']
        protected_settings = SettingsDict['protected_settings']
    else:
        # Parse and decrypt the settings file manually
        SettingsDict = {}
        handler_env = get_handler_env()
        try:
            config_dir = str(handler_env['handlerEnvironment']['configFolder'])
        except Exception:
            config_dir = os.path.join(os.getcwd(), 'config')

        seq_no = get_latest_seq_no()
        settings_path = os.path.join(config_dir, '{0}.settings'.format(seq_no))
        try:
            with open(settings_path, 'r') as settings_file:
                settings_txt = settings_file.read()
            settings = json.loads(settings_txt)
            h_settings = settings['runtimeSettings'][0]['handlerSettings']
            public_settings = h_settings['publicSettings']
            SettingsDict['public_settings'] = public_settings
        except Exception:
            hutil_log_error('Unable to load handler settings from ' \
                            '{0}'.format(settings_path))
            # BUG FIX: execution previously continued and dereferenced
            # h_settings, raising NameError; bail out with empty settings
            # and keep the cache coherent instead
            SettingsDict['public_settings'] = None
            SettingsDict['protected_settings'] = None
            return None, None

        if ('protectedSettings' in h_settings
                and 'protectedSettingsCertThumbprint' in h_settings
                and h_settings['protectedSettings'] is not None
                and h_settings['protectedSettingsCertThumbprint'] is not None):
            encoded_settings = h_settings['protectedSettings']
            settings_thumbprint = h_settings['protectedSettingsCertThumbprint']
            encoded_cert_path = os.path.join('/var/lib/waagent',
                                             '{0}.crt'.format(
                                                       settings_thumbprint))
            encoded_key_path = os.path.join('/var/lib/waagent',
                                            '{0}.prv'.format(
                                                      settings_thumbprint))
            decoded_settings = base64.standard_b64decode(encoded_settings)
            # Decrypt using the waagent-deployed certificate/key pair
            decrypt_cmd = 'openssl smime -inform DER -decrypt -recip {0} ' \
                          '-inkey {1}'.format(encoded_cert_path,
                                              encoded_key_path)

            # BUG FIX: initialize output so the OSError path below cannot
            # leave it unbound (previously raised NameError)
            output = (None, None)
            try:
                session = subprocess.Popen([decrypt_cmd], shell = True,
                                           stdin = subprocess.PIPE,
                                           stderr = subprocess.STDOUT,
                                           stdout = subprocess.PIPE)
                output = session.communicate(decoded_settings)
            except OSError:
                pass
            protected_settings_str = output[0]

            if protected_settings_str is None:
                log_and_exit('Enable', 1, 'Failed decrypting ' \
                                          'protectedSettings')
            protected_settings = ''
            try:
                protected_settings = json.loads(protected_settings_str)
            except Exception:
                hutil_log_error('JSON exception decoding protected settings')
        SettingsDict['protected_settings'] = protected_settings

    return public_settings, protected_settings
def update_status_file(operation, exit_code, exit_status, message):
    """
    Mimic HandlerUtil method do_status_report in case hutil method is not
    available.
    Write status to status file.
    """
    handler_env = get_handler_env()
    try:
        extension_version = str(handler_env['version'])
        status_dir = str(handler_env['handlerEnvironment']['statusFolder'])
    except:
        extension_version = "1.0"
        status_dir = os.path.join(os.getcwd(), 'status')

    status_payload = [{
        "version" : extension_version,
        "timestampUTC" : time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "status" : {
            "name" : "Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux",
            "operation" : operation,
            "status" : exit_status,
            "code" : exit_code,
            "formattedMessage" : {
                "lang" : "en-US",
                "message" : message
            }
        }
    }]

    # Find the most recently changed config file and then use the
    # corresponding status file; write via a temp file + rename so the
    # status file is never observed half-written
    seq_no = get_latest_seq_no()
    status_path = os.path.join(status_dir, '{0}.status'.format(seq_no))
    status_tmp = '{0}.tmp'.format(status_path)
    with open(status_tmp, 'w+') as tmp_file:
        tmp_file.write(json.dumps(status_payload))
    os.rename(status_tmp, status_path)
def get_handler_env():
    """
    Set and retrieve the contents of HandlerEnvironment.json as JSON,
    caching the parsed result in the HandlerEnvironment global.
    """
    global HandlerEnvironment
    if HandlerEnvironment is not None:
        return HandlerEnvironment

    handler_env_path = os.path.join(os.getcwd(), 'HandlerEnvironment.json')
    try:
        with open(handler_env_path, 'r') as handler_env_file:
            parsed_env = json.loads(handler_env_file.read())
        # Some agent versions wrap the environment in a one-element list
        if type(parsed_env) == list:
            parsed_env = parsed_env[0]
        HandlerEnvironment = parsed_env
    except Exception as e:
        waagent_log_error(str(e))
    return HandlerEnvironment
def get_latest_seq_no():
    """
    Determine the latest operation settings number to use, caching the
    result in the SettingsSequenceNumber global.
    """
    global SettingsSequenceNumber
    if SettingsSequenceNumber is not None:
        return SettingsSequenceNumber

    handler_env = get_handler_env()
    try:
        config_dir = str(handler_env['handlerEnvironment']['configFolder'])
    except:
        config_dir = os.path.join(os.getcwd(), 'config')

    latest_seq_no = -1
    latest_time = None
    try:
        for dir_name, sub_dirs, file_names in os.walk(config_dir):
            for file_name in file_names:
                base_name = os.path.basename(file_name)
                # Settings files are named <seq_no>.settings
                if re.match(r'[0-9]{1,10}\.settings', base_name) is None:
                    continue
                seq_no = int(base_name.split('.')[0])
                mtime = os.path.getmtime(os.path.join(config_dir, file_name))
                # Keep the sequence number of the most recently modified file
                if latest_time is None or mtime > latest_time:
                    latest_time = mtime
                    latest_seq_no = seq_no
    except:
        pass

    # Default to 0 when no settings file was found
    SettingsSequenceNumber = max(latest_seq_no, 0)
    return SettingsSequenceNumber
def run_get_output(cmd, chk_err = False, log_cmd = True):
    """
    Mimic the waagent method RunGetOutput in case waagent is not available
    Run a shell command and return a tuple of (exit code, stripped output)
    """
    if 'Utils.WAAgentUtil' in sys.modules:
        # WALinuxAgent-2.0.14 allows only 2 parameters for RunGetOutput;
        # determine the parameter count via inspection and fall back to 2
        # when it cannot be determined
        try:
            num_params = len(inspect.signature(waagent.RunGetOutput).parameters)
        except:
            try:
                num_params = len(inspect.getargspec(waagent.RunGetOutput).args)
            except:
                num_params = 2
        if num_params >= 3:
            exit_code, output = waagent.RunGetOutput(cmd, chk_err, log_cmd)
        else:
            exit_code, output = waagent.RunGetOutput(cmd, chk_err)
    else:
        try:
            raw_output = subprocess.check_output(cmd, stderr = subprocess.STDOUT,
                                                 shell = True)
            exit_code = 0
        except subprocess.CalledProcessError as e:
            exit_code = e.returncode
            raw_output = e.output
        output = raw_output.decode('latin-1')

    output = output.encode('utf-8', 'ignore')
    # On python 3, encode returns a byte object, so we must decode back to a string
    if sys.version_info >= (3,):
        output = output.decode()
    return exit_code, output.strip()
def get_tenant_id_from_metadata_api(vm_resource_id):
    """
    Retrieve the Tenant ID using the Metadata API of the VM resource ID
    Since we have not authenticated, the Metadata API will throw a 401, but
    the headers of the 401 response will contain the tenant ID
    """
    metadata_endpoint = get_metadata_api_endpoint(vm_resource_id)
    metadata_request = urllib.Request(metadata_endpoint)
    try:
        # This request should fail with code 401
        metadata_response = urllib.urlopen(metadata_request)
        hutil_log_info('Request to Metadata API did not fail as expected; ' \
                       'attempting to use headers from response to ' \
                       'determine Tenant ID')
        metadata_headers = metadata_response.headers
    except urlerror.HTTPError as e:
        metadata_headers = e.headers

    if metadata_headers is None or 'WWW-Authenticate' not in metadata_headers:
        raise MetadataAPIException('Expected information from Metadata API ' \
                                   'is not present')

    # The tenant GUID is embedded in the authorization_uri value of the
    # WWW-Authenticate challenge header
    auth_header = metadata_headers['WWW-Authenticate']
    auth_header_regex = r'authorization_uri=\"https:\/\/login\.windows\.net/(' + GUIDRegex + ')\"'
    auth_header_matches = re.compile(auth_header_regex).search(auth_header)
    if not auth_header_matches:
        raise MetadataAPIException('The WWW-Authenticate header in the ' \
                                   'response does not contain expected ' \
                                   'authorization_uri format')
    return auth_header_matches.group(1)
def get_metadata_api_endpoint(vm_resource_id):
    """
    Extrapolate Metadata API endpoint from VM Resource ID
    Example VM resource ID: /subscriptions/306ee7f1-3d0a-4605-9f39-ff253cc02708/resourceGroups/LinuxExtVMResourceGroup/providers/Microsoft.Compute/virtualMachines/lagalbraOCUb16C
    Corresponding example endpoint: https://management.azure.com/subscriptions/306ee7f1-3d0a-4605-9f39-ff253cc02708/resourceGroups/LinuxExtVMResourceGroup?api-version=2016-09-01

    Raises InvalidParameterError when the resource ID does not match the
    expected format
    """
    # BUGFIX: the name 'urlparse' is bound to the urlparse *function* by the
    # top-of-file imports (both py2 and py3), so 'urlparse.urlencode' raised
    # AttributeError; import urlencode from its real location instead
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    # Will match for ARM and Classic VMs, Availability Sets, VM Scale Sets
    vm_resource_id_regex = r'^\/subscriptions\/(' + GUIDRegex + ')\/' \
                            'resourceGroups\/([^\/]+)\/providers\/Microsoft' \
                            '\.(?:Classic){0,1}Compute\/(?:virtualMachines|' \
                            'availabilitySets|virtualMachineScaleSets)' \
                            '\/[^\/]+$'
    vm_resource_id_search = re.compile(vm_resource_id_regex, re.M)
    vm_resource_id_matches = vm_resource_id_search.search(vm_resource_id)
    if not vm_resource_id_matches:
        raise InvalidParameterError('VM Resource ID is invalid')

    subscription_id = vm_resource_id_matches.group(1)
    resource_group = vm_resource_id_matches.group(2)
    metadata_url = 'https://management.azure.com/subscriptions/{0}' \
                   '/resourceGroups/{1}'.format(subscription_id,
                                                resource_group)
    metadata_data = urlencode({'api-version' : '2016-09-01'})
    metadata_endpoint = '{0}?{1}'.format(metadata_url, metadata_data)
    return metadata_endpoint
def get_access_token(tenant_id, resource):
    """
    Retrieve an OAuth token by sending an OAuth2 token exchange
    request to the local URL that the ManagedIdentity extension is
    listening to

    Raises ManagedIdentityExtException when the listening URL cannot be
    determined or the token exchange fails
    """
    # BUGFIX: 'urlparse' is bound to the urlparse *function* by the
    # top-of-file imports, so 'urlparse.urlencode' raised AttributeError;
    # import urlencode from its real location instead
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    # Extract the endpoint that the ManagedIdentity extension is listening on
    with open(ManagedIdentityExtListeningURLPath, 'r') as listening_file:
        listening_settings_txt = listening_file.read()
    try:
        listening_settings = json.loads(listening_settings_txt)
        listening_url = listening_settings['url']
    except Exception:
        raise ManagedIdentityExtException('Could not extract listening URL ' \
                                          'from settings file')

    # Send an OAuth token exchange request
    oauth_data = {'authority' : 'https://login.microsoftonline.com/' \
                                '{0}'.format(tenant_id),
                  'resource' : resource
                 }
    oauth_body = urlencode(oauth_data)
    # BUGFIX: urlopen on Python 3 requires the POST body to be bytes
    if sys.version_info[0] == 3:
        oauth_body = oauth_body.encode('utf-8')
    oauth_request = urllib.Request(listening_url + '/oauth2/token',
                                   oauth_body)
    oauth_request.add_header('Metadata', 'true')
    try:
        oauth_response = urllib.urlopen(oauth_request)
        oauth_response_txt = oauth_response.read()
    except urlerror.HTTPError as e:
        hutil_log_error('Request to ManagedIdentity extension listening URL ' \
                        'failed with an HTTPError: {0}'.format(e))
        hutil_log_info('Response from ManagedIdentity extension: ' \
                       '{0}'.format(e.read()))
        raise ManagedIdentityExtException('Request to listening URL failed ' \
                                          'with HTTPError {0}'.format(e))
    except Exception:
        raise ManagedIdentityExtException('Unexpected error from request to ' \
                                          'listening URL')

    try:
        oauth_response_json = json.loads(oauth_response_txt)
    except Exception:
        raise ManagedIdentityExtException('Error parsing JSON from ' \
                                          'listening URL response')

    if (oauth_response_json is not None
            and 'access_token' in oauth_response_json):
        return oauth_response_json['access_token']
    raise ManagedIdentityExtException('Could not retrieve access token ' \
                                      'in the listening URL response')
def get_workspace_info_from_oms(vm_resource_id, tenant_id, access_token):
    """
    Send a request to the OMS service with the VM information to
    determine the workspace the OMSAgent should onboard to

    Retries while OMS reports that the managed workspace is still being
    provisioned; raises OMSServiceOneClickException (or OneClickException
    when retries run out) if workspace information cannot be obtained
    """
    oms_data = {'ResourceId' : vm_resource_id,
                'TenantId' : tenant_id,
                'JwtToken' : access_token
               }
    oms_request_json = json.dumps(oms_data)
    # BUGFIX: urlopen on Python 3 requires the POST body to be bytes, not
    # str; json.dumps emits ASCII by default so utf-8 encoding is safe
    if sys.version_info[0] == 3:
        oms_request_json = oms_request_json.encode('utf-8')
    oms_request = urllib.Request(OMSServiceValidationEndpoint)
    oms_request.add_header('Content-Type', 'application/json')

    retries = 5
    initial_sleep_time = AutoManagedWorkspaceCreationSleepSeconds
    sleep_increase_factor = 1
    try_count = 0
    sleep_time = initial_sleep_time

    # Workspace may not be provisioned yet; sleep and retry if
    # provisioning has been accepted
    while try_count <= retries:
        try:
            oms_response = urllib.urlopen(oms_request, oms_request_json)
            oms_response_txt = oms_response.read()
        except urlerror.HTTPError as e:
            hutil_log_error('Request to OMS threw HTTPError: {0}'.format(e))
            hutil_log_info('Response from OMS: {0}'.format(e.read()))
            raise OMSServiceOneClickException('ValidateMachineIdentity ' \
                                              'request returned an error ' \
                                              'HTTP code: {0}'.format(e))
        except Exception:
            raise OMSServiceOneClickException('Unexpected error from ' \
                                              'ValidateMachineIdentity ' \
                                              'request')

        should_retry = retry_get_workspace_info_from_oms(oms_response)
        if not should_retry:
            # Workspace is provisioned; parse the response below
            break
        elif try_count == retries:
            hutil_log_error('Retries for ValidateMachineIdentity request ran ' \
                            'out: required workspace information cannot be ' \
                            'extracted')
            raise OneClickException('Workspace provisioning did not complete ' \
                                    'within the allotted time')

        try_count += 1
        time.sleep(sleep_time)
        sleep_time *= sleep_increase_factor

    if not oms_response_txt:
        raise OMSServiceOneClickException('Body from ValidateMachineIdentity ' \
                                          'response is empty; required ' \
                                          'workspace information cannot be ' \
                                          'extracted')

    try:
        oms_response_json = json.loads(oms_response_txt)
    except Exception:
        raise OMSServiceOneClickException('Error parsing JSON from ' \
                                          'ValidateMachineIdentity response')

    if (oms_response_json is not None and 'WorkspaceId' in oms_response_json
            and 'WorkspaceKey' in oms_response_json):
        return oms_response_json

    hutil_log_error('Could not retrieve both workspace ID and key from ' \
                    'the OMS service response {0}; cannot determine ' \
                    'workspace ID and key'.format(oms_response_json))
    raise OMSServiceOneClickException('Required workspace information ' \
                                      'was not found in the ' \
                                      'ValidateMachineIdentity response')
def retry_get_workspace_info_from_oms(oms_response):
    """
    Return True to retry if the response from OMS for the
    ValidateMachineIdentity request indicates that the request has
    been accepted, but the managed workspace is still being
    provisioned
    """
    try:
        oms_response_http_code = oms_response.getcode()
    except Exception:
        # Typo fix in log message ("repsonse" -> "response")
        hutil_log_error('Unable to get HTTP code from OMS response')
        return False

    # 202/204/404 indicate that provisioning is still in progress
    if oms_response_http_code in (202, 204, 404):
        hutil_log_info('Retrying ValidateMachineIdentity OMS request ' \
                       'because workspace is still being provisioned; HTTP ' \
                       'code from OMS is {0}'.format(oms_response_http_code))
        return True

    hutil_log_info('Workspace is provisioned; HTTP code from OMS is ' \
                   '{0}'.format(oms_response_http_code))
    return False
def init_waagent_logger():
    """
    Set up the waagent logger to write to /var/log/waagent.log and stdout.
    If waagent has not been imported, report the failure instead of raising.
    """
    try:
        waagent.LoggerInit('/var/log/waagent.log', '/dev/stdout', True)
    except Exception as e:
        # waagent may be missing entirely; fall back to a console message
        print('Unable to initialize waagent log because of exception ' \
              '{0}'.format(e))
def waagent_log_info(message):
    """
    Log informational message, being cautious of possibility that waagent may
    not be imported
    """
    if 'Utils.WAAgentUtil' not in sys.modules:
        # waagent unavailable; fall back to stdout
        print('Info: {0}'.format(message))
    else:
        waagent.Log(message)
def waagent_log_error(message):
    """
    Log error message, being cautious of possibility that waagent may not be
    imported
    """
    if 'Utils.WAAgentUtil' not in sys.modules:
        # waagent unavailable; fall back to stdout
        print('Error: {0}'.format(message))
    else:
        waagent.Error(message)
def hutil_log_info(message):
    """
    Log informational message, being cautious of possibility that hutil may
    not be imported and configured
    """
    if HUtilObject is None:
        # Handler utility unavailable; fall back to stdout
        print('Info: {0}'.format(message))
    else:
        HUtilObject.log(message)
def hutil_log_error(message):
    """
    Log error message, being cautious of possibility that hutil may not be
    imported and configured
    """
    if HUtilObject is None:
        # Handler utility unavailable; fall back to stdout
        print('Error: {0}'.format(message))
    else:
        HUtilObject.error(message)
def log_and_exit(operation, exit_code = 1, message = ''):
    """
    Log the exit message through every available channel and terminate the
    process with exit_code
    """
    if exit_code == 0:
        exit_status = 'success'
        waagent_log_info(message)
        hutil_log_info(message)
    else:
        exit_status = 'failed'
        waagent_log_error(message)
        hutil_log_error(message)

    # Prefer the handler utility's exit path when available; otherwise write
    # the status file directly before exiting
    if HUtilObject is not None:
        HUtilObject.do_exit(exit_code, operation, exit_status, str(exit_code),
                            message)
    else:
        update_status_file(operation, str(exit_code), exit_status, message)
    sys.exit(exit_code)
# Exceptions
# Exceptions that are expected to be caught by the main method carry an
# error_code field holding the integer exit code that main should exit with
class OmsAgentForLinuxException(Exception):
    """
    Root of the extension's exception hierarchy; its error code is the
    basic error code traditionally returned in Linux: 1
    """
    error_code = 1

    def get_error_message(self, operation):
        """Return a descriptive error message for this exception type."""
        return '{0} failed with exit code {1}'.format(
            operation, self.error_code)
class ParameterMissingException(OmsAgentForLinuxException):
    """
    Raised when a required parameter for the OmsAgentForLinux extension
    was not supplied
    """
    error_code = MissingorInvalidParameterErrorCode

    def get_error_message(self, operation):
        return '{0} failed due to a missing parameter: {1}'.format(
            operation, self)
class InvalidParameterError(OmsAgentForLinuxException):
    """
    Raised when a parameter for the OmsAgentForLinux extension is invalid,
    e.g. a workspace ID that does not match the GUID regex
    """
    error_code = MissingorInvalidParameterErrorCode

    def get_error_message(self, operation):
        return '{0} failed due to an invalid parameter: {1}'.format(
            operation, self)
class UnwantedMultipleConnectionsException(OmsAgentForLinuxException):
    """
    Raised when this VM is already connected to a different Log Analytics
    workspace and stopOnMultipleConnections is set to true
    """
    error_code = UnwantedMultipleConnectionsErrorCode

    def get_error_message(self, operation):
        return '{0} failed due to multiple connections: {1}'.format(
            operation, self)
class CannotConnectToOMSException(OmsAgentForLinuxException):
    """
    Raised when the OMSAgent cannot reach the OMS service
    """
    # error code to indicate no internet access
    error_code = CannotConnectToOMSErrorCode

    def get_error_message(self, operation):
        return 'The agent could not connect to the Microsoft Operations ' \
               'Management Suite service. Please check that the system ' \
               'either has Internet access, or that a valid HTTP proxy has ' \
               'been configured for the agent. Please also check the ' \
               'correctness of the workspace ID.'
class OneClickException(OmsAgentForLinuxException):
    """
    Generic exception for OneClick-related issues; base class for the more
    specific OneClick exception types below
    """
    error_code = OneClickErrorCode

    def get_error_message(self, operation):
        return 'Encountered an issue related to the OneClick scenario: ' \
               '{0}'.format(self)
class ManagedIdentityExtMissingException(OneClickException):
    """
    Raised when the ManagedIdentity extension, which is required for the
    OneClick scenario, is not present
    """
    error_code = ManagedIdentityExtMissingErrorCode

    def get_error_message(self, operation):
        return 'The ManagedIdentity extension is required to be installed ' \
               'for Automatic Management to be enabled. Please set ' \
               'EnableAutomaticManagement to false in public settings or ' \
               'install the ManagedIdentityExtensionForLinux Azure VM ' \
               'extension.'
class ManagedIdentityExtException(OneClickException):
    """
    Raised when we encounter an issue with ManagedIdentityExtensionForLinux
    """
    error_code = ManagedIdentityExtErrorCode

    def get_error_message(self, operation):
        return 'Encountered an issue with the ManagedIdentity extension: ' \
               '{0}'.format(self)
class MetadataAPIException(OneClickException):
    """
    Raised when we encounter an issue with the Metadata API
    """
    error_code = MetadataAPIErrorCode

    def get_error_message(self, operation):
        return 'Encountered an issue with the Metadata API: {0}'.format(self)
class OMSServiceOneClickException(OneClickException):
    """
    Raised when prerequisites were satisfied but the managed workspace
    information could not be retrieved from the OMS service
    """
    error_code = OMSServiceOneClickErrorCode

    def get_error_message(self, operation):
        return 'Encountered an issue with the OMS service: ' \
               '{0}'.format(self)
# Script entry point: delegate to main() (defined earlier in this file)
if __name__ == '__main__' :
    main()