# AzureMonitorAgent/ama_tst/modules/main.py (179 lines of code) (raw):
import os
import sys
from helpers import get_input
from logcollector import run_logcollector
from error_codes import *
from errors import get_input, is_error, err_summary
from install.install import check_installation
from connect.connect import check_connection
from general_health.general_health import check_general_health
from high_cpu_mem.high_cpu_mem import check_high_cpu_memory
from syslog_tst.syslog import check_syslog
from custom_logs.custom_logs import check_custom_logs
def check_sudo():
    """Check that the troubleshooter is being run as root.

    When the effective user id is not 0, prints a short explanation asking
    the user to re-run the troubleshooter with 'sudo'.

    Returns:
        bool: True if the effective user id is 0, False otherwise.
    """
    if os.geteuid() == 0:
        return True

    guidance = (
        "The troubleshooter is not currently being run as root. In order to have accurate results, we ask that you run this troubleshooter as root.",
        "NOTE: it will not add, modify, or delete any files without express permission.",
        "Please try running the troubleshooter again with 'sudo'. Thank you!",
    )
    for line in guidance:
        print(line)
    return False
def check_all(interactive):
    """Run every troubleshooting scenario in order, stopping at the first error.

    Args:
        interactive: passed through unchanged to each scenario check.

    Returns:
        The code returned by the first check for which is_error() is true,
        otherwise the code returned by the last check (custom logs).
    """
    separator = "================================================================================"
    scenarios = (
        check_installation,       # 1: Install
        check_connection,         # 2: Connection
        check_general_health,     # 3: General Health
        check_high_cpu_memory,    # 4: High CPU/Memory Usage
        check_syslog,             # 5: Syslog
        check_custom_logs,        # 6: Custom logs
    )

    outcome = NO_ERROR
    final_index = len(scenarios) - 1
    for index, scenario in enumerate(scenarios):
        outcome = scenario(interactive)
        if is_error(outcome):
            return outcome
        # a separator goes between scenarios, but not after the last one
        if index != final_index:
            print(separator)
    return outcome
def collect_logs():
    """Prompt for an output directory, then run the AMA log collector on it."""
    divider = "================================================================================"

    # get output directory for logs
    print("Please input an existing, absolute filepath to a directory where the output for the zip file will be placed upon completion.")
    # NOTE(review): the validator only checks os.path.isdir, so an existing
    # relative directory is accepted even though the prompt asks for an
    # absolute path - confirm whether that is intended.
    output_location = get_input(
        "Output Directory",
        os.path.isdir,
        "Please input an existing, absolute filepath.",
    )

    print("Collecting AMA logs...")
    print(divider)
    run_logcollector(output_location)
def print_results(success):
    """Print the collected errors/warnings followed by a closing verdict.

    Args:
        success: result code from the checks; compared against NO_ERROR
            and USER_EXIT to pick the closing message.
    """
    heavy_rule = "================================================================================"
    light_rule = "--------------------------------------------------------------------------------"

    print(heavy_rule)
    print(heavy_rule)

    # print out all errors/warnings
    if err_summary:
        print("ALL ERRORS/WARNINGS ENCOUNTERED:")
        for entry in err_summary:
            print(" {0}".format(entry))
        print(light_rule)

    if success == NO_ERROR:
        # no errors found
        print("No errors were found.")
    elif success != USER_EXIT:
        # error found (a user-requested exit prints nothing further)
        print("Please review the errors found above.")
def print_next_steps():
    """Give information to the user about next steps.

    Prints the details to include when reporting an unresolved issue,
    followed by the commands for restarting the Azure Monitor Agent.
    """
    divider = "================================================================================"

    support_details = [
        "If you still have an issue, please run the troubleshooter again and collect the logs for AMA.",
        "In addition, please include the following information:",
        " - Azure Subscription ID where the Log Analytics Workspace is located",
        " - Workspace ID the agent has been onboarded to",
        " - Workspace Name",
        " - Region Workspace is located",
        " - Pricing Tier assigned to the Workspace",
        " - Linux Distribution on the VM",
        " - Azure Monitor Agent Version",
    ]
    print(divider)
    print("\n".join(support_details))

    restart_intro = (
        "Restarting AMA can solve some of the problems. If you need to restart Azure Monitor Agent on this machine, "
        "please execute the following commands as the root user:"
    )
    restart_commands = (
        " $ cd /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-<agent version number>/",
        " $ ./shim.sh -disable",
        " $ ./shim.sh -enable",
    )
    print(divider)
    print(restart_intro)
    for command in restart_commands:
        print(command)
### MAIN FUNCTION BODY BELOW ###
def run_troubleshooter():
    """Run the troubleshooter from the command line or the interactive menu.

    Flow:
      * Returns immediately if check_sudo() reports the user is not root.
      * If the first command-line argument is '-A', runs check_all(False),
        prints the results and next steps, then returns.
      * If the first command-line argument is '-L', runs collect_logs()
        and returns.
      * Otherwise shows the scenario menu in a loop. After a single
        scenario (1-6) the user is asked whether to run another; after
        'A' the loop ends. Next steps are printed once the loop ends.
        Choosing 'L' or quitting returns without printing next steps.
    """
    # check if running as sudo
    if not check_sudo():
        return

    # run all checks from command line
    if len(sys.argv) > 1 and sys.argv[1] == '-A':
        success = check_all(False)
        print_results(success)
        print_next_steps()
        return

    # run log collector from command line
    if len(sys.argv) > 1 and sys.argv[1] == '-L':
        collect_logs()
        return

    # Dispatch table is loop-invariant, so build it once. Keys are the
    # upper-cased menu selections.
    scenarios = {
        '1': check_installation,
        '2': check_connection,
        '3': check_general_health,
        '4': check_high_cpu_memory,
        '5': check_syslog,
        '6': check_custom_logs,
        'A': check_all
    }
    single_scenarios = ('1', '2', '3', '4', '5', '6')

    print("Welcome to the Azure Monitor Linux Agent Troubleshooter! What is your issue?\n")
    while True:
        # TODO: come up with scenarios
        print("================================================================================\n"
              "1: Installation failures. \n"
              "2: Agent doesn't start or cannot connect to Log Analytics service.\n"
              "3: Agent in unhealthy state. \n"
              "4: Agent consuming high CPU/memory. \n"
              "5: Syslog not flowing. \n"
              "6: Custom logs not flowing. \n"
              "================================================================================\n"
              "A: Run through all scenarios.\n"
              "L: Collect the logs for AMA.\n"
              "Q: Press 'Q' to quit.\n"
              "================================================================================")

        issue = get_input("Please select an option",
                          (lambda x: x.lower() in ['1', '2', '3', '4', '5', '6', 'q', 'quit', 'l', 'a']),
                          "Please enter an integer corresponding with your issue (1-6) to\n"
                          "continue, 'A' to run through all scenarios, 'L' to run the log collector, or 'Q' to quit.")
        selection = issue.lower()

        # quit troubleshooter
        if selection in ('q', 'quit'):
            print("Exiting the troubleshooter...")
            return

        # collect logs
        if selection == 'l':
            collect_logs()
            return

        # silent vs interactive mode
        print("--------------------------------------------------------------------------------")
        print("The troubleshooter can be run in two different modes.\n"
              "  - Silent Mode runs through with no input required\n"
              "  - Interactive Mode includes extra checks that require input")
        mode = get_input("Do you want to run the troubleshooter in silent (s) or interactive (i) mode?",
                         (lambda x: x.lower() in ['s', 'silent', 'i', 'interactive', 'q', 'quit']),
                         "Please enter 's'/'silent' to run silent mode, 'i'/'interactive' to run \n"
                         "interactive mode, or 'q'/'quit' to quit.").lower()
        if mode in ('q', 'quit'):
            print("Exiting the troubleshooter...")
            return
        if mode in ('s', 'silent'):
            print("Running troubleshooter in silent mode...")
            interactive_mode = False
        else:
            print("Running troubleshooter in interactive mode...")
            interactive_mode = True

        # run troubleshooter
        section = scenarios.get(issue.upper())
        if section is None:
            # The input validator above should make this unreachable. The
            # previous fallback was a zero-argument lambda that would have
            # raised TypeError when called with interactive=...; re-prompt
            # instead of crashing.
            print("Invalid input")
            continue
        print("================================================================================")
        success = section(interactive=interactive_mode)

        print_results(success)

        # only a single scenario offers the chance to run again; 'A' ends the loop
        if issue not in single_scenarios:
            break

        # keep the raw answer separate from the loop control so neither
        # variable switches between str and bool
        answer = get_input("Do you want to run another scenario? (y/n)",
                           (lambda x: x.lower() in ['y', 'yes', 'n', 'no']),
                           "Please type either 'y'/'yes' or 'n'/'no' to proceed.").lower()
        if answer in ('y', 'yes'):
            print("Please select another scenario below:")
        elif answer in ('n', 'no'):
            break

    print_next_steps()
    return
# Run the troubleshooter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run_troubleshooter()