AzureMonitorAgent/ama_tst/modules/general_health/check_status.py (52 lines of code) (raw):
import subprocess
import re
import platform
from error_codes import *
from errors import error_info
from helpers import run_cmd_output, get_input, is_metrics_configured
def check_restart_status(interactive):
"""
check if the subcomponents restart in a given time interval
"""
subcomponents = {'azuremonitoragent': 'azuremonitoragent'}
subcomponents['azuremonitor-agentlauncher'] = 'agentlauncher'
subcomponents['azuremonitor-coreagent'] = 'amacoreagent'
if is_metrics_configured():
subcomponents['metrics-extension'] = 'MetricsExtension'
subcomponents['metrics-sourcer'] = 'Telegraf'
restart_logs = ""
start = "yesterday"
end = "now"
since = "--since={0}".format(start)
until = "--until={0}".format(end)
if interactive:
print("--------------------------------------------------------------------------------")
print("Please enter a certain time range that you want to filter logs (default time range: from yesterday to now):\n")
print("(e.g. Since: <yyyy-mm-dd hh:mm:ss>) or <yyyy-mm-dd>")
start_input = get_input("Since: ")
end_input = get_input("Until: ")
print("--------------------------------------------------------------------------------")
if start_input != "":
since = '--since=\"{0}\"'.format(start_input)
start = start_input
if end_input != "":
until = '--until=\"{0}\"'.format(end_input)
end = end_input
for key in subcomponents.keys():
cmd = 'journalctl -n 100 --no-pager -u {0} {1} {2}'.format(key, since, until)
output = run_cmd_output(cmd)
lines = output.split('\n')
process_logs = {}
for line in lines:
match = re.findall(".*{0}\[.*\].*".format(subcomponents[key]), line)
if len(match) == 0:
continue
log = match[0]
pid = log.split('[')[1].split(']')[0]
if pid not in process_logs:
process_logs[pid] = log
# add to warning if restart more than 10 times recently
if len(process_logs) > 10:
logs = '\n'.join(process_logs.values())
restart_logs = restart_logs + "Possible restart loop in {0} detected ({1} restarts from {2} to {3}):\n{4}".format(key, len(process_logs), start, end, logs)
restart_logs = restart_logs + "\n--------------------------------------------------------------------------------\n"
if restart_logs:
error_info.append((restart_logs,))
return WARN_RESTART_LOOP
return NO_ERROR