AzureMonitorAgent/ama_tst/modules/logcollector.py (236 lines of code) (raw):
import datetime
import os
import platform
import shutil
import helpers
from error_codes import *
from connect.check_imds import check_metadata
DPKG_CMD = "dpkg -s azuremonitoragent"
RPM_CMD = "rpm -qi azuremonitoragent"
PS_CMD = "ps -ef | grep {0} | grep -v grep"
OPENSSL_CMD = "echo | openssl s_client -connect {0}:443 -brief"
SYSTEMCTL_CMD = "systemctl status {0} --no-pager"
JOURNALCTL_CMD = "journalctl -u {0} --no-pager --since \"30 days ago\" > {1}"
PS_CMD_CPU = "ps aux --sort=-pcpu | head -10"
PS_CMD_RSS = "ps aux --sort -rss | head -10"
PS_CMD_VSZ = "ps aux --sort -vsz | head -10"
DU_CMD = "du -h -d 1 {0} /var/opt/microsoft/azuremonitoragent/events"
VAR_DU_CMD = "du -h -d 1 {0} /var"
LS_CMD = "ls -al {0}"
NAMEI_CMD = "namei -om {0}"
TAIL_SYSLOG_CMD = "tail -10000 /var/log/{0} > {1}"
ArcSettingsFile = '/var/opt/azcmagent/localconfig.json'
PERMISSION_CHECK_FILES = ["/etc/opt/microsoft/azuremonitoragent/config-cache",
"/etc/opt/microsoft/azuremonitoragent",
"/var/opt/microsoft/azuremonitoragent",
"/var/run/azuremonitoragent",
"/opt/microsoft/azuremonitoragent",
"/run/azuremonitoragent",
"/var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-*"]
# File copying functions
def copy_file(src, dst):
if (os.path.isfile(src)):
print("Copying file {0}".format(src))
try:
if (not os.path.isdir(dst)):
os.mkdir(dst)
shutil.copy2(src, dst)
except Exception as e:
print("ERROR: Could not copy {0}: {1}".format(src, e))
print("Skipping over file {0}".format(src))
else:
print("File {0} doesn't exist, skipping".format(src))
return
def copy_dircontents(src, dst):
if (os.path.isdir(src)):
print("Copying contents of directory {0}".format(src))
try:
shutil.copytree(src, dst)
except Exception as e:
print("ERROR: Could not copy {0}: {1}".format(src, e))
print("Skipping over contents of directory {0}".format(src))
else:
print("Directory {0} doesn't exist, skipping".format(src))
return
# Log collecting functions
def collect_logs(output_dirpath, pkg_manager):
# collect MDSD information
copy_file("/etc/default/azuremonitoragent", os.path.join(output_dirpath,"mdsd"))
copy_file("/var/opt/microsoft/azuremonitoragent/events/taskstate.json", os.path.join(output_dirpath,"mdsd"))
copy_dircontents("/var/opt/microsoft/azuremonitoragent/log", os.path.join(output_dirpath,"mdsd","logs"))
# collect AMA DCR
copy_dircontents("/etc/opt/microsoft/azuremonitoragent", os.path.join(output_dirpath,"DCR"))
# get all AzureMonitorLinuxAgent-* directory names
for config_dir in filter((lambda x : x.startswith("Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-")), os.listdir("/var/lib/waagent")):
# collect AMA config and status information for all AzureMonitorLinuxAgent-* directories
ver = (config_dir.split('-'))[-1]
copy_dircontents(os.path.join("/var/lib/waagent",config_dir,"status"), os.path.join(output_dirpath,ver+"-status"))
copy_dircontents(os.path.join("/var/lib/waagent",config_dir,"config"), os.path.join(output_dirpath,ver+"-config"))
# collect system logs
system_logs = ""
if (pkg_manager == "dpkg"):
system_logs = "syslog"
elif (pkg_manager == "rpm"):
system_logs = "messages"
if (system_logs != ""):
for systemlog_file in filter((lambda x : x.startswith(system_logs)), os.listdir("/var/log")):
helpers.run_cmd_output(TAIL_SYSLOG_CMD.format(systemlog_file, os.path.join(output_dirpath,"system_logs")))
# collect rsyslog information (if present)
copy_file("/etc/rsyslog.conf", os.path.join(output_dirpath,"rsyslog"))
copy_dircontents("/etc/rsyslog.d", os.path.join(output_dirpath,"rsyslog","rsyslog.d"))
if (os.path.isfile("/etc/rsyslog.conf")):
helpers.run_cmd_output(JOURNALCTL_CMD.format("rsyslog", os.path.join(output_dirpath,"rsyslog","journalctl_output.log")))
# collect syslog-ng information (if present)
copy_dircontents("/etc/syslog-ng", os.path.join(output_dirpath,"syslog-ng"))
return
def collect_arc_logs(output_dirpath, pkg_manager):
# collect GC Extension logs
copy_dircontents("/var/lib/GuestConfig/ext_mgr_logs", os.path.join(output_dirpath,"GC_Extension"))
# collect AMA Extension logs
for config_dir in filter((lambda x : x.startswith("Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-")), os.listdir("/var/lib/GuestConfig/extension_logs")):
# collect AMA config and status information for all AzureMonitorLinuxAgent-* directories
ver = (config_dir.split('-'))[-1]
copy_dircontents(os.path.join("/var/lib/GuestConfig/extension_logs",config_dir), os.path.join(output_dirpath,ver+"-extension_logs"))
copy_file(ArcSettingsFile, os.path.join(output_dirpath,"Arc"))
# collect logs same to both Arc + Azure VM
collect_logs(output_dirpath, pkg_manager)
print("Arc logs collected")
return
def collect_azurevm_logs(output_dirpath, pkg_manager):
# collect waagent logs
for waagent_file in filter((lambda x : x.startswith("waagent.log")), os.listdir("/var/log")):
copy_file(os.path.join("/var/log",waagent_file), os.path.join(output_dirpath,"waagent"))
# collect AMA Extension logs
copy_dircontents("/var/log/azure/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", os.path.join(output_dirpath,"Microsoft.Azure.Monitor.AzureMonitorLinuxAgent"))
# collect logs same to both Arc + Azure VM
collect_logs(output_dirpath, pkg_manager)
print("Azure VM logs collected")
return
# Outfile function
def create_outfile(output_dirpath, logs_date, pkg_manager):
with open(os.path.join(output_dirpath,"amalinux.out"), 'w') as outfile:
outfile.write("Log Collection Start Time: {0}\n".format(logs_date))
outfile.write("--------------------------------------------------------------------------------\n")
# detected OS + version
vm_dist, vm_ver, _ = helpers.find_vm_distro()
if (vm_dist and vm_ver):
outfile.write("Linux OS detected: {0}\n".format(vm_dist))
outfile.write("Linux OS version detected: {0}\n".format(vm_ver))
else:
outfile.write("Indeterminate OS.\n")
# detected package manager
if (pkg_manager != ""):
outfile.write("Package manager detected: {0}\n".format(pkg_manager))
else:
outfile.write("Indeterminate package manager.\n")
outfile.write("--------------------------------------------------------------------------------\n")
# uname info
os_uname = os.uname()
outfile.write("Hostname: {0}\n".format(os_uname[1]))
outfile.write("Release Version: {0}\n".format(os_uname[2]))
outfile.write("Linux UName: {0}\n".format(os_uname[3]))
outfile.write("Machine Type: {0}\n".format(os_uname[4]))
outfile.write("--------------------------------------------------------------------------------\n")
# python version
outfile.write("Python Version: {0}\n".format(platform.python_version()))
outfile.write("--------------------------------------------------------------------------------\n")
# /etc/os-release
if (os.path.isfile("/etc/os-release")):
outfile.write("Contents of /etc/os-release:\n")
with open("/etc/os-release", 'r') as os_info:
for line in os_info:
outfile.write(line)
outfile.write("--------------------------------------------------------------------------------\n")
# VM Metadata
attributes = ['azEnvironment', 'resourceId', 'location']
outfile.write("VM Metadata from IMDS:\n")
for attr in attributes:
attr_result = helpers.geninfo_lookup(attr)
if (not attr_result) and (check_metadata() == NO_ERROR):
attr_result = helpers.geninfo_lookup(attr)
if (attr_result != None):
outfile.write("{0}: {1}\n".format(attr, attr_result))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# AMA install status
(ama_vers, _) = helpers.find_ama_version()
(ama_installed, ama_unique) = helpers.check_ama_installed(ama_vers)
outfile.write("AMA Install Status: {0}\n".format("installed" if ama_installed else "not installed"))
if (ama_installed):
if (not ama_unique):
outfile.write("Multiple AMA versions detected: {0}\n".format(', '.join(ama_vers)))
else:
outfile.write("AMA Version: {0}\n".format(ama_vers[0]))
outfile.write("--------------------------------------------------------------------------------\n")
# connection to endpoints
wkspc_id, wkspc_region, e = helpers.find_dcr_workspace()
if e == None:
outfile.write("Workspace ID: {0}\n".format(str(wkspc_id)))
outfile.write("Workspace region: {0}\n".format(str(wkspc_region)))
outfile.write("--------------------------------------------------------------------------------\n")
# AMA package info (dpkg/rpm)
if (pkg_manager == "dpkg"):
outfile.write("Output of command: {0}\n".format(DPKG_CMD))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(DPKG_CMD))
outfile.write("--------------------------------------------------------------------------------\n")
elif (pkg_manager == "rpm"):
outfile.write("Output of command: {0}\n".format(RPM_CMD))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(RPM_CMD))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# ps -ef output
for process in ["azuremonitoragent", "mdsd", "telegraf"]:
ps_process_cmd = PS_CMD.format(process)
outfile.write("Output of command: {0}\n".format(ps_process_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(ps_process_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# rsyslog / syslog-ng status via systemctl
for syslogd in ["rsyslog", "syslog-ng"]:
systemctl_cmd = SYSTEMCTL_CMD.format(syslogd)
outfile.write("Output of command: {0}\n".format(systemctl_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(systemctl_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# ps aux output
for cmd in [PS_CMD_CPU, PS_CMD_RSS, PS_CMD_VSZ]:
outfile.write("Output of command: {0}\n".format(cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# du output on events folder
for flag in ["", "--apparent-size"]:
du_full_cmd = DU_CMD.format(flag)
outfile.write("Output of command: {0}\n".format(du_full_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(du_full_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# du output on /var folder
for flag in ["", "--apparent-size"]:
du_full_cmd = VAR_DU_CMD.format(flag)
outfile.write("Output of command: {0}\n".format(du_full_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(du_full_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
# file permission check
for file in PERMISSION_CHECK_FILES:
file_permission_cmd = LS_CMD.format(file)
outfile.write("Output of command: {0}\n".format(file_permission_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(file_permission_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
# parent directory permission check
for file in PERMISSION_CHECK_FILES:
dir_permission_cmd = NAMEI_CMD.format(file)
outfile.write("Output of command: {0}\n".format(dir_permission_cmd))
outfile.write("========================================\n")
outfile.write(helpers.run_cmd_output(dir_permission_cmd))
outfile.write("--------------------------------------------------------------------------------\n")
outfile.write("--------------------------------------------------------------------------------\n")
### MAIN FUNCTION BODY BELOW ###
def run_logcollector(output_location):
# check if Arc is being used
is_arc_vm = helpers.is_arc_installed()
# create directory to hold copied logs
vm_type = "azurearc" if is_arc_vm else "azurevm"
logs_date = str(datetime.datetime.utcnow().isoformat()).replace(":", ".") # ':' causes issues with tar
output_dirname = "amalogs-{0}-{1}".format(vm_type, logs_date)
output_dirpath = os.path.join(output_location, output_dirname)
try:
os.mkdir(output_dirpath)
except OSError as e:
print("ERROR: Could not create output directory: {0}".format(e))
return
# get VM information needed for log collection
pkg_manager = helpers.find_package_manager()
# collect the logs
if (is_arc_vm):
print("Azure Arc detected, collecting logs for Azure Arc.")
print("--------------------------------------------------------------------------------")
collect_arc_logs(output_dirpath, pkg_manager)
else:
print("Azure Arc not detected, collected logs for Azure VM.")
print("--------------------------------------------------------------------------------")
collect_azurevm_logs(output_dirpath, pkg_manager)
print("--------------------------------------------------------------------------------")
# create out file (for simple checks)
print("Creating 'amalinux.out' file")
create_outfile(output_dirpath, logs_date, pkg_manager)
print("--------------------------------------------------------------------------------")
# zip up logs
print("Zipping up logs and removing temporary output directory")
tgz_filename = "{0}.tgz".format(output_dirname)
tgz_filepath = os.path.join(output_location, tgz_filename)
print("--------------------------------------------------------------------------------")
print(helpers.run_cmd_output("cd {0}; tar -zcf {1} {2}".format(output_location, tgz_filename, output_dirname)))
shutil.rmtree(output_dirpath, ignore_errors=True)
print("--------------------------------------------------------------------------------")
print("You can find the AMA logs at the following location: {0}".format(tgz_filepath))
return