in azurelinuxagent/agent.py [0:0]
def collect_logs(self, is_full_mode):
    """
    Run the log collector and report the outcome through the logger and telemetry events.

    When CollectLogsHandler.is_enabled_monitor_cgroups_check() is true, the cgroup of the
    current process is validated first: the cgroup API must be creatable, every supported
    controller must be present, and the process must be in the expected slice. Any of those
    failures emits a warning event and exits with logcollector.INVALID_CGROUPS_ERRCODE.

    If the collection itself raises, the error is logged and the process exits with code 1.
    On success nothing is returned; the archive path is only logged.

    :param is_full_mode: selects the "full" vs "normal" log message and is forwarded
                         unchanged to the LogCollector constructor.
    """
    logger.set_prefix("LogCollector")
    logger.info("Running log collector mode full" if is_full_mode else "Running log collector mode normal")

    LogCollector.initialize_telemetry()

    # Validate the cgroup this process runs in (only when the check is enabled)
    monitor = None
    controllers = []
    if CollectLogsHandler.is_enabled_monitor_cgroups_check():
        try:
            api = create_cgroup_api()
            logger.info(
                "Using cgroup {0} for resource enforcement and monitoring".format(api.get_cgroup_version()))
        except InvalidCgroupMountpointException as mountpoint_error:
            event.warn(
                WALAEventOperation.LogCollection,
                "The agent does not support cgroups if the default systemd mountpoint is not being used: {0}",
                ustr(mountpoint_error))
            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
        except CGroupsException as cgroups_error:
            event.warn(
                WALAEventOperation.LogCollection,
                "Unable to determine which cgroup version to use: {0}",
                ustr(cgroups_error))
            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

        cgroup = api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
        controllers = cgroup.get_controllers()
        for tracked in controllers:
            logger.info("{0} controller for cgroup: {1}".format(tracked.get_controller_type(), tracked))

        # The number of controllers found must match the number of supported controller names
        if len(controllers) != len(cgroup.get_supported_controller_names()):
            event.warn(
                WALAEventOperation.LogCollection,
                "At least one required controller is missing. The following controllers are required for the log collector to run: {0}",
                cgroup.get_supported_controller_names())
            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

        required_slice = cgroupconfigurator.LOGCOLLECTOR_SLICE
        if not cgroup.check_in_expected_slice(required_slice):
            event.warn(
                WALAEventOperation.LogCollection,
                "The Log Collector process is not in the proper cgroups. Expected slice: {0}",
                required_slice)
            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

    try:
        collector = LogCollector(is_full_mode)

        # Resource monitoring runs only when the agent itself launched the log collector;
        # a collector started by any other means is not monitored.
        if CollectLogsHandler.is_enabled_monitor_cgroups_check():
            # Only the first CPU controller (if any) gets its usage baseline initialized.
            cpu_controller = next((c for c in controllers if isinstance(c, _CpuController)), None)
            if cpu_controller is not None:
                cpu_controller.initialize_cpu_usage()
            monitor = get_log_collector_monitor_handler(controllers)
            monitor.run()

        archive, total_uncompressed_size = collector.collect_logs_and_get_archive()
        logger.info(
            "Log collection successfully completed. Archive can be found at {0} "
            "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))

        if monitor is not None:
            # Stop the monitor before reading the maximum metrics it recorded.
            monitor.stop()
            try:
                summary = monitor.get_max_recorded_metrics()
                summary['Total Uncompressed File Size (B)'] = total_uncompressed_size
                summary_json = json.dumps(summary)
                logger.info(summary_json)
                event.add_event(op=event.WALAEventOperation.LogCollection, message=summary_json, log_event=False)
            except Exception as report_error:
                # A failure to report the summary is recorded but does not fail the collection.
                failure_note = "An error occurred while reporting log collector resource usage summary: {0}".format(
                    ustr(report_error))
                logger.warn(failure_note)
                event.add_event(
                    op=event.WALAEventOperation.LogCollection,
                    is_success=False,
                    message=failure_note,
                    log_event=False)

    except Exception as collection_error:
        logger.error("Log collection completed unsuccessfully. Error: {0}".format(ustr(collection_error)))
        logger.info("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
        sys.exit(1)
    finally:
        # Runs on every path (including sys.exit above) so a started monitor is always stopped.
        if monitor is not None:
            monitor.stop()