in azurelinuxagent/ga/collect_logs.py [0:0]
def _collect_logs(self):
    """
    Run one log collection pass in a child process and report how it went.

    The agent re-invokes itself with the -collect-logs option, wrapped in systemd-run so that the
    child runs in its own scope unit under the log collector slice with the properties supplied by
    the cgroup configurator.

    A LogCollection event is always emitted through add_event, whether the pass succeeded or not.
    For two specific collector exit codes (invalid cgroups, graceful kill) self.stop() is called in
    addition to reporting the failure.

    :return: True if the command completed and the archive size could be read, False otherwise.
    """
    logger.info("Starting log collection...")

    # systemd-run wrapper: --scope starts the process as a transient scope unit; the properties
    # appended right after it set the systemd memory and cpu settings on that scope.
    scope_launcher = [
        "systemd-run",
        "--unit={0}".format(logcollector.CGROUPS_UNIT),
        "--slice={0}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE),
        "--scope",
    ]
    scope_properties = CGroupConfigurator.get_instance().get_logcollector_unit_properties()

    # The collector is the currently running agent itself, started again with -collect-logs.
    agent_invocation = [sys.executable, "-u", sys.argv[0], "-collect-logs"]
    full_command = scope_launcher + scope_properties + agent_invocation

    started_at = datetime.datetime.utcnow()
    success = False
    msg = None
    try:
        shellutil.run_command(full_command, log_error=False)
        elapsed = elapsed_milliseconds(started_at)
        msg = "Successfully collected logs. Archive size: {0} b, elapsed time: {1} ms.".format(
            os.path.getsize(COMPRESSED_ARCHIVE_PATH), elapsed)
        logger.info(msg)
        success = True
    except Exception as error:
        elapsed = elapsed_milliseconds(started_at)
        failure = ustr(error)

        if isinstance(error, CommandError):
            # pylint cannot narrow the type from the isinstance check above, so the attribute access
            # is explicitly exempted from no-member.
            exit_code = error.returncode  # pylint: disable=no-member
            failure = ustr("Log Collector exited with code {0}").format(exit_code)

            if exit_code == logcollector.INVALID_CGROUPS_ERRCODE:
                stop_notice = "Disabling periodic log collection until service restart due to process error."
            elif exit_code == logcollector.GRACEFUL_KILL_ERRCODE:
                # The collector exits with this code when it goes over its memory limit. That is
                # treated as a persistent condition, hence the periodic operation is stopped.
                stop_notice = "Disabling periodic log collection until service restart due to exceeded process memory limit."
            else:
                stop_notice = None

            if stop_notice is None:
                logger.info(failure)
            else:
                logger.info(stop_notice)
                self.stop()

        # Not written to the local log here: the process' stdout and stderr were already logged.
        msg = "Failed to collect logs. Elapsed time: {0} ms. Error: {1}".format(elapsed, failure)
    finally:
        # Telemetry is sent on every path; success and msg keep their initial values if the failure
        # was not an Exception subclass.
        add_event(
            name=AGENT_NAME,
            version=CURRENT_VERSION,
            op=WALAEventOperation.LogCollection,
            is_success=success,
            message=msg,
            log_event=False)

    return success