in internal/pkg/agent/cmd/run.go [165:401]
// runElasticAgent performs the full start-up sequence of the agent and then
// blocks until the run ends.
//
// In order, it: loads the configuration, builds the logger, checks for
// root/Administrator privileges, attempts delayed enrollment, ensures the agent
// secret exists, migrates unencrypted config/state files to their encrypted
// counterparts, loads the agent info, applies the log level, invokes the upgrade
// watcher, creates the re-exec manager and APM tracer, creates the coordinator,
// sets up the monitoring server, starts the control server (plus an optional
// backwards-compatibility socket symlink), and finally runs the coordinator in
// a goroutine while waiting for a stop request, an OS signal, a re-exec request
// or the coordinator exiting on its own.
//
// Parameters:
//   - ctx is handed to the set-up helpers and to coord.Run.
//   - cancel is called once the wait loop exits, before waiting for coord.Run
//     to return (presumably it cancels ctx; confirm at the call site).
//   - override is forwarded to loadConfig, tryDelayEnroll and application.New.
//   - stop ends the wait loop when a receive on it succeeds.
//   - testingMode, fleetInitTimeout and modifiers are forwarded to
//     application.New.
//
// It returns the first set-up error encountered, or otherwise whatever
// logReturn yields for the error produced by coord.Run.
func runElasticAgent(ctx context.Context, cancel context.CancelFunc, override application.CfgOverrider, stop chan bool, testingMode bool, fleetInitTimeout time.Duration, modifiers ...component.PlatformModifier) error {
cfg, err := loadConfig(ctx, override)
if err != nil {
return err
}
// Start from the default log level; a level from the logging configuration
// replaces it when that section is present.
logLvl := logger.DefaultLogLevel
if cfg.Settings.LoggingConfig != nil {
logLvl = cfg.Settings.LoggingConfig.Level
}
baseLogger, err := logger.NewFromConfig("", cfg.Settings.LoggingConfig, cfg.Settings.EventLoggingConfig, true)
if err != nil {
return err
}
// Make sure to flush any buffered logs before we're done.
defer baseLogger.Sync() //nolint:errcheck // flushing buffered logs is best effort.
// l is the logger used throughout this function; it carries the agent name
// as its "log.source" field.
l := baseLogger.With("log", map[string]interface{}{
"source": agentName,
})
// try early to check if running as root
isRoot, err := utils.HasRoot()
if err != nil {
return logReturn(l, fmt.Errorf("failed to check for root/Administrator privileges: %w", err))
}
l.Infow("Elastic Agent started",
"process.pid", os.Getpid(),
"agent.version", version.GetAgentPackageVersion(),
"agent.unprivileged", !isRoot)
// cfg is reassigned from the result of tryDelayEnroll, so everything below
// uses the configuration it returned.
cfg, err = tryDelayEnroll(ctx, l, cfg, override)
if err != nil {
return logReturn(l, errors.New(err, "failed to perform delayed enrollment"))
}
// Only used as path metadata on the "could not load agent info" error below.
pathConfigFile := paths.AgentConfigFile()
// agent ID needs to stay empty in bootstrap mode
createAgentID := true
if cfg.Fleet != nil && cfg.Fleet.Server != nil && cfg.Fleet.Server.Bootstrap {
createAgentID = false
}
// Ensure we have the agent secret created.
// The secret is not created here if it exists already from the previous enrollment.
// This is needed for compatibility with agent running in standalone mode,
// that writes the agentID into fleet.enc (encrypted fleet.yml) before even loading the configuration.
err = secret.CreateAgentSecret(ctx, vault.WithUnprivileged(!isRoot))
if err != nil {
return logReturn(l, fmt.Errorf("failed to read/write secrets: %w", err))
}
// Migrate .yml files if the corresponding .enc does not exist
// the encrypted config does not exist but the unencrypted file does
err = migration.MigrateToEncryptedConfig(ctx, l, paths.AgentConfigYmlFile(), paths.AgentConfigFile())
if err != nil {
return logReturn(l, errors.New(err, "error migrating fleet config"))
}
// the encrypted state does not exist but the unencrypted file does
err = migration.MigrateToEncryptedConfig(ctx, l,
paths.AgentStateStoreYmlFile(),
paths.AgentStateStoreFile())
if err != nil {
return logReturn(l, errors.New(err, "error migrating agent state"))
}
// Load the agent info; createAgentID is false in Fleet Server bootstrap mode
// (see above).
agentInfo, err := info.NewAgentInfoWithLog(ctx, defaultLogLevel(cfg, logLvl.String()), createAgentID)
if err != nil {
return logReturn(l, errors.New(err,
"could not load agent info",
errors.TypeFilesystem,
errors.M(errors.MetaKeyPath, pathConfigFile)))
}
// Ensure that the log level now matches what is configured in the agentInfo.
// A level that fails to parse is only logged: logLvl keeps its previous value
// and logger.SetLevel is not called on that path.
if agentInfo.LogLevel() != "" {
var lvl logp.Level
err = lvl.Unpack(agentInfo.LogLevel())
if err != nil {
l.Error(errors.New(err, "failed to parse agent information log level"))
} else {
logLvl = lvl
logger.SetLevel(lvl)
}
} else {
// Set the initial log level (either default or from config file)
logger.SetLevel(logLvl)
}
// initiate agent watcher
if _, err := upgrade.InvokeWatcher(l, paths.TopBinaryPath()); err != nil {
// we should not fail because watcher is not working
l.Error(errors.New(err, "failed to invoke rollback watcher"))
}
// Create the re-exec manager around the path returned by reexecPath; it gets
// its own named logger, which is also used for the SIGHUP message below.
execPath, err := reexecPath()
if err != nil {
return logReturn(l, fmt.Errorf("failed to get reexec path: %w", err))
}
rexLogger := l.Named("reexec")
rex := reexec.NewManager(rexLogger, execPath)
// initTracer may return a nil tracer without an error, which is reported as
// instrumentation being disabled. A non-nil tracer is flushed and closed when
// this function returns.
tracer, err := initTracer(agentName, release.Version(), cfg.Settings.MonitoringConfig)
if err != nil {
return logReturn(l, fmt.Errorf("could not initiate APM tracer: %w", err))
}
if tracer != nil {
l.Info("APM instrumentation enabled")
defer func() {
tracer.Flush(nil)
tracer.Close()
}()
} else {
l.Info("APM instrumentation disabled")
}
// Create the coordinator and the config manager; the third value returned by
// application.New is not used here.
isBootstrap := configuration.IsFleetServerBootstrap(cfg.Fleet)
coord, configMgr, _, err := application.New(ctx, l, baseLogger, logLvl, agentInfo, rex, tracer, testingMode, fleetInitTimeout, isBootstrap, override, modifiers...)
if err != nil {
return logReturn(l, err)
}
// Set up the monitoring server and register it with the coordinator. The
// deferred stop guards against a nil server and discards the Stop error.
monitoringServer, err := setupMetrics(l, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, tracer, coord)
if err != nil {
return logReturn(l, err)
}
coord.RegisterMonitoringServer(monitoringServer)
defer func() {
if monitoringServer != nil {
_ = monitoringServer.Stop()
}
}()
// The control server is given the global diagnostic hooks plus the ones
// provided by the coordinator.
diagHooks := diagnostics.GlobalHooks()
diagHooks = append(diagHooks, coord.DiagnosticHooks()...)
controlLog := l.Named("control")
control := server.New(controlLog, agentInfo, coord, tracer, diagHooks, cfg.Settings.GRPC)
// if the configMgr implements the TestModeConfigSetter it means that Elastic Agent is in testing mode and
// the configuration will come in over the control protocol, so we set the config setting on the control protocol
// server so when the configuration comes in it gets passed to the coordinator
testingSetter, ok := configMgr.(server.TestModeConfigSetter)
if ok {
control.SetTestModeConfigSetter(testingSetter)
}
// start the control listener
if err := control.Start(); err != nil {
return logReturn(l, err)
}
defer control.Stop()
// create symlink from /run/elastic-agent.sock to `paths.ControlSocket()` when running as root
// this provides backwards compatibility as the control socket was moved with the addition of --unprivileged
// option during installation
//
// Windows `paths.ControlSocketRunSymlink()` is `""` so this is always skipped on Windows.
controlSocketRunSymlink := paths.ControlSocketRunSymlink(paths.InstallNamespace())
if isRoot && paths.RunningInstalled() && controlSocketRunSymlink != "" {
socketPath := strings.TrimPrefix(paths.ControlSocket(), "unix://")
socketLog := controlLog.With("path", socketPath).With("link", controlSocketRunSymlink)
// ensure it doesn't exist before creating the symlink
if err := os.Remove(controlSocketRunSymlink); err != nil && !errors.Is(err, os.ErrNotExist) {
socketLog.Errorf("Failed to remove existing control socket symlink %s: %s", controlSocketRunSymlink, err)
}
// Failing to create the symlink is logged but does not abort start-up.
if err := os.Symlink(socketPath, controlSocketRunSymlink); err != nil {
socketLog.Errorf("Failed to create control socket symlink %s -> %s: %s", controlSocketRunSymlink, socketPath, err)
} else {
socketLog.Infof("Created control socket symlink %s -> %s; allowing unix://%s connection", controlSocketRunSymlink, socketPath, controlSocketRunSymlink)
}
defer func() {
// delete the symlink on exit; a failure is logged but otherwise ignored
if err := os.Remove(controlSocketRunSymlink); err != nil {
socketLog.Errorf("Failed to remove control socket symlink %s: %s", controlSocketRunSymlink, err)
}
}()
}
// appDone is closed when coord.Run returns so the select loop below can
// observe it. The returned error is then sent on the unbuffered appErr
// channel, so the goroutine blocks until this function receives it after
// the loop.
appDone := make(chan bool)
appErr := make(chan error)
// Spawn the main Coordinator goroutine
go func() {
err := coord.Run(ctx)
close(appDone)
appErr <- err
}()
// listen for signals
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP)
// isRex records that a re-exec was requested, so rex.ShutdownComplete is
// called at the end. logShutdown controls whether the two shutdown messages
// are logged; it is cleared when the coordinator exits by itself or the
// re-exec shutdown channel fires.
isRex := false
logShutdown := true
// Wait for whichever event ends the run.
LOOP:
for {
select {
case <-stop:
l.Info("service.ProcessWindowsControlEvents invoked stop function. Shutting down")
break LOOP
case <-appDone:
l.Info("application done, coordinator exited")
logShutdown = false
break LOOP
case <-rex.ShutdownChan():
l.Info("reexec shutdown channel triggered")
isRex = true
logShutdown = false
break LOOP
case sig := <-signals:
l.Infof("signal %q received", sig)
if sig == syscall.SIGHUP {
// SIGHUP requests a re-exec but does not leave the loop itself; the loop
// keeps waiting for one of the other cases.
// NOTE(review): presumably rex.ShutdownChan() fires as a result of
// ReExec; confirm against the reexec package.
rexLogger.Infof("SIGHUP triggered re-exec")
isRex = true
rex.ReExec(nil)
} else {
break LOOP
}
}
}
if logShutdown {
l.Info("Shutting down Elastic Agent and sending last events...")
}
// Call cancel and then wait for coord.Run to return; its error becomes the
// result of this function.
cancel()
err = <-appErr
if logShutdown {
l.Info("Shutting down completed.")
}
// Tell the re-exec manager that shutdown has finished when a re-exec was
// requested.
if isRex {
rex.ShutdownComplete()
}
return logReturn(l, err)
}