in rootcmd.go [273:469]
// run drives the agent start-up sequence. In order it: checks the
// single-instance lock, initializes configuration and the timer manager,
// blocks until a server host can be obtained, installs panic reporting, runs
// the bootstrap update check, starts the channel manager and heartbeat, serves
// the inter-process message bus, enables task fetching, and finally starts the
// non-critical periodic checks.
//
// Failures of critical steps are logged at fatal level; failures of
// non-critical steps are only logged and start-up continues.
func (p *program) run() {
	log.GetLogger().Infof("Starting...... version: %s githash: %s", version.AssistVersion, version.GitCommitHash)

	// Refuse to start when another instance is already running. Any other
	// lock-check error does not stop start-up, but is logged instead of being
	// dropped silently.
	SingleAppLock = single.New("AliyunAssistClientSingleLock")
	if err := SingleAppLock.CheckLock(); err == single.ErrAlreadyRunning {
		log.GetLogger().Fatal("another instance of the app is already running, exiting")
	} else if err != nil {
		log.GetLogger().WithError(err).Warning("Failed to check single instance lock, continuing to start")
	}

	checkagentpanic.RedirectStdouterr()

	G_Running = true
	G_StopEvent = make(chan struct{})

	initConfiguration(log.GetLogger().WithField("phase", "InitConfig"))
	if err := timermanager.InitTimerManager(); err != nil {
		log.GetLogger().Fatalln("Failed to initialize timer manager: " + err.Error())
		return
	}
	channel.TryStartGshellChannel()
	if runtime.GOOS == "windows" {
		pathutil.SetCurrentEnvPath()
	}

	// Record the current working directory for diagnostics.
	if cwd, err := os.Getwd(); err != nil {
		log.GetLogger().WithError(err).Errorln("Failed to obtain current working directory")
	} else {
		log.GetLogger().Infof("Current working directory is: %s", cwd)
	}

	// Block until a server host is available. The wait between attempts
	// starts at 3s and doubles after every failure, capped at 180s.
	const maxHostRetryInterval = 180 * time.Second
	hostRetryInterval := 3 * time.Second
	for {
		host := util.GetServerHost()
		if host != "" {
			log.GetLogger().Println("GET_HOST_OK ", host)
			break
		}
		log.GetLogger().Println("GET_HOST_ERROR")

		time.Sleep(hostRetryInterval)
		hostRetryInterval *= 2
		if hostRetryInterval > maxHostRetryInterval {
			hostRetryInterval = maxHostRetryInterval
		}
	}

	// Use clientreport.LogAndReportPanic as the default panic handler so that
	// panics get reported.
	wrapgo.SetDefaultPanicHandler(clientreport.LogAndReportPanic)
	// Hand any panic raised by the remaining start-up code in this goroutine
	// to the default panic handler.
	defer func() {
		if panicPayload := recover(); panicPayload != nil {
			stacktrace := debug.Stack()
			wrapgo.CallDefaultPanicHandler(panicPayload, stacktrace)
		}
	}()

	// Check for a panic left by the previous run and report it.
	wrapgo.CallWithPanicHandler(checkagentpanic.CheckAgentPanic, clientreport.LogAndReportIgnorePanic)

	if instance.IsHybrid() {
		// Check the hybrid instance's fingerprint file.
		hybrid.CheckFingerprint()
		// Watch HTTP POST responses for the "instance_deregistered" error
		// message; when it shows up, clean up the registration data and exit.
		util.SetHTTPPostErrHandler(func(httpResp *HttpRequest.Response, httpErr error) {
			if httpResp == nil {
				return
			}
			// Best effort: the error from reading the body is ignored and
			// the content is only inspected for the errMsg field.
			content, _ := httpResp.Content()
			errMsg := gjson.Parse(content).Get("errMsg")
			if errMsg.Exists() && errMsg.String() == "instance_deregistered" {
				log.GetLogger().Info("Clean up hybrid instance info and stop agent process self, because of errMsg: ", errMsg.String())
				// Service process will be stopped after hybrid.CleanUpRegisterDataAndExit()
				hybrid.CleanUpRegisterDataAndExit()
			}
		})
	}

	// Check for updates in the main goroutine as early as possible, with a
	// stricter timeout than usual. NOTE: the preparation-phase timeout should
	// be the whole tolerated timeout minus the minimum sleeping time for safe
	// updating (5s) minus the normal execution time of the update script
	// (usually less than 5s), e.g., 50s - 5s - 5s = 40s.
	if err := update.SafeBootstrapUpdate(40*time.Second, 30*time.Second); err != nil {
		// A failed update check at start-up does not terminate the agent.
		log.GetLogger().Errorln("Failed to check update when starting: " + err.Error())
	}
	if err := update.InitCheckUpdateTimer(); err != nil {
		// Report the failure event BEFORE the fatal log. NOTE(review): this
		// assumes a logrus-style logger whose Fatal* methods exit the process,
		// which would make anything placed after Fatalln unreachable. Whether
		// the report is actually delivered before exit depends on how
		// ReportEvent is implemented - confirm.
		metrics.GetUpdateFailedEvent(
			"errormsg", fmt.Sprintf("InitCheckUpdateTimer error: %s", err.Error()),
		).ReportEvent()
		log.GetLogger().Fatalln("Failed to initialize update checker: " + err.Error())
		return
	}

	// Log a warning for each optional feature that is reported as disabled.
	if disabled, err := flagging.DetectNormalizingCRLFDisabled(); disabled {
		log.GetLogger().WithError(err).Warning("CRLF-normalization has been disabled due to configuration")
	}
	if disabled, err := flagging.DetectTaskOutputRingbufferDisabled(); disabled {
		log.GetLogger().WithError(err).Warning("TaskOutput-Ringbuffer has been disabled due to configuration")
	}

	channel.StartChannelMgr()
	// Register callbacks that will be called when the network recovers.
	heartbeat.RegisterActionWhenNetRecover(map[string]func(){
		"SelectAvailableChannel": channel.OnNetworkRecover,
	})
	if err := heartbeat.InitHeartbeatTimer(); err != nil {
		log.GetLogger().Fatalln("Failed to initialize heartbeat: " + err.Error())
		return
	}

	// Initialize the cryptdata package and start the IPC server before
	// fetching tasks, because tasks may rely on them.
	cryptdata.Init()
	// Initialize the commander manager.
	commandermanager.InitCommanderManager("")
	// Initialize and serve inter-process functionalities in parallel.
	wrapgo.GoWithDefaultPanicHandler(func() {
		messagebus_server.ListenAndServe(log.GetLogger(), buses.GetCentralEndpoint(true), nil,
			[]messagebus_server.RegisterFunc{
				cryptdata_server.RegisterAssistAgentServer,
				commander_server.RegisterAssistAgentServer,
				configure_server.RegisterAssistAgentServer,
			},
		)
	})

	// TODO: The first heartbeat may fail and thus fail to indicate that the
	// agent is ready. Retrying should be tried here.
	heartbeat.PingwithRetries(3)

	// Finally, fetching tasks is allowed and the agent starts to run normally.
	taskengine.EnableFetchingTask()
	log.GetLogger().Infoln("Started successfully")
	// Also print to stdout, which would be written to systemd-journal as well
	// as to the console via systemd.
	fmt.Println("Started successfully")
	Started = true

	// Periodic tasks are retrieved only once at startup. The interval between
	// the startup task fetch and the first heartbeat should be kept as small
	// as possible.
	wrapgo.CallWithDefaultPanicHandler(func() {
		isColdstart, err := flagging.IsColdstart()
		if err != nil {
			log.GetLogger().WithError(err).Errorln("Error encountered when detecting cold-start flag")
		} else {
			startType := "not cold start"
			if isColdstart {
				startType = "cold start"
			}
			metrics.GetBaseStartupEvent(
				"type", startType,
				"osName", osutil.GetVersion(),
			).ReportEvent()
		}

		if !taskengine.IsStartupFetched() {
			taskengine.Fetch(false, "", taskengine.NormalTaskType)
		} else {
			log.GetLogger().Infoln("Startup tasks has been fetched together with kick_off tasks")
		}
	})

	// Run operations that are not time sensitive last, to keep the interval
	// between critical steps (startup task fetch, first heartbeat) minimal.
	wrapgo.CallWithDefaultPanicHandler(func() {
		// Report the last OS panic on cold start, and also when the
		// cold-start flag could not be determined.
		if isColdstart, err := flagging.IsColdstart(); err != nil || isColdstart {
			wrapgo.GoWithDefaultPanicHandler(checkospanic.ReportLastOsPanic)
		}

		// Initialize non-critical periodic items; an initialization failure
		// does not interrupt the agent.
		if err := statemanager.InitStateManagerTimer(); err != nil {
			log.GetLogger().Errorln("Failed to initialize statemanager: " + err.Error())
		}

		pluginmanager.InitPluginCheckTimer()

		if err := checkkdump.CheckKdumpTimer(); err != nil {
			log.GetLogger().Errorln("Failed to StartKdumpCheckTimer: ", err)
		} else {
			log.GetLogger().Infoln("Start StartKdumpCheckTimer")
		}

		if err := checkvirt.StartVirtIoVersionReport(); err != nil {
			log.GetLogger().Errorln("Failed to StartVirtIoVersionReport: " + err.Error())
		} else {
			log.GetLogger().Infoln("Start StartVirtIoVersionReport success")
		}

		// Start the self-kill monitor three minutes after the steps above.
		time.Sleep(3 * time.Minute)
		log.GetLogger().Infoln("Start PerfMon ......")
		perfmon.StartSelfKillMon()
	})
}