// Excerpt: func (p *program) run()
// from rootcmd.go [273:469]

// run is the startup entry point of the agent service. It acquires the
// single-instance lock, initializes configuration, timers and channels,
// performs the bootstrap self-update check, starts the heartbeat and the
// inter-process message-bus server, and finally enables task fetching.
// Long-running work continues in background goroutines and timers after
// run returns from its synchronous startup phase.
func (p *program) run() {
	log.GetLogger().Infof("Starting...... version: %s githash: %s", version.AssistVersion, version.GitCommitHash)
	// Ensure only one agent instance runs on this machine. Fatal exits the
	// process immediately if another instance already holds the lock.
	SingleAppLock = single.New("AliyunAssistClientSingleLock")
	if err := SingleAppLock.CheckLock(); err != nil && err == single.ErrAlreadyRunning {
		log.GetLogger().Fatal("another instance of the app is already running, exiting")
	}
	checkagentpanic.RedirectStdouterr()
	G_Running = true
	G_StopEvent = make(chan struct{})

	initConfiguration(log.GetLogger().WithField("phase", "InitConfig"))

	if err := timermanager.InitTimerManager(); err != nil {
		// Fatalln exits the process; the return below is unreachable but kept
		// as a safety net in case the logger's exit behavior ever changes.
		log.GetLogger().Fatalln("Failed to initialize timer manager: " + err.Error())
		return
	}
	channel.TryStartGshellChannel()

	if runtime.GOOS == "windows" {
		pathutil.SetCurrentEnvPath()
	}
	// Logging current working directory information
	if currentWorkingDirectory, err := os.Getwd(); err == nil {
		log.GetLogger().Infof("Current working directory is: %s", currentWorkingDirectory)
	} else {
		log.GetLogger().WithError(err).Errorln("Failed to obtain current working directory")
	}

	// Block until a server host can be resolved, retrying with exponential
	// backoff capped at 180 seconds. The agent cannot proceed without a host.
	sleepIntervalSeconds := 3
	for {
		host := util.GetServerHost()
		if host != "" {
			log.GetLogger().Println("GET_HOST_OK ", host)
			break
		} else {
			log.GetLogger().Println("GET_HOST_ERROR")
		}
		time.Sleep(time.Duration(sleepIntervalSeconds) * time.Second)
		sleepIntervalSeconds = sleepIntervalSeconds * 2
		if sleepIntervalSeconds > 180 {
			sleepIntervalSeconds = 180
		}
	}

	// Use clientreport.LogAndReportPanic as default panic handler to report panic
	wrapgo.SetDefaultPanicHandler(clientreport.LogAndReportPanic)

	// Try to handle panic from code below
	defer func() {
		if panicPayload := recover(); panicPayload != nil {
			stacktrace := debug.Stack()
			wrapgo.CallDefaultPanicHandler(panicPayload, stacktrace)
		}
	}()

	// Check last panic and report it
	wrapgo.CallWithPanicHandler(checkagentpanic.CheckAgentPanic, clientreport.LogAndReportIgnorePanic)
	if instance.IsHybrid() {
		// Check hybrid instance's fingerprint file
		hybrid.CheckFingerprint()
		// On any HTTP POST error, detect server-side deregistration of this
		// hybrid instance and shut the agent down cleanly if so.
		util.SetHTTPPostErrHandler(func(httpResp *HttpRequest.Response, httpErr error) {
			if httpResp != nil {
				content, _ := httpResp.Content()
				respJson := gjson.Parse(content)
				errMsg := respJson.Get("errMsg")
				if errMsg.Exists() && errMsg.String() == "instance_deregistered" {
					log.GetLogger().Info("Clean up hybrid instance info and stop agent process self, because of errMsg: ", errMsg.String())
					// Service process will be stopped after hybrid.CleanUpRegisterDataAndExit()
					hybrid.CleanUpRegisterDataAndExit()
				}
			}
		})
	}

	// Check in main goroutine and update as soon as possible, which use stricter
	// timeout limitation. NOTE: The preparation phase timeout parameter should
	// be considered as the whole timeout toleration minus minimum sleeping time
	// for safe updating (5s) minus normal execution time of updating script
	// (usually less than 5s), e.g., 50s - 5s - 5s = 40s.
	if err := update.SafeBootstrapUpdate(time.Duration(40)*time.Second, time.Duration(30)*time.Second); err != nil {
		log.GetLogger().Errorln("Failed to check update when starting: " + err.Error())
		// Failed to update at starting phase would not terminate agent
		// return
	}

	if err := update.InitCheckUpdateTimer(); err != nil {
		// Report the failure event BEFORE Fatalln: Fatalln calls os.Exit, so
		// anything placed after it never runs. (Previously the report came
		// after Fatalln and was therefore never sent.)
		metrics.GetUpdateFailedEvent(
			"errormsg", fmt.Sprintf("InitCheckUpdateTimer error: %s", err.Error()),
		).ReportEvent()
		log.GetLogger().Fatalln("Failed to initialize update checker: " + err.Error())
		return
	}

	// Honor opt-out configuration flags; errors are only informational here.
	if disabled, err := flagging.DetectNormalizingCRLFDisabled(); disabled {
		log.GetLogger().WithError(err).Warning("CRLF-normalization has been disabled due to configuration")
	}
	if disabled, err := flagging.DetectTaskOutputRingbufferDisabled(); disabled {
		log.GetLogger().WithError(err).Warning("TaskOutput-Ringbuffer has been disabled due to configuration")
	}

	channel.StartChannelMgr()

	// Register callback functions that will be called when the network recover
	heartbeat.RegisterActionWhenNetRecover(map[string]func(){
		"SelectAvailableChannel": channel.OnNetworkRecover,
	})

	if err := heartbeat.InitHeartbeatTimer(); err != nil {
		log.GetLogger().Fatalln("Failed to initialize heartbeat: " + err.Error())
		return
	}

	// Start ipc server and init cryptdata package before fetching tasks,
	// because they may be relied on by tasks.
	cryptdata.Init()
	// Init commander manager
	commandermanager.InitCommanderManager("")
	// Initialize and serve inter-process functionalities in parallel
	wrapgo.GoWithDefaultPanicHandler(func() {
		messagebus_server.ListenAndServe(log.GetLogger(), buses.GetCentralEndpoint(true), nil,
			[]messagebus_server.RegisterFunc{
				cryptdata_server.RegisterAssistAgentServer,
				commander_server.RegisterAssistAgentServer,
				configure_server.RegisterAssistAgentServer,
			},
		)
	})

	// TODO: First heart-beat may fail and be failed to indicate agent is ready.
	// Retrying should be tried here.
	heartbeat.PingwithRetries(3)

	// Finally, fetching tasks could be allowed and agent starts to run normally.
	taskengine.EnableFetchingTask()
	log.GetLogger().Infoln("Started successfully")
	// And also log to stdout, which would be written to systemd-journal as well
	// as console via systemd
	fmt.Println("Started successfully")
	Started = true

	// Periodic tasks are retrieved only once at startup.
	// The interval between startup fetch task and the first heart-beat should
	// be minimized as much as possible.
	wrapgo.CallWithDefaultPanicHandler(func() {
		isColdstart, err := flagging.IsColdstart()
		if err != nil {
			log.GetLogger().WithError(err).Errorln("Error encountered when detecting cold-start flag")
		} else {
			startType := "not cold start"
			if isColdstart {
				startType = "cold start"
			}
			metrics.GetBaseStartupEvent(
				"type", startType,
				"osName", osutil.GetVersion(),
			).ReportEvent()
		}
		if !taskengine.IsStartupFetched() {
			taskengine.Fetch(false, "", taskengine.NormalTaskType)
		} else {
			log.GetLogger().Infoln("Startup tasks has been fetched together with kick_off tasks")
		}
	})

	// Execute operations that are not time sensitive finally, minimize the interval between critical
	// steps like fetch startup task and the first heart-beat.
	wrapgo.CallWithDefaultPanicHandler(func() {
		// Report last os panic if panic record found
		if isColdstart, err := flagging.IsColdstart(); err != nil || isColdstart {
			wrapgo.GoWithDefaultPanicHandler(checkospanic.ReportLastOsPanic)
		}

		// Initialize non-critical periodic items, failure of initialization will not interrupt agent.
		if err := statemanager.InitStateManagerTimer(); err != nil {
			log.GetLogger().Errorln("Failed to initialize statemanager: " + err.Error())
		}

		pluginmanager.InitPluginCheckTimer()

		if err := checkkdump.CheckKdumpTimer(); err != nil {
			log.GetLogger().Errorln("Failed to StartKdumpCheckTimer: ", err)
		} else {
			log.GetLogger().Infoln("Start StartKdumpCheckTimer")
		}

		if err := checkvirt.StartVirtIoVersionReport(); err != nil {
			log.GetLogger().Errorln("Failed to StartVirtIoVersionReport: " + err.Error())
		} else {
			log.GetLogger().Infoln("Start StartVirtIoVersionReport success")
		}

		// Start self kill monitor
		time.Sleep(time.Duration(3*60) * time.Second)
		log.GetLogger().Infoln("Start PerfMon ......")
		perfmon.StartSelfKillMon()
	})
}