in agent/perfmon/perfmon.go [143:211]
// checkCpuMemLoad evaluates one CPU/memory sample against the configured
// resource limits, reports an overload event for each resource that is at or
// above its limit, and escalates once a resource has been overloaded for
// flagging.GetResourceOverloadLimit() consecutive evaluated samples.
//
// cpuUsage is compared against flagging.GetResourceCpuLimit() and memory
// against flagging.GetResourceMemLimit(); the units are whatever those limits
// use (not visible from this function).
//
// The sample is skipped entirely (the overload counters are left untouched)
// while any task or session task is running, while a CPU-intensive update
// action is running, or while a state-manager timer is running.
func checkCpuMemLoad(cpuUsage float64, memory int64) {
	// Monitor performance only when no task is executing.
	taskFactory := taskengine.GetTaskFactory()
	if taskFactory.IsAnyTaskRunning() || update.IsCPUIntensiveActionRunning() || taskengine.GetSessionFactory().IsAnyTaskRunning() {
		return
	}
	// Do not monitor performance while the final-state configuration is being
	// fetched and parsed, or while it is being applied or monitored.
	if statemanager.IsStateManagerTimerRunning() || statemanager.IsStateConfigTimerRunning() {
		return
	}

	if cpuUsage >= flagging.GetResourceCpuLimit() {
		cpu_overload_count += 1
		// Report asynchronously: capturing a CPU profile blocks for 10 seconds.
		// The current usage and counter are passed by value so the event
		// reflects this sample even if the counter changes afterwards.
		go func(cpuUsageNow float64, cpuOverLoadCount int) {
			var profileBuf bytes.Buffer
			var cpuProfile, cpuProfileErr string
			// rand.Intn(10000) is uniform over [0, 10000), so only the values
			// 0..99 (exactly 1 in 100) proceed to profiling.
			if rand.Intn(10000) >= 100 {
				cpuProfileErr = "only sample cpu profile with a probability of 1/100"
			} else if err := pprof.StartCPUProfile(&profileBuf); err == nil {
				time.Sleep(10 * time.Second)
				pprof.StopCPUProfile()
				cpuProfile = base64.StdEncoding.EncodeToString(profileBuf.Bytes())
			} else {
				// pprof.StartCPUProfile returns an error if profiling is
				// already enabled.
				cpuProfileErr = err.Error()
			}
			metrics.GetCpuOverloadEvent(
				"cpu", fmt.Sprintf("%.2f", cpuUsageNow),
				"info", fmt.Sprintf("CPU Overload... CPU=%.2f", cpuUsageNow),
				"count", strconv.Itoa(cpuOverLoadCount),
				"cpuProfile", cpuProfile,
				"cpuProfileErr", cpuProfileErr,
			).ReportEvent()
		}(cpuUsage, cpu_overload_count)
		log.GetLogger().Infoln("CPU Overload... CPU=", cpuUsage)
	} else {
		// A sample below the limit breaks the streak.
		cpu_overload_count = 0
	}

	if memory >= flagging.GetResourceMemLimit() {
		// Report the overload together with a snapshot of runtime.MemStats.
		mem_overload_count += 1
		memStats := &runtime.MemStats{}
		runtime.ReadMemStats(memStats)
		metrics.GetMemOverloadEvent(
			"mem", strconv.FormatInt(memory, 10),
			"info", fmt.Sprintf("Memory Overload... MEM=%d", memory),
			"count", strconv.Itoa(mem_overload_count),
			"HeapAlloc", strconv.FormatUint(memStats.HeapAlloc, 10),
			"HeapIdle", strconv.FormatUint(memStats.HeapIdle, 10),
			"HeapInuse", strconv.FormatUint(memStats.HeapInuse, 10),
			"HeapReleased", strconv.FormatUint(memStats.HeapReleased, 10),
			"StackInuse", strconv.FormatUint(memStats.StackInuse, 10),
		).ReportEvent()
		log.GetLogger().Infoln("Memory Overload... MEM=", memory)
	} else {
		// A sample below the limit breaks the streak.
		mem_overload_count = 0
	}

	limit := int(flagging.GetResourceOverloadLimit())
	if cpu_overload_count >= limit {
		// reachCpuOverloadLimit decides what happens next; its return value
		// becomes the new CPU overload counter.
		cpu_overload_count = reachCpuOverloadLimit(cpu_overload_count, cpuUsage)
	}
	if mem_overload_count >= limit {
		// Send a "self_kill" client report, then log at fatal level.
		// NOTE(review): Fatalln presumably terminates the process, as the
		// message suggests — confirm against the logger implementation.
		report := clientreport.ClientReport{
			ReportType: "self_kill",
			Info:       fmt.Sprintf("mem=%f", float64(memory)),
		}
		clientreport.SendReport(report)
		log.GetLogger().Fatalln("self kill for Memory Overload... Mem=", memory)
	}
}