core/monitor/Monitor.cpp (550 lines of code) (raw):
// Copyright 2022 iLogtail Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Monitor.h"
#include "MetricRecord.h"
#if defined(__linux__)
#include <asm/param.h>
#include <unistd.h>
#elif defined(_MSC_VER)
#include <Psapi.h>
#endif
#include <fstream>
#include <functional>
#include "app_config/AppConfig.h"
#include "application/Application.h"
#include "collection_pipeline/CollectionPipelineManager.h"
#include "common/DevInode.h"
#include "common/ExceptionBase.h"
#include "common/LogtailCommonFlags.h"
#include "common/MachineInfoUtil.h"
#include "common/RuntimeUtil.h"
#include "common/StringTools.h"
#include "common/TimeUtil.h"
#include "common/version.h"
#include "constants/Constants.h"
#include "file_server/event_handler/LogInput.h"
#include "go_pipeline/LogtailPlugin.h"
#include "logger/Logger.h"
#include "monitor/AlarmManager.h"
#include "monitor/SelfMonitorServer.h"
#include "plugin/flusher/sls/FlusherSLS.h"
#include "protobuf/sls/sls_logs.pb.h"
#include "provider/Provider.h"
#include "runner/FlusherRunner.h"
#ifdef __ENTERPRISE__
#include "config/provider/EnterpriseConfigProvider.h"
#endif
using namespace std;
using namespace sls_logs;
DEFINE_FLAG_BOOL(logtail_dump_monitor_info, "enable to dump Logtail monitor info (CPU, mem)", false);
DECLARE_FLAG_BOOL(check_profile_region);
namespace logtail {
string LoongCollectorMonitor::mHostname;
string LoongCollectorMonitor::mIpAddr;
string LoongCollectorMonitor::mOsDetail;
string LoongCollectorMonitor::mUsername;
int32_t LoongCollectorMonitor::mSystemBootTime = -1;
string LoongCollectorMonitor::mStartTime;
inline void CpuStat::Reset() {
#if defined(__linux__)
mUserTime = 0;
mSysTime = 0;
mSysTotalTime = GetCurrentTimeInMilliSeconds();
#elif defined(_MSC_VER)
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
mNumProcessors = sysInfo.dwNumberOfProcessors;
FILETIME ftime, fsys, fuser;
GetSystemTimeAsFileTime(&ftime);
memcpy(&mLastCPU, &ftime, sizeof(FILETIME));
mSelf = GetCurrentProcess();
GetProcessTimes(mSelf, &ftime, &ftime, &fsys, &fuser);
memcpy(&mLastSysCPU, &fsys, sizeof(FILETIME));
memcpy(&mLastUserCPU, &fuser, sizeof(FILETIME));
#endif
mViolateNum = 0;
mCpuUsage = 0;
}
LogtailMonitor::LogtailMonitor() = default;
LogtailMonitor* LogtailMonitor::GetInstance() {
static LogtailMonitor instance;
return &instance;
}
bool LogtailMonitor::Init() {
mScaledCpuUsageUpLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit();
mStatusCount = 0;
mShouldSuicide.store(false);
// Reset process and realtime CPU statistics.
mCpuStat.Reset();
mRealtimeCpuStat.Reset();
// Reset memory statistics.
mMemStat.Reset();
#if defined(__linux__)
// Reset OS CPU statistics.
CalCpuCores();
mScaledCpuUsageStep = 0.1;
mOsCpuStatForScale.Reset();
CalOsCpuStat();
for (int32_t i = 0; i < CPU_STAT_FOR_SCALE_ARRAY_SIZE; ++i) {
mCpuArrayForScale[i] = 0;
mOsCpuArrayForScale[i] = mOsCpuStatForScale.mOsCpuUsage;
}
mCpuArrayForScaleIdx = 0;
#endif
// Initialize monitor thread.
mThreadRes = async(launch::async, &LogtailMonitor::Monitor, this);
return true;
}
void LogtailMonitor::Stop() {
{
lock_guard<mutex> lock(mThreadRunningMux);
mIsThreadRunning = false;
}
mStopCV.notify_one();
if (!mThreadRes.valid()) {
return;
}
future_status s = mThreadRes.wait_for(chrono::seconds(1));
if (s == future_status::ready || mShouldSuicide.load()) {
LOG_INFO(sLogger, ("profiling", "stopped successfully"));
} else {
LOG_WARNING(sLogger, ("profiling", "forced to stopped"));
}
}
void LogtailMonitor::Monitor() {
LOG_INFO(sLogger, ("profiling", "started"));
int32_t lastMonitorTime = time(NULL), lastCheckHardLimitTime = time(nullptr);
CpuStat curCpuStat;
{
unique_lock<mutex> lock(mThreadRunningMux);
while (mIsThreadRunning) {
if (mStopCV.wait_for(lock, std::chrono::seconds(1), [this]() { return !mIsThreadRunning; })) {
break;
}
GetCpuStat(curCpuStat);
// Update mRealtimeCpuStat for InputFlowControl.
if (AppConfig::GetInstance()->IsInputFlowControl()) {
CalCpuStat(curCpuStat, mRealtimeCpuStat);
}
int32_t monitorTime = time(NULL);
#if defined(__linux__) // TODO: Add auto scale support for Windows.
// Update related CPU statistics for controlling resource auto scale (Linux only).
if (AppConfig::GetInstance()->IsResourceAutoScale()) {
CalCpuStat(curCpuStat, mCpuStatForScale);
CalOsCpuStat();
mCpuArrayForScale[mCpuArrayForScaleIdx % CPU_STAT_FOR_SCALE_ARRAY_SIZE] = mCpuStatForScale.mCpuUsage;
mOsCpuArrayForScale[mCpuArrayForScaleIdx % CPU_STAT_FOR_SCALE_ARRAY_SIZE]
= mOsCpuStatForScale.mOsCpuUsage;
++mCpuArrayForScaleIdx;
CheckScaledCpuUsageUpLimit();
LOG_DEBUG(sLogger,
("mCpuStatForScale", mCpuStatForScale.mCpuUsage)("mOsCpuStatForScale",
mOsCpuStatForScale.mOsCpuUsage));
}
#endif
static int32_t checkHardLimitInterval
= INT32_FLAG(monitor_interval) > 30 ? INT32_FLAG(monitor_interval) / 6 : 5;
if ((monitorTime - lastCheckHardLimitTime) >= checkHardLimitInterval) {
lastCheckHardLimitTime = monitorTime;
GetMemStat();
LoongCollectorMonitor::GetInstance()->SetAgentMemory(mMemStat.mRss);
CalCpuStat(curCpuStat, mCpuStat);
LoongCollectorMonitor::GetInstance()->SetAgentCpu(mCpuStat.mCpuUsage);
if (CheckHardMemLimit()) {
LOG_ERROR(sLogger,
("Resource used by program exceeds hard limit",
"prepare restart Logtail")("mem_rss", mMemStat.mRss));
mShouldSuicide.store(true);
break;
}
}
// Update statistics and send to logtail_status_profile regularly.
// If CPU or memory limit triggered, send to logtail_suicide_profile.
if ((monitorTime - lastMonitorTime) >= INT32_FLAG(monitor_interval)) {
lastMonitorTime = monitorTime;
// Memory usage has exceeded limit, try to free some timeout objects.
if (1 == mMemStat.mViolateNum) {
LOG_DEBUG(sLogger, ("Memory is upper limit", "run gabbage collection."));
LogInput::GetInstance()->SetForceClearFlag(true);
}
// CalCpuLimit and CalMemLimit will check if the number of violation (CPU
// or memory exceeds limit) // is greater or equal than limits (
// flag(cpu_limit_num) and flag(mem_limit_num)).
// Returning true means too much violations, so we have to prepare to restart
// logtail to release resource.
// Mainly for controlling memory because we have no idea to descrease memory usage.
if (CheckSoftCpuLimit() || CheckSoftMemLimit()) {
LOG_ERROR(sLogger,
("Resource used by program exceeds upper limit for some time",
"prepare restart Logtail")("cpu_usage", mCpuStat.mCpuUsage)("mem_rss", mMemStat.mRss));
mShouldSuicide.store(true);
break;
}
if (IsHostIpChanged()) {
mShouldSuicide.store(true);
break;
}
SendStatusProfile(false);
if (BOOL_FLAG(logtail_dump_monitor_info)) {
if (!DumpMonitorInfo(monitorTime))
LOG_ERROR(sLogger, ("Fail to dump monitor info", ""));
}
}
}
}
if (mShouldSuicide.load()) {
Suicide();
}
}
bool LogtailMonitor::SendStatusProfile(bool suicide) {
mStatusCount++;
if (!suicide && mStatusCount % 2 != 0)
return false;
auto now = GetCurrentLogtailTime();
// Check input thread.
int32_t lastReadEventTime = LogInput::GetInstance()->GetLastReadEventTime();
if (lastReadEventTime > 0
&& (now.tv_sec - lastReadEventTime > AppConfig::GetInstance()->GetForceQuitReadTimeout())) {
LOG_ERROR(sLogger, ("last read event time is too old", lastReadEventTime)("prepare force exit", ""));
AlarmManager::GetInstance()->SendAlarm(
LOGTAIL_CRASH_ALARM, "last read event time is too old: " + ToString(lastReadEventTime) + " force exit");
AlarmManager::GetInstance()->ForceToSend();
sleep(10);
_exit(1);
}
return mIsThreadRunning;
}
bool LogtailMonitor::GetMemStat() {
#if defined(__linux__)
const char* SELF_STATM_PATH = "/proc/self/statm";
std::ifstream fin;
fin.open(SELF_STATM_PATH);
if (!fin) {
LOG_ERROR(sLogger, ("open stat error", ""));
return false;
}
fin.ignore(100, ' ');
fin >> mMemStat.mRss;
uint32_t pagesize = getpagesize();
pagesize /= 1024; // page size in kb
mMemStat.mRss *= pagesize;
mMemStat.mRss /= 1024; // rss in mb
fin.close();
return true;
#elif defined(_MSC_VER)
PROCESS_MEMORY_COUNTERS pmc;
GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
mMemStat.mRss = pmc.WorkingSetSize / 1024 / 1024;
return true;
#endif
}
// Linux: get from /proc/self/stat.
// Windows: call GetSystemTimeAsFileTime to get global CPU time and
// call GetProcessTimes to get CPU times of logtail process.
// NOTE: for Linux, single core CPU usage is returned, but for Windows,
// it's whole CPU usage.
bool LogtailMonitor::GetCpuStat(CpuStat& cur) {
#if defined(__linux__)
const char* SELF_STAT_PATH = "/proc/self/stat";
std::ifstream fin;
fin.open(SELF_STAT_PATH);
uint64_t start = GetCurrentTimeInMilliSeconds();
if (!fin) {
LOG_ERROR(sLogger, ("open stat error", ""));
return false;
}
for (uint32_t i = 0; i < 13; ++i) {
fin.ignore(100, ' ');
}
fin >> cur.mUserTime;
fin >> cur.mSysTime;
fin.close();
uint64_t end = GetCurrentTimeInMilliSeconds();
cur.mSysTotalTime = (start + end) / 2;
return true;
#elif defined(_MSC_VER)
FILETIME ftime, fsys, fuser;
ULARGE_INTEGER now, sys, user;
GetSystemTimeAsFileTime(&ftime);
memcpy(&now, &ftime, sizeof(FILETIME));
GetProcessTimes(cur.mSelf, &ftime, &ftime, &fsys, &fuser);
memcpy(&sys, &fsys, sizeof(FILETIME));
memcpy(&user, &fuser, sizeof(FILETIME));
float percent = (sys.QuadPart - cur.mLastSysCPU.QuadPart) + (user.QuadPart - cur.mLastUserCPU.QuadPart);
percent /= (now.QuadPart - cur.mLastCPU.QuadPart);
// percent /= mCPUStat.mNumProcessors;//compute single core cpu util, as Linux logtail
cur.mLastCPU = now;
cur.mLastUserCPU = user;
cur.mLastSysCPU = sys;
cur.mCpuUsage = percent;
return true;
#endif
}
void LogtailMonitor::CalCpuStat(const CpuStat& curCpu, CpuStat& savedCpu) {
#if defined(__linux__)
const float MILLI_TICK_PER_SEC = 1000.0;
int64_t delta = curCpu.mSysTotalTime - savedCpu.mSysTotalTime;
if (delta == 0) {
return;
}
savedCpu.mCpuUsage = (curCpu.mUserTime + curCpu.mSysTime - savedCpu.mUserTime - savedCpu.mSysTime) * 1.0 / HZ
/ (delta / MILLI_TICK_PER_SEC);
savedCpu.mUserTime = curCpu.mUserTime;
savedCpu.mSysTime = curCpu.mSysTime;
savedCpu.mSysTotalTime = curCpu.mSysTotalTime;
#elif defined(_MSC_VER)
float percent = (curCpu.mLastSysCPU.QuadPart - savedCpu.mLastSysCPU.QuadPart)
+ (curCpu.mLastUserCPU.QuadPart - savedCpu.mLastUserCPU.QuadPart);
percent /= (curCpu.mLastCPU.QuadPart - savedCpu.mLastCPU.QuadPart);
savedCpu.mCpuUsage = percent;
savedCpu.mLastCPU = curCpu.mLastCPU;
savedCpu.mLastSysCPU = curCpu.mLastSysCPU;
savedCpu.mLastUserCPU = curCpu.mLastUserCPU;
#endif
}
bool LogtailMonitor::CheckSoftCpuLimit() {
float cpuUsageLimit = AppConfig::GetInstance()->IsResourceAutoScale()
? AppConfig::GetInstance()->GetScaledCpuUsageUpLimit()
: AppConfig::GetInstance()->GetCpuUsageUpLimit();
if (cpuUsageLimit < mCpuStat.mCpuUsage) {
if (++mCpuStat.mViolateNum > INT32_FLAG(cpu_limit_num))
return true;
} else
mCpuStat.mViolateNum = 0;
return false;
}
bool LogtailMonitor::CheckSoftMemLimit() {
if (mMemStat.mRss > AppConfig::GetInstance()->GetMemUsageUpLimit()) {
if (++mMemStat.mViolateNum > INT32_FLAG(mem_limit_num))
return true;
} else
mMemStat.mViolateNum = 0;
return false;
}
bool LogtailMonitor::CheckHardMemLimit() {
return mMemStat.mRss > 5 * AppConfig::GetInstance()->GetMemUsageUpLimit();
}
bool LogtailMonitor::DumpMonitorInfo(time_t monitorTime) {
string path = GetAgentLogDir() + GetMonitorInfoFileName();
ofstream outfile(path.c_str(), ofstream::app);
if (!outfile)
return false;
outfile << "time:" << monitorTime << "\t";
outfile << "cpu_usage:" << mCpuStat.mCpuUsage << "\t";
outfile << "mem_rss:" << mMemStat.mRss << "\n";
return true;
}
bool LogtailMonitor::IsHostIpChanged() {
if (AppConfig::GetInstance()->GetConfigIP().empty()) {
const std::string& interface = AppConfig::GetInstance()->GetBindInterface();
std::string ip = GetHostIp();
if (interface.size() > 0) {
ip = GetHostIp(interface);
}
if (ip.empty()) {
ip = GetAnyAvailableIP();
}
if (ip != LoongCollectorMonitor::mIpAddr) {
LOG_ERROR(sLogger,
("error", "host ip changed during running, prepare to restart Logtail")(
"original ip", LoongCollectorMonitor::mIpAddr)("current ip", ip));
return true;
}
return false;
}
return false;
}
void LogtailMonitor::Suicide() {
SendStatusProfile(true);
Application::GetInstance()->SetSigTermSignalFlag(true);
sleep(60);
_exit(1);
}
#if defined(__linux__) // Linux only methods, for scale up calculation, load average.
static const char* PROC_STAT_PATH = "/proc/stat";
std::string LogtailMonitor::GetLoadAvg() {
const char* PROC_LOAD_PATH = "/proc/loadavg";
std::ifstream fin;
std::string loadStr;
fin.open(PROC_LOAD_PATH);
if (!fin) {
LOG_ERROR(sLogger, ("open load error", ""));
return loadStr;
}
std::getline(fin, loadStr);
fin.close();
return loadStr;
}
uint32_t LogtailMonitor::GetCpuCores() {
if (!CalCpuCores()) {
return 0;
}
return mCpuCores;
}
// Get the number of cores in CPU.
bool LogtailMonitor::CalCpuCores() {
ifstream fin;
fin.open(PROC_STAT_PATH);
if (fin.fail()) {
LOG_ERROR(sLogger, ("get count of cpu cores fail, can't open file", PROC_STAT_PATH));
mCpuCores = 1;
return false;
}
char buf[2048];
string id;
mCpuCores = 0;
while (true) {
fin >> id;
fin.getline(buf, 2048);
if (id.find("cpu") != 0)
break;
if (id != "cpu")
++mCpuCores;
if (fin.eof())
break;
}
fin.close();
if (mCpuCores == 0) {
LOG_ERROR(sLogger, ("get count of cpu cores fail, can't parse file", PROC_STAT_PATH));
mCpuCores = 1;
return false;
}
LOG_INFO(sLogger, ("machine cpu cores", mCpuCores));
return true;
}
// Use mCpuArrayForScale and mOsCpuArrayForScale to calculate if ilogtail can scale up
// to use more CPU or scale down.
void LogtailMonitor::CheckScaledCpuUsageUpLimit() {
// flag(cpu_usage_up_limit) or cpu_usage_limit in loongcollector_config.json.
float cpuUsageUpLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit();
// flag(machine_cpu_usage_threshold) or same name in loongcollector_config.json.
float machineCpuUsageThreshold = AppConfig::GetInstance()->GetMachineCpuUsageThreshold();
// mScaledCpuUsageUpLimit is greater or equal than cpuUsageUpLimit.
// It will be increased when Monitor finds the global CPU usage is low, which means
// Logtail can use more CPU.
// It will be descreasd when Monitor finds the global CPU usage is greater than
// machineCpuUsageThreshold specified.
// mScaledCpuUsageStep is used to control step for increasing and descreasing,
// 0.1 by default.
if (mOsCpuStatForScale.mOsCpuUsage >= machineCpuUsageThreshold) {
if ((mScaledCpuUsageUpLimit - mScaledCpuUsageStep) < cpuUsageUpLimit)
mScaledCpuUsageUpLimit = cpuUsageUpLimit;
else
mScaledCpuUsageUpLimit -= mScaledCpuUsageStep;
LOG_DEBUG(sLogger,
("os cpu usage", mOsCpuStatForScale.mOsCpuUsage)("desc mScaledCpuUsageUpLimit to value",
mScaledCpuUsageUpLimit));
return;
}
// If the global CPU usage is less than machineCpuUsageThreshold specifed,
// we can scale up the mScaledCpuUsageUpLimit.
// Maximum: mCpuCores * machineCpuUsageThreshold.
// If both of latest two CPU status (stored in ArrayForScale) can not satisfy,
// we can not scale up, otherwise, we can increase mScaledCpuUsageUpLimit by
// mScaledCpuUsageStep.
if (mCpuArrayForScaleIdx % CPU_STAT_FOR_SCALE_ARRAY_SIZE == 0) {
if ((mScaledCpuUsageUpLimit + mScaledCpuUsageStep) >= (mCpuCores * machineCpuUsageThreshold))
return;
for (int32_t i = 0; i < CPU_STAT_FOR_SCALE_ARRAY_SIZE; ++i) {
if ((mOsCpuArrayForScale[i] / machineCpuUsageThreshold) >= 0.95
|| (mCpuArrayForScale[i] / mScaledCpuUsageUpLimit) < 0.6)
return;
}
mScaledCpuUsageUpLimit += mScaledCpuUsageStep;
LOG_DEBUG(sLogger,
("os cpu usage", mOsCpuStatForScale.mOsCpuUsage)("inc mScaledCpuUsageUpLimit to value",
mScaledCpuUsageUpLimit));
}
}
bool LogtailMonitor::CalOsCpuStat() {
#if defined(__linux__)
ifstream fin;
fin.open(PROC_STAT_PATH);
if (fin.fail()) {
LOG_ERROR(sLogger, ("CalOsCpuStat fail, can't open file", PROC_STAT_PATH));
mOsCpuStatForScale.mOsCpuUsage = 0.5;
return false;
}
static char buf[2048];
string id;
int64_t user, nice, system, idle, iowait, irq, softirq;
fin >> id;
fin >> user;
fin >> nice;
fin >> system;
fin >> idle;
fin >> iowait;
fin >> irq;
fin >> softirq;
fin.getline(buf, 2048);
fin.close();
int64_t total = user + nice + system + idle + iowait + irq + softirq;
int64_t noIdle = user + nice + system + irq + softirq;
int64_t totalDelta = total - mOsCpuStatForScale.mTotal;
int64_t noIdleDelta = noIdle - mOsCpuStatForScale.mNoIdle;
mOsCpuStatForScale.mTotal = total;
mOsCpuStatForScale.mNoIdle = noIdle;
LOG_DEBUG(sLogger,
("cpu", mOsCpuStatForScale.mOsCpuUsage)("id", id)("user", user)("nice", nice)("system", system)(
"idle", idle)("iowait", iowait)("irq", irq)("softirq", softirq));
if (totalDelta <= 0 || noIdleDelta < 0)
return false;
else {
mOsCpuStatForScale.mOsCpuUsage = 1.0 * noIdleDelta / totalDelta;
return true;
}
#elif defined(_MSC_VER)
FILETIME idleTime, kernelTime, userTime;
if (!GetSystemTimes(&idleTime, &kernelTime, &userTime)) {
LOG_ERROR(sLogger, ("CalOsCpuStat fail, GetSystemTimes failed", GetLastError()));
mOsCpuStatForScale.mOsCpuUsage = 0.5;
return false;
}
ULARGE_INTEGER idle, kernel, user;
memcpy(&idle, &idleTime, sizeof(FILETIME));
memcpy(&kernel, &kernelTime, sizeof(FILETIME));
memcpy(&user, &userTime, sizeof(FILETIME));
if (mOsCpuStatForScale.mNoIdle > 0 && mOsCpuStatForScale.mTotal > 0) {
mOsCpuStatForScale.mOsCpuUsage = (kernel.QuadPart + user.QuadPart - mOsCpuStatForScale.mNoIdle)
/ (idle.QuadPart + user.QuadPart + kernel.QuadPart - mOsCpuStatForScale.mTotal);
}
mOsCpuStatForScale.mNoIdle = kernel.QuadPart + user.QuadPart;
mOsCpuStatForScale.mTotal = mOsCpuStatForScale.mNoIdle + idle.QuadPart;
return true;
#endif
}
#endif
LoongCollectorMonitor* LoongCollectorMonitor::GetInstance() {
static LoongCollectorMonitor* instance = new LoongCollectorMonitor();
return instance;
}
LoongCollectorMonitor::LoongCollectorMonitor() {
mHostname = GetHostName();
#if defined(_MSC_VER)
mHostname = EncodingConverter::GetInstance()->FromACPToUTF8(mHostname);
#endif
mIpAddr = GetHostIp();
mOsDetail = GetOsDetail();
mUsername = GetUsername();
}
LoongCollectorMonitor::~LoongCollectorMonitor() {
}
void LoongCollectorMonitor::Init() {
LOG_INFO(sLogger, ("LoongCollector monitor", "started"));
SelfMonitorServer::GetInstance()->Init();
// create metric record
MetricLabels labels;
labels.emplace_back(METRIC_LABEL_KEY_INSTANCE_ID, Application::GetInstance()->GetInstanceId());
labels.emplace_back(METRIC_LABEL_KEY_START_TIME, mStartTime);
labels.emplace_back(METRIC_LABEL_KEY_HOSTNAME, mHostname);
labels.emplace_back(METRIC_LABEL_KEY_OS, OS_NAME);
labels.emplace_back(METRIC_LABEL_KEY_OS_DETAIL, mOsDetail);
labels.emplace_back(METRIC_LABEL_KEY_UUID, Application::GetInstance()->GetUUID());
labels.emplace_back(METRIC_LABEL_KEY_VERSION, ILOGTAIL_VERSION);
DynamicMetricLabels dynamicLabels;
dynamicLabels.emplace_back(METRIC_LABEL_KEY_PROJECT, []() -> std::string { return FlusherSLS::GetAllProjects(); });
#ifdef __ENTERPRISE__
dynamicLabels.emplace_back(METRIC_LABEL_KEY_ALIUIDS,
[]() -> std::string { return EnterpriseConfigProvider::GetInstance()->GetAliuidSet(); });
dynamicLabels.emplace_back(METRIC_LABEL_KEY_USER_DEFINED_ID, []() -> std::string {
return EnterpriseConfigProvider::GetInstance()->GetUserDefinedIdSet();
});
#endif
WriteMetrics::GetInstance()->PrepareMetricsRecordRef(
mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_AGENT, std::move(labels), std::move(dynamicLabels));
// init value
mAgentCpu = mMetricsRecordRef.CreateDoubleGauge(METRIC_AGENT_CPU);
mAgentMemory = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_MEMORY);
mAgentGoMemory = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_MEMORY_GO);
mAgentGoRoutinesTotal = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_GO_ROUTINES_TOTAL);
mAgentOpenFdTotal = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_OPEN_FD_TOTAL);
mAgentConfigTotal = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_PIPELINE_CONFIG_TOTAL);
}
void LoongCollectorMonitor::Stop() {
SelfMonitorServer::GetInstance()->Stop();
LOG_INFO(sLogger, ("LoongCollector monitor", "stopped successfully"));
}
bool LoongCollectorMonitor::GetAgentMetric(SelfMonitorMetricEvent& event) {
lock_guard<mutex> lock(mGlobalMetricsMux);
event = mGlobalMetrics.mAgentMetric;
return true;
}
void LoongCollectorMonitor::SetAgentMetric(const SelfMonitorMetricEvent& event) {
lock_guard<mutex> lock(mGlobalMetricsMux);
mGlobalMetrics.mAgentMetric = event;
}
bool LoongCollectorMonitor::GetRunnerMetric(const std::string& runnerName, SelfMonitorMetricEvent& event) {
if (runnerName.empty()) {
return false;
}
lock_guard<mutex> lock(mGlobalMetricsMux);
if (mGlobalMetrics.mRunnerMetrics.find(runnerName) != mGlobalMetrics.mRunnerMetrics.end()) {
event = mGlobalMetrics.mRunnerMetrics[runnerName];
return true;
}
return false;
}
void LoongCollectorMonitor::SetRunnerMetric(const std::string& runnerName, const SelfMonitorMetricEvent& event) {
if (runnerName.empty()) {
return;
}
lock_guard<mutex> lock(mGlobalMetricsMux);
mGlobalMetrics.mRunnerMetrics[runnerName] = event;
}
} // namespace logtail