core/common/ProcParser.cpp (473 lines of code) (raw):
// Copyright 2025 iLogtail Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <charconv>
#include <climits>
#include <coolbpf/security/bpf_process_event_type.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include "FileSystemUtil.h"
#include "StringView.h"
#include "common/TimeUtil.h"
#if defined(__linux__)
#include <pwd.h>
#endif
#include "Logger.h"
#include "ProcParser.h"
#include "common/StringTools.h"
namespace logtail {
// Builds the path <mProcPath>/<pid>/<subpath>.
std::filesystem::path ProcParser::procPidPath(uint32_t pid, const std::string& subpath) const {
    std::filesystem::path result(mProcPath);
    result /= std::to_string(pid);
    result /= subpath;
    return result;
}
// Resolves the symlink <mProcPath>/<pid>/<filename> and returns its target.
// Uses the non-throwing read_symlink overload: on failure the error is logged
// at debug level and an empty string is returned.
std::string ProcParser::readPidLink(uint32_t pid, const std::string& filename) const {
    std::error_code err;
    const std::filesystem::path target = std::filesystem::read_symlink(procPidPath(pid, filename), err);
    if (err) {
        LOG_DEBUG(sLogger, ("[ReadPIDLink] failed pid", pid)("filename", filename)("e", err.message()));
        return "";
    }
    return target.string();
}
// Reads the whole file <mProcPath>/<pid>/<filename>.
// Returns its content, or an empty string when ReadFileContent does not report kOK.
std::string ProcParser::readPidFile(uint32_t pid, const std::string& filename) const {
    std::string data;
    std::filesystem::path target = mProcPath / std::to_string(pid) / filename;
    const bool loaded = (ReadFileContent(target, data) == FileReadResult::kOK);
    if (!loaded) {
        return "";
    }
    return data;
}
// Fills `proc` with everything this parser can learn about `pid` from procfs:
// cmdline, comm, exe, cwd (plus cwd flags), stat, status, loginuid, namespace
// inodes and the container id taken from the cgroup file.
//
// Returns false when /proc/<pid>/stat or /proc/<pid>/status cannot be read or
// parsed; in that case `proc` is left partially filled. Returns true otherwise.
bool ProcParser::ParseProc(uint32_t pid, Proc& proc) const {
    proc.pid = pid;
    proc.tid = pid;
    proc.cmdline = GetPIDCmdline(pid);
    proc.comm = GetPIDComm(pid);
    proc.exe = GetPIDExePath(pid);
    proc.flags = GetPIDCWD(pid, proc.cwd);
    proc.flags |= static_cast<uint32_t>(EVENT_PROCFS | EVENT_NEEDS_CWD | EVENT_NEEDS_AUID);

    ProcessStat stats;
    if (!ReadProcessStat(pid, stats)) {
        LOG_WARNING(sLogger, ("GetProcStatStrings", "failed"));
        return false;
    }
    proc.ppid = stats.parentPid;
    proc.ktime = GetStatsKtime(stats);

    ProcessStatus status;
    if (!ReadProcessStatus(pid, status)) {
        LOG_WARNING(sLogger, ("GetStatus failed", "failed"));
        return false;
    }
    proc.realUid = status.realUid;
    proc.effectiveUid = status.effectiveUid;
    proc.savedUid = status.savedUid;
    proc.fsUid = status.fsUid;
    proc.realGid = status.realGid;
    proc.effectiveGid = status.effectiveGid;
    proc.savedGid = status.savedGid;
    proc.fsGid = status.fsGid;
    // nstgid is only populated when the status file contains an "NStgid" line;
    // without it the vector is empty and indexing [0] would read out of bounds.
    proc.nspid = status.nstgid.empty() ? 0 : status.nstgid[0];
    proc.permitted = status.capPrm;
    proc.effective = status.capEff;
    proc.inheritable = status.capInh;
    proc.auid = GetLoginUid(pid);

    proc.uts_ns = GetPIDNsInode(pid, "uts");
    proc.ipc_ns = GetPIDNsInode(pid, "ipc");
    proc.mnt_ns = GetPIDNsInode(pid, "mnt");
    proc.pid_ns = GetPIDNsInode(pid, "pid");
    proc.pid_for_children_ns = GetPIDNsInode(pid, "pid_for_children");
    proc.net_ns = GetPIDNsInode(pid, "net");
    proc.cgroup_ns = GetPIDNsInode(pid, "cgroup");
    proc.user_ns = GetPIDNsInode(pid, "user");
    proc.time_ns = GetPIDNsInode(pid, "time");
    proc.time_for_children_ns = GetPIDNsInode(pid, "time_for_children");

    GetPIDDockerId(pid, proc.container_id);
    // No container id found in the cgroup file: report nspid as 0.
    if (proc.container_id.empty()) {
        proc.nspid = 0;
    }

    if (proc.ppid) {
        // proc.pcmdline = GetPIDCmdline(proc.ppid);
        // auto parentComm = GetPIDComm(proc.ppid);
        ProcessStat parentStats;
        // The parent may already be gone; only derive pktime from a stat line
        // that was actually read and parsed (ReadProcessStat logs the failure).
        if (ReadProcessStat(proc.ppid, parentStats)) {
            proc.pktime = GetStatsKtime(parentStats);
        } else {
            proc.pktime = 0;
        }
        // proc.pexe = GetPIDExePath(proc.ppid);
        // auto [pnspid, ppermitted, peffective, pinheritable] = GetPIDCaps(proc.ppid);
        // std::string pDockerId = GetPIDDockerId(proc.ppid);
        // if (pDockerId.empty()) {
        // pnspid = 0;
        // }
        // proc.pnspid = pnspid;
        // proc.pflags = static_cast<uint32_t>(EVENT_PROCFS | EVENT_NEEDS_CWD | EVENT_NEEDS_AUID);
    }
    return true;
}
// Returns the raw content of <mProcPath>/<pid>/cmdline (empty if it cannot be read).
std::string ProcParser::GetPIDCmdline(uint32_t pid) const {
    std::string cmdline = readPidFile(pid, "cmdline");
    return cmdline;
}
// Returns the raw content of <mProcPath>/<pid>/comm (empty if it cannot be read).
std::string ProcParser::GetPIDComm(uint32_t pid) const {
    std::string comm = readPidFile(pid, "comm");
    return comm;
}
// Returns the raw content of <mProcPath>/<pid>/environ (empty if it cannot be read).
std::string ProcParser::GetPIDEnviron(uint32_t pid) const {
    std::string environ = readPidFile(pid, "environ");
    return environ;
}
bool ProcParser::isValidContainerId(const StringView& id) {
// 检查长度是否匹配
if (id.size() != kContainerIdLength) {
return false;
}
// 这里假设合法 container id 只包含十六进制字符(即 0-9 和 a-f / A-F)
for (char ch : id) {
if (!((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))) {
return false;
}
}
return true;
}
// Tries to extract a container id from one line of a cgroup file.
//
// A line is only considered when it is longer than kContainerIdLength, contains
// a ':' and mentions one of the known runtime keywords. The candidate id is the
// part of the last '/'-separated segment after its last '-', cut down to
// kContainerIdLength characters when it is longer and contains "scope".
//
// On success `containerId` views the id inside `cgroupline` and the return value
// is the id's offset within the last path segment. Otherwise `containerId` is
// set to kEmptyStringView and -1 is returned.
int ProcParser::lookupContainerId(const StringView& cgroupline, StringView& containerId) {
    static constexpr const char* kRuntimeHints[] = {"pods", "docker", "containerd", "libpod", "lxc", "podman", "cri-"};

    bool mentionsRuntime = false;
    for (const char* hint : kRuntimeHints) {
        if (cgroupline.find(hint) != std::string::npos) {
            mentionsRuntime = true;
            break;
        }
    }

    const bool worthParsing = cgroupline.length() > kContainerIdLength
        && cgroupline.find(':') != std::string::npos && mentionsRuntime;

    if (worthParsing) {
        const auto slashPos = cgroupline.rfind('/');
        if (slashPos != std::string::npos && slashPos + 1 < cgroupline.size()) {
            const auto tail = cgroupline.substr(slashPos + 1);
            auto candidate = tail;
            // Drop a "<prefix>-" in front of the id, if there is one.
            const auto dashPos = candidate.rfind('-');
            if (dashPos != std::string::npos && dashPos + 1 < candidate.size()) {
                candidate = candidate.substr(dashPos + 1);
            }
            // Strip a trailing ".scope" by keeping only the id-sized prefix.
            if (candidate.size() > kContainerIdLength && candidate.find("scope") != std::string::npos) {
                candidate = candidate.substr(0, kContainerIdLength);
            }
            if (isValidContainerId(candidate)) {
                containerId = candidate;
                return containerId.data() - tail.data();
            }
        }
    }

    containerId = kEmptyStringView;
    return -1;
}
int ProcParser::GetContainerId(const std::string& cgroupPath, std::string& containerId) {
std::string content;
if (FileReadResult::kOK != ReadFileContent(cgroupPath, content)) {
LOG_DEBUG(sLogger, ("Failed to read cgroup file", cgroupPath));
containerId.clear();
return -1;
}
StringViewSplitter splitter(content, "\n");
for (const auto& line : splitter) {
LOG_DEBUG(sLogger, ("cgroup line", line.to_string()));
StringView containerIdView;
int offset = lookupContainerId(line, containerIdView);
if (offset >= 0) {
LOG_DEBUG(sLogger, ("Found container ID using lookup", containerIdView)("offset", offset));
containerId = containerIdView.to_string();
return offset;
}
}
LOG_DEBUG(sLogger, ("No valid container ID found in cgroup file", cgroupPath));
containerId.clear();
return -1;
}
// Looks up the container id of `pid` from <mProcPath>/<pid>/cgroup.
// Result and `containerId` semantics are those of GetContainerId().
int ProcParser::GetPIDDockerId(uint32_t pid, std::string& containerId) const {
    std::filesystem::path cgroupFile(mProcPath);
    cgroupFile /= std::to_string(pid);
    cgroupFile /= "cgroup";
    return GetContainerId(cgroupFile, containerId);
}
// Returns the target of the <mProcPath>/<pid>/exe symlink (empty if it cannot be resolved).
std::string ProcParser::GetPIDExePath(uint32_t pid) const {
    std::string exePath = readPidLink(pid, "exe");
    return exePath;
}
// Resolves the working directory of `pid` into `cwd` and returns the matching
// event flags:
//   - pid == 0:               cwd is empty, flags are EVENT_UNKNOWN
//   - cwd could not be read:  cwd is empty, EVENT_ROOT_CWD | EVENT_ERROR_CWD is set
//   - cwd is "/":             EVENT_ROOT_CWD is set
//   - anything else:          flags are EVENT_UNKNOWN
uint32_t ProcParser::GetPIDCWD(uint32_t pid, std::string& cwd) const {
    cwd.clear();
    uint32_t flags = EVENT_UNKNOWN;
    if (pid == 0) {
        return flags;
    }
    try {
        cwd = readPidLink(pid, "cwd");
    } catch (const std::filesystem::filesystem_error&) {
        // Kept for backward compatibility; readPidLink reports failures through
        // an empty result rather than by throwing.
        cwd.clear();
    }
    if (cwd.empty()) {
        // readPidLink uses the error_code overload of read_symlink, so a failed
        // read shows up as an empty string (possibly a kernel thread or a
        // process that already exited). Flag it here; the catch alone never fires.
        flags |= EVENT_ROOT_CWD | EVENT_ERROR_CWD;
    } else if (cwd == "/") {
        flags |= EVENT_ROOT_CWD;
    }
    return flags;
}
// Resolves a numeric uid to its user name.
// On Linux the lookup goes through getpwuid_r() and successful results are kept
// in a thread-local cache. Returns an empty string when the uid cannot be
// resolved, and on every non-Linux platform.
std::string ProcParser::GetUserNameByUid(uid_t uid) {
#if defined(__linux__)
    thread_local static std::unordered_map<uid_t, std::string> sUserNameCache;
    if (const auto cached = sUserNameCache.find(uid); cached != sUserNameCache.end()) {
        return cached->second;
    }
    struct passwd pwd {};
    struct passwd* result = nullptr;
    char buf[8192]; // This buffer size is quite large. If it's still not enough, it's unusual and we return an empty
                    // result.
    const int ret = getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
    if (ret != 0 || result == nullptr) {
        return {};
    }
    if (sUserNameCache.size() > 10000) { // If we have too many entries, reset the cache.
        sUserNameCache.clear();
    }
    std::string& cachedName = sUserNameCache[uid];
    cachedName = pwd.pw_name;
    return cachedName;
#else
    // Previously only _MSC_VER was handled here, so any other non-Linux build
    // fell off the end of the function without returning a value.
    static_cast<void>(uid);
    return {};
#endif
}
// Converts the process start time from clock ticks to nanoseconds:
// startTicks * kNanoPerSeconds / GetTicksPerSecond().
// NOTE(review): the multiplication happens before the division; whether it can
// overflow depends on the operand types, which are declared elsewhere - confirm.
int64_t ProcParser::GetStatsKtime(ProcessStat& procStat) const {
    const auto startInNanoTicks = procStat.startTicks * kNanoPerSeconds;
    return startInNanoTicks / GetTicksPerSecond();
}
uint32_t ProcParser::GetPIDNsInode(uint32_t pid, const std::string& nsStr) const {
std::string pidStr = std::to_string(pid);
std::filesystem::path netns = std::filesystem::path(mProcPath) / pidStr / "ns" / nsStr;
std::error_code ec;
std::string netStr = std::filesystem::read_symlink(netns, ec).string();
if (ec) {
LOG_WARNING(sLogger, ("namespace", netns)("error", ec.message()));
return 0;
}
std::vector<std::string> fields = SplitString(netStr, ":");
if (fields.size() < 2) {
LOG_WARNING(sLogger, ("parsing namespace fields less than 2, net str ", netStr)("netns", netns));
return 0;
}
auto openPos = netStr.find('[');
auto closePos = netStr.find_last_of(']');
if (openPos == std::string::npos || closePos == std::string::npos || openPos + 1 >= closePos) {
LOG_WARNING(sLogger, ("Invalid NsInode: ", netStr));
return 0;
}
uint32_t inodeEntry = 0;
if (!StringTo(netStr.data() + openPos + 1, netStr.data() + closePos, inodeEntry)) {
LOG_WARNING(sLogger, ("Invalid NsInode: ", netStr));
return 0;
}
return inodeEntry;
}
// Reads <mProcPath>/<pid>/loginuid and returns it as a uid.
// When the content cannot be converted a warning is logged and the value left
// in the (zero-initialised) result variable is returned.
// NOTE(review): a failed read is therefore reported like uid 0 unless StringTo
// writes something else on failure - confirm this is intended.
uid_t ProcParser::GetLoginUid(uint32_t pid) const {
    std::string rawValue = readPidFile(pid, "loginuid");
    uid_t parsedUid = 0;
    const bool converted = StringTo(rawValue, parsedUid);
    if (!converted) {
        LOG_WARNING(sLogger, ("Invalid loginuid: ", rawValue));
    }
    return parsedUid;
}
// Splits a NUL-separated argument buffer at its first '\0'.
// Returns (cmds, filename): `filename` is everything before the first NUL and
// `cmds` is the remainder starting at that NUL character. Without any NUL the
// whole input is the filename and `cmds` is empty.
std::tuple<std::string, std::string> ProcParser::ProcsFilename(const std::string& args) {
    const size_t firstNul = args.find('\0');
    if (firstNul == std::string::npos) {
        return std::make_tuple(std::string(), args);
    }
    std::string remainder = args.substr(firstNul);
    std::string executable = args.substr(0, firstNul);
    return std::make_tuple(remainder, executable);
}
// Reads <mProcPath>/<pid>/stat and parses it into `ps` via ParseProcessStat().
// Returns false (after logging an error) when the file cannot be read;
// otherwise returns the parser's result.
bool ProcParser::ReadProcessStat(pid_t pid, ProcessStat& ps) const {
    LOG_DEBUG(sLogger, ("read process stat", pid));
    auto statPath = mProcPath / std::to_string(pid) / "stat";
    std::string statLine;
    const bool loaded = (ReadFileContent(statPath.string(), statLine) == FileReadResult::kOK);
    if (loaded) {
        return ParseProcessStat(pid, statLine, ps);
    }
    LOG_ERROR(sLogger, ("read process stat", "fail")("file", statPath));
    return false;
}
// Parses one /proc/<pid>/stat line into `ps`.
//
// Sample data: /proc/1/stat
// 1 (cat) R 0 1 1 34816 1 4194560 1110 0 0 0 1 1 0 0 20 0 1 0 18938584 4505600 171 18446744073709551615 4194304 4238788
// 140727020025920 0 0 0 0 0 0 0 0 0 17 3 0 0 0 0 0 6336016 6337300 21442560 140727020027760 140727020027777
// 140727020027777 140727020027887 0
//
// Returns false when the "(comm)" part cannot be located or when the line has
// fewer fields than required to reach `processor`. A field that is present but
// fails numeric conversion only produces a warning; parsing continues and the
// function still returns true.
bool ProcParser::ParseProcessStat(pid_t pid, const std::string& line, ProcessStat& ps) const {
    ps.pid = pid;
    // The name is taken between the first '(' and the LAST ')' so that names
    // containing parentheses are kept intact.
    auto nameStartPos = line.find_first_of('(');
    auto nameEndPos = line.find_last_of(')');
    if (nameStartPos == std::string::npos || nameEndPos == std::string::npos || nameStartPos >= nameEndPos) {
        LOG_ERROR(sLogger, ("can't find process name", pid)("stat", line));
        return false;
    }
    nameStartPos++; // skip the opening parenthesis
    ps.name = line.substr(nameStartPos, nameEndPos - nameStartPos);

    // Skip the closing parenthesis and the following space. The position is
    // clamped to the line length: substr() with a start past the end is an error
    // (out_of_range for std/boost string views), which a line ending right after
    // ')' would otherwise trigger.
    const size_t fieldsPos = std::min(nameEndPos + 2, line.size());
    StringView lineview = StringView(line).substr(fieldsPos);

    std::array<StringView, size_t(EnumProcessStat::_count)> words{};
    StringViewSplitter splitter(lineview, " ");
    size_t wordCount = 0;
    for (const auto& word : splitter) {
        if (wordCount >= words.size()) {
            break;
        }
        words[wordCount++] = word;
    }

    constexpr const EnumProcessStat offset = EnumProcessStat::state; // skip pid and comm
    constexpr const int minCount = EnumProcessStat::processor - offset + 1; // 37
    // Compare against the number of fields actually parsed. words.size() is the
    // fixed capacity of the array and would make this check a no-op.
    if (wordCount < static_cast<size_t>(minCount)) {
        LOG_ERROR(sLogger, ("unexpected item count", pid)("stat", line));
        return false;
    }

    if (!words[EnumProcessStat::state - offset].empty()) {
        ps.state = words[EnumProcessStat::state - offset].front();
    }
    if (!StringTo(words[EnumProcessStat::ppid - offset], ps.parentPid)) {
        LOG_WARNING(sLogger, ("Invalid ppid:", words[EnumProcessStat::ppid - offset]));
    }
    if (!StringTo(words[EnumProcessStat::tty_nr - offset], ps.tty)) {
        LOG_WARNING(sLogger, ("Invalid tty_nr:", words[EnumProcessStat::tty_nr - offset]));
    }
    if (!StringTo(words[EnumProcessStat::minflt - offset], ps.minorFaults)) {
        LOG_WARNING(sLogger, ("Invalid minflt:", words[EnumProcessStat::minflt - offset]));
    }
    if (!StringTo(words[EnumProcessStat::majflt - offset], ps.majorFaults)) {
        LOG_WARNING(sLogger, ("Invalid majflt:", words[EnumProcessStat::majflt - offset]));
    }
    if (!StringTo(words[EnumProcessStat::utime - offset], ps.utimeTicks)) {
        LOG_WARNING(sLogger, ("Invalid utime:", words[EnumProcessStat::utime - offset]));
    }
    if (!StringTo(words[EnumProcessStat::stime - offset], ps.stimeTicks)) {
        LOG_WARNING(sLogger, ("Invalid stime:", words[EnumProcessStat::stime - offset]));
    }
    if (!StringTo(words[EnumProcessStat::cutime - offset], ps.cutimeTicks)) {
        LOG_WARNING(sLogger, ("Invalid cutime:", words[EnumProcessStat::cutime - offset]));
    }
    if (!StringTo(words[EnumProcessStat::cstime - offset], ps.cstimeTicks)) {
        LOG_WARNING(sLogger, ("Invalid cstime:", words[EnumProcessStat::cstime - offset]));
    }
    if (!StringTo(words[EnumProcessStat::priority - offset], ps.priority)) {
        LOG_WARNING(sLogger, ("Invalid priority:", words[EnumProcessStat::priority - offset]));
    }
    if (!StringTo(words[EnumProcessStat::nice - offset], ps.nice)) {
        LOG_WARNING(sLogger, ("Invalid nice:", words[EnumProcessStat::nice - offset]));
    }
    if (!StringTo(words[EnumProcessStat::num_threads - offset], ps.numThreads)) {
        LOG_WARNING(sLogger, ("Invalid num_threads:", words[EnumProcessStat::num_threads - offset]));
    }
    if (!StringTo(words[EnumProcessStat::starttime - offset], ps.startTicks)) {
        LOG_WARNING(sLogger, ("Invalid starttime:", words[EnumProcessStat::starttime - offset]));
    }
    if (!StringTo(words[EnumProcessStat::vsize - offset], ps.vSize)) {
        LOG_WARNING(sLogger, ("Invalid vsize:", words[EnumProcessStat::vsize - offset]));
    }
    if (!StringTo(words[EnumProcessStat::rss - offset], ps.rss)) {
        LOG_WARNING(sLogger, ("Invalid rss:", words[EnumProcessStat::rss - offset]));
    } else {
        // The stat file reports rss as a page count; convert it to bytes.
        // The previous "<<= getpagesize()" shifted by the page size itself
        // (e.g. 4096 bits), which is undefined behaviour for this integer width.
        ps.rss *= static_cast<decltype(ps.rss)>(getpagesize());
    }
    if (!StringTo(words[EnumProcessStat::processor - offset], ps.processor)) {
        LOG_WARNING(sLogger, ("Invalid processor:", words[EnumProcessStat::processor - offset]));
    }
    return true;
}
// Reads <mProcPath>/<pid>/status and parses it into `ps` via ParseProcessStatus().
// Returns false (after logging an error) when the file cannot be read;
// otherwise returns the parser's result.
bool ProcParser::ReadProcessStatus(pid_t pid, ProcessStatus& ps) const {
    LOG_DEBUG(sLogger, ("read process status", pid));
    auto statusPath = mProcPath / std::to_string(pid) / "status";
    std::string statusText;
    const bool loaded = (ReadFileContent(statusPath.string(), statusText) == FileReadResult::kOK);
    if (loaded) {
        return ParseProcessStatus(pid, statusText, ps);
    }
    LOG_ERROR(sLogger, ("read process status", "fail")("file", statusPath));
    return false;
}
// 解析/proc/<pid>/status 文件内容
// 数据样例:
// Name: bash
// Umask: 0022
// State: S (sleeping)
// Tgid: 17248
// ...
// Uid: 1000 10001000 1000
// Gid: 100 100 100 100
// ...
// NStgid: 17248 1
// ...
// CapInh: 0000000000000000
// CapPrm: 0000000000000000
// CapEff: 0000000000000000
// ...
bool ProcParser::ParseProcessStatus(pid_t pid, const std::string& content, ProcessStatus& ps) const {
ps.pid = pid;
StringViewSplitter lineSplitter(StringView(content), "\n");
for (const auto& line : lineSplitter) {
auto colonPos = line.find(':');
if (colonPos == StringView::npos || colonPos == line.size() - 1) {
continue;
}
StringView key = line.substr(0, colonPos);
StringView value = line.substr(colonPos + 1);
// 去除前导空格和制表符
while (!value.empty() && (value[0] == ' ' || value[0] == '\t')) {
value.remove_prefix(1);
}
if (key == "Uid") {
// Uid: real_uid effective_uid saved_uid fs_uid
StringViewSplitter uidSplitter(value, "\t");
size_t index = 0;
for (const auto& part : uidSplitter) {
if (part.empty()) {
continue;
}
switch (index) {
case 0:
if (!StringTo(part, ps.realUid)) {
LOG_WARNING(sLogger, ("Invalid real_uid:", part));
}
break;
case 1:
if (!StringTo(part, ps.effectiveUid)) {
LOG_WARNING(sLogger, ("Invalid effective_uid:", part));
}
break;
case 2:
if (!StringTo(part, ps.savedUid)) {
LOG_WARNING(sLogger, ("Invalid saved_uid:", part));
}
break;
case 3:
if (!StringTo(part, ps.fsUid)) {
LOG_WARNING(sLogger, ("Invalid fs_uid:", part));
}
break;
default:
break;
}
++index;
}
} else if (key == "Gid") {
// Gid: real_gid effective_gid saved_gid fs_gid
StringViewSplitter gidSplitter(value, "\t");
size_t index = 0;
for (const auto& part : gidSplitter) {
if (part.empty()) {
continue;
}
switch (index) {
case 0:
if (!StringTo(part, ps.realGid)) {
LOG_WARNING(sLogger, ("Invalid real_gid:", part));
}
break;
case 1:
if (!StringTo(part, ps.effectiveGid)) {
LOG_WARNING(sLogger, ("Invalid effective_gid:", part));
}
break;
case 2:
if (!StringTo(part, ps.savedGid)) {
LOG_WARNING(sLogger, ("Invalid saved_gid:", part));
}
break;
case 3:
if (!StringTo(part, ps.fsGid)) {
LOG_WARNING(sLogger, ("Invalid fs_gid:", part));
}
break;
default:
break;
}
++index;
}
} else if (key == "NStgid") {
// NStgid: namespace_tgid [namespace_tgid ...]
ps.nstgid.clear();
StringViewSplitter nstgidSplitter(value, "\t");
for (const auto& part : nstgidSplitter) {
if (!part.empty()) {
pid_t nstgid = 0;
if (!StringTo(part, nstgid)) {
LOG_WARNING(sLogger, ("Invalid nstgid:", value));
}
ps.nstgid.push_back(nstgid);
}
}
} else if (key == "CapPrm") { // CapPrm: 16进制表示的权限掩码
if (!StringTo(value, ps.capPrm, 16)) {
LOG_WARNING(sLogger, ("Invalid CapPrm:", value));
}
} else if (key == "CapEff") { // CapEff: 16进制表示的权限掩码
if (!StringTo(value, ps.capEff, 16)) {
LOG_WARNING(sLogger, ("Invalid CapEff:", value));
}
} else if (key == "CapInh") { // CapInh: 16进制表示的权限掩码
if (!StringTo(value, ps.capInh, 16)) {
LOG_WARNING(sLogger, ("Invalid CapInh:", value));
}
}
// 可以根据需要解析更多字段...
}
return true;
}
} // namespace logtail