core/common/FileSystemUtil.cpp (661 lines of code) (raw):
// Copyright 2022 iLogtail Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "FileSystemUtil.h"
#include <cstddef>
#include <cstdint>
#include <sys/types.h>
#if defined(_MSC_VER)
#include <direct.h>
#include <fcntl.h>
#elif defined(__linux__)
#include <fnmatch.h>
#include <sys/statvfs.h>
#endif
#include <fstream>
#include "boost/filesystem.hpp"
#include "RuntimeUtil.h"
#include "StringTools.h"
#include "logger/Logger.h"
using namespace std;
namespace logtail {
#if defined(__linux__)
const std::string PATH_SEPARATOR = "/";
#elif defined(_MSC_VER)
const std::string PATH_SEPARATOR = "\\";
#endif
std::string ParentPath(const std::string& path) {
boost::filesystem::path p(path);
return p.parent_path().string();
}
bool CheckExistance(const std::string& path) {
boost::system::error_code ec;
boost::filesystem::path p(path);
return boost::filesystem::exists(p, ec);
}
bool Mkdirs(const std::string& dirPath) {
if (Mkdir(dirPath)) {
return true;
}
if (errno != ENOENT) {
return false;
}
boost::filesystem::path p(dirPath);
if (!p.has_parent_path()) {
return false;
}
if (!Mkdirs(ParentPath(dirPath))) {
return false;
}
return Mkdir(dirPath);
}
bool Mkdir(const std::string& dirPath) {
#if defined(__linux__)
if (mkdir(dirPath.c_str(), S_IXUSR | S_IWUSR | S_IRUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) {
if (errno != EEXIST)
return false;
}
return true;
#elif defined(_MSC_VER)
if (-1 == _mkdir(dirPath.c_str())) {
if (errno != EEXIST)
return false;
}
return true;
#endif
}
bool IsRelativePath(const std::string& path) {
boost::filesystem::path checkPointFilePath(path);
return checkPointFilePath.is_relative();
}
std::string AbsolutePath(const std::string& path, const std::string& basepath) {
return boost::filesystem::absolute(path, basepath).string();
}
std::string NormalizePath(const std::string& path) {
boost::filesystem::path abs(path);
if (abs.filename_is_dot() || abs.filename_is_dot_dot()) {
abs.remove_filename();
}
return abs.string();
}
int FSeek(FILE* stream, int64_t offset, int origin) {
#if defined(_MSC_VER)
return _fseeki64(stream, offset, origin);
#elif defined(__linux__)
return fseek(stream, offset, origin);
#endif
}
int64_t FTell(FILE* stream) {
#if defined(_MSC_VER)
return _ftelli64(stream);
#elif defined(__linux__)
return ftell(stream);
#endif
}
void TrimLastSeperator(std::string& path) {
// do not trim '/'
if (path.size() > 1 && path[path.size() - 1] == PATH_SEPARATOR[0]) {
path = path.substr(0, path.size() - 1);
}
}
long GetPageSize() {
static long pageSize = sysconf(_SC_PAGESIZE);
return (pageSize > 0) ? static_cast<size_t>(pageSize) : 4096;
}
size_t GetBlockSize(const std::filesystem::path& path) {
#if defined(__linux__)
struct statvfs buf {};
if (statvfs(path.c_str(), &buf) == 0) {
return buf.f_bsize;
}
#endif
return 0UL;
}
FileReadResult ReadFileContent(const std::string& fileName, std::string& content, uint64_t maxFileSize) {
std::ifstream ifs(fileName, std::ios::binary);
if (!ifs) {
return FileReadResult::kError;
}
content.clear();
try {
constexpr uint64_t kFileReadBufferSize = 32 * 1024;
// 设定为32K,对于特殊文件(如 /proc 中的文件)
// 尽可能一次性读进来 https://github.com/giampaolo/psutil/issues/2050
uint64_t totalRead = 0;
uint64_t bytesRead = 0;
content.resize(std::min(kFileReadBufferSize, maxFileSize));
while (ifs && totalRead < maxFileSize) {
ifs.read(content.data() + totalRead, std::min(kFileReadBufferSize, maxFileSize - totalRead));
bytesRead = ifs.gcount();
totalRead += bytesRead;
if (bytesRead > 0 && totalRead < maxFileSize) {
content.resize(totalRead + kFileReadBufferSize);
}
}
content.resize(totalRead);
// Check if file is larger than maxFileSize
char extra = 0;
if (ifs.read(&extra, 1)) {
return FileReadResult::kTruncated;
}
} catch (const std::ios_base::failure& e) {
return FileReadResult::kError;
} catch (const std::filesystem::filesystem_error& e) {
// Handle filesystem errors (e.g., permissions)
return FileReadResult::kError;
}
return FileReadResult::kOK;
}
int GetLines(std::istream& is,
bool enableEmptyLine,
const std::function<void(const std::string&)>& pushBack,
std::string* errorMessage) {
std::string line;
// 此处必须判断eof,具体原因参见:
// https://stackoverflow.com/questions/40561482/getline-throws-basic-iosclear-exception-after-reading-the-last-line
while (!is.eof() && std::getline(is, line)) {
if (enableEmptyLine || !line.empty()) {
pushBack(line);
}
}
return 0;
}
int GetLines(const std::filesystem::path& filename,
bool enableEmptyLine,
const std::function<void(const std::string&)>& pushBack,
std::string* errorMessage) {
int ret = 0;
std::ifstream fin;
try {
fin.exceptions(std::ifstream::failbit | std::ifstream::badbit);
fin.open(filename.string(), std::ios_base::in);
fin.exceptions(std::ifstream::goodbit);
GetLines(fin, enableEmptyLine, pushBack, errorMessage);
fin.close();
} catch (const std::exception& fail) {
if (errorMessage != nullptr) {
LOG_ERROR(sLogger, ("open file fail", filename)("errno", strerror(errno)));
ret = -1;
}
fin.close();
}
return ret;
}
int GetFileLines(const std::filesystem::path& filename,
std::vector<std::string>& res,
bool enableEmptyLine,
std::string* errorMessage) {
return GetLines(filename, enableEmptyLine, [&res](const std::string& s) { res.push_back(s); }, errorMessage);
}
bool OverwriteFile(const std::string& fileName, const std::string& content) {
FILE* pFile = fopen(fileName.c_str(), "w");
if (pFile == NULL) {
APSARA_LOG_ERROR(sLogger, ("open file fail", fileName)("errno", strerror(errno)));
return false;
}
uint32_t writeBytes = fwrite(content.c_str(), 1, content.size(), pFile);
if (writeBytes != content.size()) {
APSARA_LOG_ERROR(sLogger,
("write file fail", fileName)("errno", strerror(errno))("content.size", content.size())(
"writeBytes", writeBytes));
fclose(pFile);
return false;
}
if (fclose(pFile) != 0) {
APSARA_LOG_ERROR(sLogger, ("close file fail", fileName)("errno", strerror(errno)));
return false;
}
return true;
}
bool WriteFile(const std::string& fileName, const std::string& content, std::string& errMsg) {
ofstream f(fileName, ios::trunc);
if (!f.is_open()) {
errMsg = "failed to open file " + fileName;
return false;
}
f.write(content.c_str(), content.size());
if (f.fail()) {
errMsg = strerror(errno);
return false;
}
return true;
}
bool IsAccessibleDirectory(const std::string& dirPath) {
boost::filesystem::directory_iterator end;
try {
boost::filesystem::directory_iterator dirIter(dirPath);
return (dirIter != end);
} catch (...) {
return false;
}
}
bool GetAllFiles(const std::string& dirPath, const std::string& filePattern, std::vector<std::string>& allFiles) {
allFiles.clear();
{
// If no wildcard characters in filePattern, only one file.
size_t i = 0;
for (; i < filePattern.size(); ++i) {
if (filePattern[i] == '?' || filePattern[i] == '*') {
break;
}
}
if (i == filePattern.size()) {
allFiles.push_back(filePattern);
return true;
}
}
fsutil::Dir dir(dirPath);
if (!dir.Open()) {
APSARA_LOG_ERROR(sLogger, ("Open dir fail", dirPath)("errno", GetErrno()));
return false;
}
int32_t readCount = 0;
while (auto ent = dir.ReadNext(false)) {
++readCount;
if (ent.IsDir())
continue;
if (0 == fnmatch(filePattern.c_str(), ent.Name().c_str(), FNM_PATHNAME)) {
allFiles.push_back(ent.Name());
}
}
APSARA_LOG_INFO(sLogger,
("load wildcard local event, dir",
dirPath)("file", filePattern)("read count", readCount)("matched count", allFiles.size()));
return true;
}
#if defined(_MSC_VER)
// Convert Windows file HANDLE to FILE*.
// By _open_osfhandle -> _fdopen, only need to close the last one.
FILE* FromFileHandle(HANDLE hFile, int flags, const char* mode) {
if (INVALID_HANDLE_VALUE == hFile)
return NULL;
int nHandle = _open_osfhandle((long)hFile, flags);
if (-1 == nHandle) {
CloseHandle(hFile);
return NULL;
}
FILE* f = _fdopen(nHandle, mode);
if (NULL == f) {
CloseHandle(hFile);
return NULL;
}
return f;
}
#endif
FILE* FileReadOnlyOpen(const char* filePath, const char* mode) {
#if defined(__linux__)
return fopen(filePath, mode);
#elif defined(_MSC_VER)
HANDLE hFile = CreateFile(filePath,
GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
return FromFileHandle(hFile, _O_RDONLY, mode);
#endif
}
FILE* FileWriteOnlyOpen(const char* filePath, const char* mode) {
#if defined(__linux__)
FILE* f = fopen(filePath, mode);
return f;
#elif defined(_MSC_VER)
HANDLE hFile = CreateFile(filePath,
GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL,
OPEN_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NULL);
return FromFileHandle(hFile, _O_WRONLY, mode);
#endif
}
FILE* FileAppendOpen(const char* filePath, const char* mode) {
#if defined(__linux__)
FILE* f = fopen(filePath, mode);
return f;
#elif defined(_MSC_VER)
HANDLE hFile = CreateFile(filePath,
FILE_APPEND_DATA,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL,
OPEN_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NULL);
auto f = FromFileHandle(hFile, _O_APPEND, mode);
if (NULL == f)
return NULL;
// We have to call fseek manually, because _fdopen will not synchronize offset
// from HANDLE during opening.
if (0 == FSeek(f, 0, SEEK_END))
return f;
LOG_WARNING(sLogger, ("Call fseek failed", errno)("FilePath", filePath));
fclose(f);
return NULL;
#endif
}
std::string GetFdPath(int fd) {
#if defined(_MSC_VER)
auto oshandle = _get_osfhandle(fd);
auto handle = (HANDLE)oshandle;
if (INVALID_HANDLE_VALUE == handle) {
return "";
}
char filePath[MAX_PATH + 1];
auto ret = GetFinalPathNameByHandle(handle, filePath, MAX_PATH + 1, VOLUME_NAME_DOS);
if (ret > MAX_PATH || ret <= 0) {
return "";
}
if (0 == memcmp(filePath, "\\\\?\\", 4)) {
return std::string(filePath + 4);
}
return std::string(filePath);
#else
/* NOTE: readlink() does not append a null byte to buf */
char buf[PATH_MAX + 1] = {0};
char path[PATH_MAX];
ssize_t ssize;
// here we use /proc/self/fd rather than /dev/fd because /dev/fd may not exist in docker.
snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd);
if ((ssize = readlink(path, buf, PATH_MAX)) != -1) {
return std::string(buf);
}
return std::string();
#endif
}
void Chmod(const char* filePath, mode_t mode) {
#if defined(__linux__)
if (chmod(filePath, mode) == -1) {
APSARA_LOG_ERROR(sLogger, ("chmod error", filePath)("mode", mode)("errno", errno));
}
#endif
}
bool IsValidSuffix(const std::string& filename) {
// such as compress file (*.gz) or its rollback file (*.gz.*) will be ignored
static const std::string FILTER_LIST[] = {".gz", ".bz", ".tar"};
for (uint32_t i = 0; i < sizeof(FILTER_LIST) / sizeof(FILTER_LIST[0]); i++) {
if (EndWith(filename, FILTER_LIST[i])) {
return false;
};
}
return true;
}
namespace fsutil {
Dir::Dir(const std::string& dirPath) : mDirPath(dirPath) {
#if defined(__linux__)
mDir = nullptr;
#elif defined(_MSC_VER)
mFind = INVALID_HANDLE_VALUE;
#endif
}
Dir::~Dir() {
Close();
}
#if defined(_MSC_VER)
// Invalid entry is returned if fileName starts with ., such as ., .., hidden files.
static Entry ConstructEntry(const WIN32_FIND_DATA& findData) {
std::string fileName(findData.cFileName);
if (0 == fileName.find("."))
return Entry();
// NOTE: We assume there are only two types: DIR and REG_FILE, ignore symbolic.
// In fact, for symbolic path, both boost::filesystem::status and Windows stat
// will return FILE...
return Entry(fileName,
(findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? Entry::Type::DIR : Entry::Type::REG_FILE,
false);
}
#endif
bool Dir::IsOpened() const {
#if defined(__linux__)
return mDir != nullptr;
#elif defined(_MSC_VER)
return mFind != INVALID_HANDLE_VALUE;
#endif
}
bool Dir::Open() {
if (IsOpened())
return true;
if (mDirPath.empty()) {
// Log it but don't return, let following call to generate errno.
LOG_WARNING(sLogger, ("Empty dir path", mDirPath));
}
#if defined(__linux__)
DIR* dir = opendir(mDirPath.c_str());
if (NULL == dir) {
// TODO: Record errno
return false;
}
mDir = dir;
return true;
#elif defined(_MSC_VER)
auto findPath = PathJoin(mDirPath, "*");
WIN32_FIND_DATA ffd;
mFind = FindFirstFile(findPath.c_str(), &ffd);
if (INVALID_HANDLE_VALUE == mFind) {
// TODO: Record errno.
return false;
}
// Cache first entry.
mCachedEntry = ConstructEntry(ffd);
return true;
#endif
}
Entry Dir::ReadNext(bool resolveWithStat) {
if (!IsOpened())
return Entry();
#if defined(__linux__)
// Call readdir until it returns nullptr or non-dot entry.
while (true) {
struct dirent* ent = readdir(mDir);
if (nullptr == ent)
return Entry();
std::string fileName(ent->d_name);
if (0 == fileName.find('.')) {
continue;
}
Entry::Type type;
bool isSymbolic = false;
switch (ent->d_type) {
case DT_FIFO:
case DT_CHR:
case DT_BLK:
case DT_SOCK:
case DT_WHT:
// Known types that should be ignored.
type = Entry::Type::UNKNOWN;
break;
case DT_DIR:
type = Entry::Type::DIR;
break;
case DT_REG:
type = Entry::Type::REG_FILE;
break;
case DT_LNK:
// DT_UNKNOWN should also be resolved by stat again to against inaccurate
// meta info from file system, eg. expect DT_LNK, return DT_UNKNOWN.
// Ref: https://aone.alibaba-inc.com/issue/36979148.
case DT_UNKNOWN:
// All known types have already been enumerated at above, stat for unknown
// new types for better compatibility.
default: {
type = Entry::Type::UNKNOWN;
isSymbolic = (DT_LNK == ent->d_type);
if (!resolveWithStat) {
break;
}
// Try to get target type by stat, if failed, return UNKNOWN to remind
// caller that maybe the symbolic is invalid.
auto fullPath = PathJoin(mDirPath, fileName);
struct stat fileStat;
if (stat(fullPath.c_str(), &fileStat) != 0) {
LOG_WARNING(sLogger,
("Get file info fail",
fullPath)("errno", errno)("strerror", strerror(errno))("d_type", ent->d_type));
break;
}
if (S_ISDIR(fileStat.st_mode)) {
type = Entry::Type::DIR;
} else if (S_ISREG(fileStat.st_mode)) {
type = Entry::Type::REG_FILE;
}
break;
}
};
return Entry(fileName, type, isSymbolic);
}
#elif defined(_MSC_VER)
if (mCachedEntry) {
Entry entry = mCachedEntry;
mCachedEntry = Entry();
return entry;
}
WIN32_FIND_DATA ffd;
while (true) {
if (0 == FindNextFile(mFind, &ffd)) {
auto err = GetLastError();
if (err != ERROR_NO_MORE_FILES) {
LOG_WARNING(sLogger, ("Unexpected error when call FindNextFile", err)("dir path", mDirPath));
}
return Entry();
}
auto entry = ConstructEntry(ffd);
if (entry)
return entry;
}
#endif
}
void Dir::Close() {
if (!IsOpened())
return;
#if defined(__linux__)
if (closedir(mDir) != 0) {
LOG_WARNING(sLogger, ("Close dir failed", mDirPath)("errno", errno));
return;
}
mDir = nullptr;
#elif defined(_MSC_VER)
if (!FindClose(mFind)) {
LOG_WARNING(sLogger, ("Close dir failed", mDirPath)("errno", GetLastError()));
return;
}
mFind = INVALID_HANDLE_VALUE;
#endif
}
PathStat::PathStat() {
}
PathStat::~PathStat() {
}
bool PathStat::stat(const std::string& path, PathStat& ps) {
ps.mPath = path;
#if defined(__linux__)
return (0 == ::stat(path.c_str(), &(ps.mRawStat)));
#elif defined(_MSC_VER)
// For backward performance compatibility, check the path only
// when the flag is enabled by user configuration.
if (!BOOL_FLAG(enable_root_path_collection) || path.back() != ':') {
return 0 == ::_stat64(path.c_str(), &(ps.mRawStat));
}
// _stat64("D:") returns non-zero.
return 0 == ::_stat64((path + PATH_SEPARATOR).c_str(), &(ps.mRawStat));
#endif
}
bool PathStat::IsDir() const {
#if defined(__linux__)
return S_ISDIR(mRawStat.st_mode);
#elif defined(_MSC_VER)
return mRawStat.st_mode & S_IFDIR;
#endif
}
bool PathStat::IsRegFile() const {
#if defined(__linux__)
return S_ISREG(mRawStat.st_mode);
#elif defined(_MSC_VER)
return mRawStat.st_mode & S_IFREG;
#endif
}
bool PathStat::lstat(const std::string& path, PathStat& ps) {
ps.mPath = path;
#if defined(__linux__)
return (0 == ::lstat(path.c_str(), &(ps.mRawStat)));
#elif defined(_MSC_VER)
return (0 == ::_stat64(path.c_str(), &(ps.mRawStat)));
#endif
}
bool PathStat::IsLink() const {
#if defined(__linux__)
return S_ISLNK(mRawStat.st_mode);
#elif defined(_MSC_VER)
return false; // Windows shortcut is not symbolic link.
#endif
}
bool PathStat::fstat(FILE* file, PathStat& ps, bool resolvePath) {
#if defined(__linux__)
return (0 == ::fstat(fileno(file), &(ps.mRawStat)));
#elif defined(_MSC_VER)
return fstat(_fileno(file), ps, resolvePath);
#endif
}
bool PathStat::fstat(int fd, PathStat& ps, bool resolvePath) {
#if defined(__linux__)
return (0 == ::fstat(fd, &(ps.mRawStat)));
#elif defined(_MSC_VER)
auto fstatRet = ::_fstat64(fd, &(ps.mRawStat));
if (fstatRet != 0)
return false;
if (!resolvePath)
return true;
ps.mPath = GetFdPath(fd);
return !ps.mPath.empty();
#endif
}
#if defined(_MSC_VER)
// FILETIME2Time converts ft to time_t.
// @return second part.
static int64_t FILETIME2Time(const FILETIME ft, int64_t* nsec = nullptr) {
ULARGE_INTEGER ui;
ui.LowPart = ft.dwLowDateTime;
ui.HighPart = ft.dwHighDateTime;
int64_t sec = static_cast<int64_t>(ui.QuadPart / 10000000 - 11644473600);
if (nsec != nullptr) {
*nsec = static_cast<int64_t>(ui.QuadPart % 10000000) * 100;
}
return sec;
}
bool PathStat::fstat(HANDLE hFile, PathStat& ps, bool resolvePath) {
// st_mtime.
FILETIME mtim;
if (FALSE == GetFileTime(hFile, NULL, NULL, &mtim)) {
return false;
}
ps.mRawStat.st_mtime = static_cast<time_t>(FILETIME2Time(mtim));
// st_size.
LARGE_INTEGER liSize;
if (FALSE == GetFileSizeEx(hFile, &liSize)) {
return false;
}
ps.mRawStat.st_size = liSize.QuadPart;
if (!resolvePath) {
return true;
}
// ps.mPath.
char filePath[MAX_PATH + 1];
auto ret = GetFinalPathNameByHandle(hFile, filePath, MAX_PATH + 1, VOLUME_NAME_DOS);
if (ret > MAX_PATH || ret <= 0) {
return false;
}
if (0 == memcmp(filePath, "\\\\?\\", 4)) {
ps.mPath.assign(filePath + 4);
} else {
ps.mPath = filePath;
}
return true;
}
#endif
time_t PathStat::GetMtime() const {
return mRawStat.st_mtime;
}
void PathStat::GetLastWriteTime(int64_t& sec, int64_t& nsec) const {
#if defined(__linux__)
sec = mRawStat.st_mtim.tv_sec;
nsec = mRawStat.st_mtim.tv_nsec;
#elif defined(_MSC_VER)
HANDLE hFile = CreateFile(mPath.c_str(),
GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
NULL);
if (INVALID_HANDLE_VALUE == hFile)
return;
FILETIME mtim;
auto ret = GetFileTime(hFile, NULL, NULL, &mtim);
CloseHandle(hFile);
if (!ret)
return;
sec = FILETIME2Time(mtim, &nsec);
#endif
}
DevInode PathStat::GetDevInode() const {
#if defined(__linux__)
return DevInode(mRawStat.st_dev, mRawStat.st_ino);
#elif defined(_MSC_VER)
return GetFileDevInode(mPath);
#endif
}
int64_t PathStat::GetFileSize() const {
return mRawStat.st_size;
}
} // namespace fsutil
bool ReadFile(const string& filepath, string& content) {
constexpr size_t read_size = size_t(4096);
ifstream fin(filepath);
if (!fin) {
return false;
}
string buf = string(read_size, '\0');
while (fin.read(&buf[0], read_size)) {
content.append(buf, 0, fin.gcount());
}
content.append(buf, 0, fin.gcount());
return true;
}
} // namespace logtail