libminifi/include/utils/file/FileUtils.h (458 lines of code) (raw):
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <fstream>
#include <memory>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
#include <cstdio>
#include <algorithm>
#ifndef WIN32
#include <unistd.h>
#include <sys/stat.h> //NOLINT
#include <pwd.h>
#include <grp.h>
#endif
#include <fcntl.h>
#ifdef WIN32
#include <stdio.h>
#include <direct.h>
#include <sys/stat.h> // stat // NOLINT
#include <sys/types.h> // NOLINT
#include <sys/utime.h> // _utime64
#include <tchar.h> // _tcscpy,_tcscat,_tcscmp
#include <windows.h> // winapi
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <Windows.h>
#include <WinSock2.h>
#include <WS2tcpip.h>
#pragma comment(lib, "Ws2_32.lib")
#include <string>
#include "properties/Properties.h"
#include "utils/Id.h"
#include "accctrl.h"
#include "aclapi.h"
#pragma comment(lib, "advapi32.lib")
#endif
#ifdef __APPLE__
#include <mach-o/dyld.h>
#endif
#include "core/logging/LoggerFactory.h"
#include "utils/StringUtils.h"
#include "utils/file/PathUtils.h"
#include "utils/gsl.h"
#ifndef S_ISDIR
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif
namespace org::apache::nifi::minifi::utils::file {
namespace FileUtils = ::org::apache::nifi::minifi::utils::file;
std::chrono::system_clock::time_point to_sys(std::filesystem::file_time_type file_time);
std::filesystem::file_time_type from_sys(std::chrono::system_clock::time_point sys_time);
inline int64_t delete_dir(const std::filesystem::path& path, bool delete_files_recursively = true) {
// Empty path is interpreted as the root of the current partition on Windows, which should not be allowed
if (path.empty()) {
return -1;
}
try {
if (std::filesystem::exists(path)) {
if (delete_files_recursively) {
std::filesystem::remove_all(path);
} else {
std::filesystem::remove(path);
}
}
} catch (const std::filesystem::filesystem_error&) {
return -1;
// display error message
}
return 0;
}
inline std::chrono::time_point<std::chrono::file_clock,
std::chrono::seconds> last_write_time_point(const std::filesystem::path& path) {
std::error_code ec;
auto result = std::filesystem::last_write_time(path, ec);
if (ec.value() == 0) {
return std::chrono::time_point_cast<std::chrono::seconds>(result);
}
return std::chrono::time_point<std::chrono::file_clock, std::chrono::seconds>{};
}
inline std::optional<std::filesystem::file_time_type> last_write_time(const std::filesystem::path& path) {
std::error_code ec;
auto result = std::filesystem::last_write_time(path, ec);
if (ec.value() == 0) {
return result;
}
return std::nullopt;
}
inline bool set_last_write_time(const std::filesystem::path& path, std::filesystem::file_time_type new_time) {
std::error_code ec;
std::filesystem::last_write_time(path, new_time, ec);
return ec.value() == 0;
}
inline uint64_t file_size(const std::filesystem::path& path) {
std::error_code ec;
auto file_size = std::filesystem::file_size(path, ec);
if (ec.value() != 0)
return 0;
return file_size;
}
inline bool get_permissions(const std::filesystem::path& path, uint32_t& permissions) {
std::error_code ec;
permissions = static_cast<uint32_t>(std::filesystem::status(path, ec).permissions());
return ec.value() == 0;
}
inline int set_permissions(const std::filesystem::path& path, const uint32_t permissions) {
std::error_code ec;
std::filesystem::permissions(path, static_cast<std::filesystem::perms>(permissions), ec);
return ec.value();
}
inline std::optional<std::string> get_permission_string(const std::filesystem::path& path) {
std::error_code ec;
auto permissions = std::filesystem::status(path, ec).permissions();
if (ec.value() != 0) {
return std::nullopt;
}
std::string permission_string;
permission_string += (permissions & std::filesystem::perms::owner_read) != std::filesystem::perms::none ? "r" : "-";
permission_string += (permissions & std::filesystem::perms::owner_write) != std::filesystem::perms::none ? "w" : "-";
permission_string += (permissions & std::filesystem::perms::owner_exec) != std::filesystem::perms::none ? "x" : "-";
permission_string += (permissions & std::filesystem::perms::group_read) != std::filesystem::perms::none ? "r" : "-";
permission_string += (permissions & std::filesystem::perms::group_write) != std::filesystem::perms::none ? "w" : "-";
permission_string += (permissions & std::filesystem::perms::group_exec) != std::filesystem::perms::none ? "x" : "-";
permission_string += (permissions & std::filesystem::perms::others_read) != std::filesystem::perms::none ? "r" : "-";
permission_string += (permissions & std::filesystem::perms::others_write) != std::filesystem::perms::none ? "w" : "-";
permission_string += (permissions & std::filesystem::perms::others_exec) != std::filesystem::perms::none ? "x" : "-";
return permission_string;
}
#ifndef WIN32
inline bool get_uid_gid(const std::string &path, uint64_t &uid, uint64_t &gid) {
struct stat result = {};
if (stat(path.c_str(), &result) == 0) {
uid = result.st_uid;
gid = result.st_gid;
return true;
}
return false;
}
#endif
inline bool is_directory(const std::filesystem::path &path) {
std::error_code ec;
bool result = std::filesystem::is_directory(path, ec);
if (ec.value() == 0) {
return result;
}
return false;
}
inline uint64_t path_size(const std::filesystem::path& path) {
uint64_t size = 0;
if (std::filesystem::is_regular_file(path)) {
return utils::file::file_size(path);
} else if (utils::file::is_directory(path)) {
for (const std::filesystem::directory_entry& entry : std::filesystem::recursive_directory_iterator(path, std::filesystem::directory_options::skip_permission_denied)) {
if (entry.is_regular_file()) {
size += entry.file_size();
}
}
}
return size;
}
inline bool exists(const std::filesystem::path &path) {
std::error_code ec;
bool result = std::filesystem::exists(path, ec);
if (ec.value() == 0) {
return result;
}
return false;
}
inline int create_dir(const std::filesystem::path& path, bool recursive = true) {
std::filesystem::path dir(path);
std::error_code ec;
if (!recursive) {
std::filesystem::create_directory(dir, ec);
} else {
std::filesystem::create_directories(dir, ec);
}
if (ec.value() == 0 || (ec.value() == EEXIST && is_directory(path.c_str()))) {
return 0;
}
return ec.value();
}
inline int copy_file(const std::filesystem::path& path_from, const std::filesystem::path& dest_path) {
std::error_code ec;
auto copy_success = std::filesystem::copy_file(path_from, dest_path, std::filesystem::copy_options::overwrite_existing, ec);
if (ec.value() != 0 || !copy_success)
return -1;
return 0;
}
inline void addFilesMatchingExtension(const std::shared_ptr<core::logging::Logger> &logger,
const std::filesystem::path& originalPath,
const std::filesystem::path& extension,
std::vector<std::filesystem::path>& accruedFiles) {
if (!utils::file::exists(originalPath)) {
logger->log_warn("Failed to open directory: %s", originalPath.string());
return;
}
if (utils::file::is_directory(originalPath)) {
// only perform a listing while we are not empty
logger->log_debug("Looking for files with %s extension in %s", extension.string(), originalPath.string());
for (const auto& entry: std::filesystem::directory_iterator(originalPath,
std::filesystem::directory_options::skip_permission_denied)) {
if (utils::file::is_directory(entry.path())) { // if this is a directory
addFilesMatchingExtension(logger, entry.path(), extension, accruedFiles);
} else {
if (entry.path().extension() == extension) {
logger->log_info("Adding %s to paths", entry.path().string());
accruedFiles.push_back(entry.path());
}
}
}
} else if (std::filesystem::is_regular_file(originalPath)) {
if (originalPath.extension() == extension) {
logger->log_info("Adding %s to paths", originalPath.string());
accruedFiles.push_back(originalPath);
}
} else {
logger->log_error("Could not access %s", originalPath.string());
}
}
/**
* Provides a platform-independent function to list a directory
* @param dir The directory to start the enumeration from.
* @param callback Callback is called for every file found: first argument is the path of the directory, second is the filename.
* Return value of the callback is used to continue (true) or stop (false) listing
* @param logger
* @param dir_callback Called for every child directory, its return value decides if we should descend and recursively
* process that directory or not.
*/
inline void list_dir(const std::filesystem::path& dir,
const std::function<bool(const std::filesystem::path&, const std::filesystem::path&)>& callback,
const std::shared_ptr<core::logging::Logger> &logger,
const std::function<bool(const std::filesystem::path&)>& dir_callback) {
logger->log_debug("Performing file listing against %s", dir.string());
if (!utils::file::exists(dir)) {
logger->log_warn("Failed to open directory: %s", dir.string());
return;
}
for (const auto &entry: std::filesystem::directory_iterator(dir, std::filesystem::directory_options::skip_permission_denied)) {
auto d_name = entry.path().filename();
auto path = entry.path();
if (utils::file::is_directory(path)) { // if this is a directory
if (dir_callback(path)) {
list_dir(path, callback, logger, dir_callback);
}
} else {
if (!callback(dir, d_name)) {
break;
}
}
}
}
inline void list_dir(const std::filesystem::path& dir,
const std::function<bool(const std::filesystem::path&, const std::filesystem::path&)>& callback,
const std::shared_ptr<core::logging::Logger> &logger,
bool recursive = true) {
list_dir(dir, callback, logger, [&] (const std::filesystem::path&) {
return recursive;
});
}
inline std::vector<std::pair<std::filesystem::path, std::filesystem::path>> list_dir_all(const std::filesystem::path& dir, const std::shared_ptr<core::logging::Logger> &logger,
bool recursive = true) {
std::vector<std::pair<std::filesystem::path, std::filesystem::path>> fileList;
auto lambda = [&fileList] (const std::filesystem::path& parent_path, const std::filesystem::path& filename) {
fileList.emplace_back(parent_path, filename);
return true;
};
list_dir(dir, lambda, logger, recursive);
return fileList;
}
inline std::filesystem::path create_temp_directory(char* format) {
#ifdef WIN32
const auto tempDirectory = std::filesystem::temp_directory_path() / minifi::utils::IdGenerator::getIdGenerator()->generate().to_string().view();
create_dir(tempDirectory);
return tempDirectory;
#else
if (mkdtemp(format) == nullptr) { return ""; }
return format;
#endif
}
inline bool is_hidden(const std::filesystem::path& path) {
#ifdef WIN32
DWORD attributes = GetFileAttributesA(path.string().c_str());
return ((attributes != INVALID_FILE_ATTRIBUTES) && ((attributes & FILE_ATTRIBUTE_HIDDEN) != 0));
#else
return path.filename().string().starts_with('.');
#endif
}
/*
* Returns the absolute path of the current executable
*/
inline std::filesystem::path get_executable_path() {
#if defined(__linux__)
std::vector<char> buf(1024U);
while (true) {
ssize_t ret = readlink("/proc/self/exe", buf.data(), buf.size());
if (ret < 0) {
return "";
}
if (static_cast<size_t>(ret) == buf.size()) {
/* It may have been truncated */
buf.resize(buf.size() * 2);
continue;
}
return std::string(buf.data(), ret);
}
#elif defined(__APPLE__)
std::vector<char> buf(PATH_MAX);
uint32_t buf_size = buf.size();
while (_NSGetExecutablePath(buf.data(), &buf_size) != 0) {
buf.resize(buf_size);
}
std::vector<char> resolved_name(PATH_MAX);
if (realpath(buf.data(), resolved_name.data()) == nullptr) {
return "";
}
return std::string(resolved_name.data());
#elif defined(WIN32)
HMODULE hModule = GetModuleHandleA(nullptr);
if (hModule == nullptr) {
return "";
}
std::vector<char> buf(1024U);
while (true) {
size_t ret = GetModuleFileNameA(hModule, buf.data(), gsl::narrow<DWORD>(buf.size()));
if (ret == 0U) {
return "";
}
if (ret == buf.size() && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
/* It has been truncated */
buf.resize(buf.size() * 2);
continue;
}
return std::string(buf.data());
}
#else
return "";
#endif
}
inline std::filesystem::path resolve(const std::filesystem::path& base, const std::filesystem::path& path) {
if (path.is_absolute()) {
return path;
}
return base / path;
}
/*
* Returns the absolute path to the directory containing the current executable
*/
inline std::filesystem::path get_executable_dir() {
auto executable_path = get_executable_path();
if (executable_path.empty()) {
return "";
}
return executable_path.parent_path();
}
inline int close(int file_descriptor) {
#ifdef WIN32
return _close(file_descriptor);
#else
return ::close(file_descriptor);
#endif
}
uint64_t computeChecksum(const std::filesystem::path& file_name, uint64_t up_to_position);
inline std::string get_content(const std::filesystem::path& file_name) {
std::ifstream file(file_name, std::ifstream::binary);
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
return content;
}
bool contains(const std::filesystem::path& file_path, std::string_view text_to_search);
inline std::optional<std::string> get_file_owner(const std::filesystem::path& file_path) {
#ifndef WIN32
struct stat info = {};
if (stat(file_path.c_str(), &info) != 0) {
return std::nullopt;
}
struct passwd pw = {};
pw.pw_name = nullptr;
struct passwd *result = nullptr;
char localbuf[1024] = {};
if (getpwuid_r(info.st_uid, &pw, localbuf, sizeof(localbuf), &result) != 0 || pw.pw_name == nullptr) {
return std::nullopt;
}
return std::string(pw.pw_name);
#else
DWORD return_code = 0;
PSID sid_owner = NULL;
BOOL bool_return = TRUE;
LPTSTR account_name = NULL;
LPTSTR domain_name = NULL;
DWORD account_name_dword = 1;
DWORD domain_name_dword = 1;
SID_NAME_USE sid_type = SidTypeUnknown;
HANDLE file_handle;
PSECURITY_DESCRIPTOR sec_descriptor = NULL;
// Get the handle of the file object.
file_handle = CreateFile(
TEXT(file_path.string().c_str()),
GENERIC_READ,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
// Check GetLastError for CreateFile error code.
if (file_handle == INVALID_HANDLE_VALUE) {
return std::nullopt;
}
auto close_file_handle = gsl::finally([&file_handle] { CloseHandle(file_handle); });
// Get the owner SID of the file.
return_code = GetSecurityInfo(
file_handle,
SE_FILE_OBJECT,
OWNER_SECURITY_INFORMATION,
&sid_owner,
NULL,
NULL,
NULL,
&sec_descriptor);
// Check GetLastError for GetSecurityInfo error condition.
if (return_code != ERROR_SUCCESS) {
return std::nullopt;
}
// First call to LookupAccountSid to get the buffer sizes.
bool_return = LookupAccountSid(
NULL,
sid_owner,
account_name,
(LPDWORD)&account_name_dword,
domain_name,
(LPDWORD)&domain_name_dword,
&sid_type);
// Reallocate memory for the buffers.
account_name = (LPTSTR)GlobalAlloc(
GMEM_FIXED,
account_name_dword);
// Check GetLastError for GlobalAlloc error condition.
if (account_name == NULL) {
return std::nullopt;
}
auto cleanup_account_name = gsl::finally([&account_name] { GlobalFree(account_name); });
domain_name = (LPTSTR)GlobalAlloc(
GMEM_FIXED,
domain_name_dword);
// Check GetLastError for GlobalAlloc error condition.
if (domain_name == NULL) {
return std::nullopt;
}
auto cleanup_domain_name = gsl::finally([&domain_name] { GlobalFree(domain_name); });
// Second call to LookupAccountSid to get the account name.
bool_return = LookupAccountSid(
NULL, // name of local or remote computer
sid_owner, // security identifier
account_name, // account name buffer
(LPDWORD)&account_name_dword, // size of account name buffer
domain_name, // domain name
(LPDWORD)&domain_name_dword, // size of domain name buffer
&sid_type); // SID type
// Check GetLastError for LookupAccountSid error condition.
if (bool_return == FALSE) {
return std::nullopt;
}
auto result = std::string(account_name);
return result;
#endif
}
#ifndef WIN32
inline std::optional<std::string> get_file_group(const std::filesystem::path& file_path) {
struct stat info = {};
if (stat(file_path.c_str(), &info) != 0) {
return std::nullopt;
}
struct group gr = {};
gr.gr_name = nullptr;
struct group *result = nullptr;
char localbuf[1024] = {};
if ((getgrgid_r(info.st_uid, &gr, localbuf, sizeof(localbuf), &result) != 0) || gr.gr_name == nullptr) {
return std::nullopt;
}
return std::string(gr.gr_name);
}
#endif
inline std::optional<std::filesystem::path> get_relative_path(const std::filesystem::path& path, const std::filesystem::path& base_path) {
if (!utils::StringUtils::startsWith(path.string(), base_path.string())) {
return std::nullopt;
}
return std::filesystem::relative(path, base_path);
}
} // namespace org::apache::nifi::minifi::utils::file