in bistro/processes/CGroupSetup.cpp [148:248]
/**
 * Creates the cgroup `cgname` under `<root>/<subsystem>/<slice>` for every
 * subsystem listed in `cg`, and configures each new cgroup:
 *  - checks that its "cgroup.procs" file can be opened with cgFileFlags(cg),
 *  - writes 1 to the notify-on-release file,
 *  - for the CPU subsystem, writes `cpuShares` when it is nonzero,
 *  - for the memory subsystem, writes `memoryLimitInBytes` when it is
 *    nonzero, and validates the value read back.
 *
 * All subsystems are attempted even after a failure, so that the thrown
 * message lists every problem at once.
 *
 * @param cgname  Name of the cgroup directory to create inside each slice.
 * @param cg      Root, slice, subsystems, and resource limits.
 * @return The paths of the "cgroup.procs" file of each created cgroup, in
 *         subsystem order.
 * @throws BistroException if any step failed for any subsystem. Before
 *         throwing, a best-effort attempt is made to remove every directory
 *         this call created; removal failures are appended to the message.
 */
std::vector<std::string> cgroupSetup(
    const std::string& cgname,
    const cpp2::CGroupOptions& cg) {
  // Make the cgroup dirs. If any fail, remove all created ones, then throw.
  std::vector<std::string> dirs; // Will be removed on error.
  std::vector<std::string> procs_paths; // The return value.
  boost::system::error_code ec; // Reused, error messages go into `errors`.
  std::vector<std::string> errors;
  for (const auto& subsystem : *cg.subsystems_ref()) {
    auto slice_dir =
        boost::filesystem::path(*cg.root_ref()) / subsystem / *cg.slice_ref();
    // The root & slice have to exist, otherwise the system is misconfigured.
    if (!boost::filesystem::is_directory(slice_dir, ec) || ec) {
      errors.emplace_back(folly::to<std::string>(
          "CGroup root/subsystem/slice must be a directory: ",
          slice_dir.native(),
          (ec ? ": " + ec.message() : "")));
      continue;
    }
    // Make the cgroup. On error, we will reap all new cgroups after the loop.
    dirs.emplace_back((slice_dir / cgname).native());
    if (!boost::filesystem::create_directories(dirs.back(), ec)) {
      if (ec) {
        errors.emplace_back(folly::to<std::string>(
            "Creating ", dirs.back(), ": ", ec.message()));
      } else {
        // `false` without an error code means nothing new was created, i.e.
        // the directory was already there -- we must not reuse (or later
        // remove) a cgroup that this call did not make.
        errors.emplace_back(folly::to<std::string>(
            "CGroup ", dirs.back(), " already exists"));
      }
      dirs.pop_back(); // No directory to remove at cleanup time.
      continue;
    }
    // Check if "/cgroup.procs" is writable in our just-made cgroup.
    procs_paths.emplace_back(
        folly::to<std::string>(dirs.back(), "/", kCGroupProcs));
    int fd = folly::openNoInt(procs_paths.back().c_str(), cgFileFlags(cg));
    // The short-circuit `||` ensures close is only attempted on a valid fd.
    if (fd == -1 || folly::closeNoInt(fd) == -1) {
      errors.emplace_back(makeUnixError("Cannot write ", procs_paths.back()));
      continue;
    }
    // Let the cgroup be auto-removed when empty (if "release_agent" is set).
    //
    // NB We could instead have set this on the parent cgroup, but that
    // seems more likely to cause problems down the line, if e.g. we decide
    // to reuse cgroups for perf reasons.
    if (!writeToCGroupFile(
            &errors, dirs.back(), kNotifyOnRelease, 1, cgFileFlags(cg))) {
      continue;
    }
    // Limit CPU usage.
    if (subsystem == kCPU && *cg.cpuShares_ref() &&
        !writeToCGroupFile(
            &errors,
            dirs.back(),
            kCPUShares,
            *cg.cpuShares_ref(),
            cgFileFlags(cg))) {
      continue;
    }
    // Limit RAM usage.
    //
    // NB If you ever decide to use the OOM notifier instead of a hard limit,
    // keep in mind that this can race with the freezer. Details here:
    // https://issues.apache.org/jira/browse/MESOS-1689 & MESOS-1758
    if (subsystem == kMemory && *cg.memoryLimitInBytes_ref() &&
        !writeToCGroupFile(
            &errors,
            dirs.back(),
            kMemoryLimitInBytes,
            *cg.memoryLimitInBytes_ref(),
            cgFileFlags(cg),
            [&](int64_t written, int64_t read_back) {
              // We don't mind if the kernel rounds up by up to 1MB.
              //
              // The difference is taken as `read_back - written` (the amount
              // of rounding up), in unsigned arithmetic so that it cannot
              // overflow; it is only meaningful when read_back >= written,
              // which is checked first.
              const uint64_t rounded_up_by = static_cast<uint64_t>(read_back) -
                  static_cast<uint64_t>(written);
              return read_back >= written &&
                  rounded_up_by <= (uint64_t{1} << 20);
            })) {
      continue;
    }
  }
  // On error, try to remove created directories to avoid leaking cgroups.
  if (!errors.empty()) {
    for (const auto& path : dirs) {
      boost::filesystem::remove(path, ec);
      if (ec) {
        errors.emplace_back(
            folly::to<std::string>("Removing ", path, ": ", ec.message()));
      }
    }
    throw BistroException(
        "Failed to make cgroup directories: ", folly::join("; ", errors));
  }
  return procs_paths;
}