in rd-agent/src/main.rs [620:1025]
fn startup_checks(&mut self) -> Result<()> {
let sys = sysinfo::System::new();
// Obtain rd-hashd version.
let output = Command::new(&self.hashd_paths[0].bin)
.arg("--version")
.output()
.expect("cfg: \"rd-hashd --version\" failed");
let hashd_version = String::from_utf8(output.stdout)
.unwrap()
.lines()
.next()
.expect("cfg: Failed to read \"rd-hashd --version\" output")
.trim_start_matches("rd-hashd ")
.to_string();
// check cgroup2 & controllers
match path_to_mountpoint("/sys/fs/cgroup") {
Ok(mi) => {
if mi.fstype != "cgroup2" {
self.sr_failed
.add(SysReq::Controllers, "/sys/fs/cgroup is not cgroup2 fs");
}
if !mi.options.contains(&"memory_recursiveprot".to_string()) {
if self.enforce.mem {
match Command::new("mount")
.arg("-o")
.arg("remount,memory_recursiveprot")
.arg(&mi.dest)
.spawn()
.and_then(|mut x| x.wait())
{
Ok(rc) if rc.success() => {
info!("cfg: enabled memcg recursive protection")
}
Ok(rc) => {
self.sr_failed.add(
SysReq::MemCgRecursiveProt,
&format!(
"failed to enable memcg recursive protection ({:?})",
&rc
),
);
}
Err(e) => {
self.sr_failed.add(
SysReq::MemCgRecursiveProt,
&format!(
"failed to enable memcg recursive protection ({:?})",
&e
),
);
}
}
} else {
self.sr_failed.add(
SysReq::MemCgRecursiveProt,
"memcg recursive protection not enabled",
);
}
}
}
Err(e) => {
self.sr_failed.add(
SysReq::Controllers,
&format!("failed to obtain mountinfo for /sys/fs/cgroup ({:#})", &e),
);
}
}
let mut buf = String::new();
fs::File::open("/sys/fs/cgroup/cgroup.controllers")
.and_then(|mut f| f.read_to_string(&mut buf))?;
for ctrl in ["cpu", "memory", "io"].iter() {
if !buf.contains(ctrl) {
self.sr_failed.add(
SysReq::Controllers,
&format!("cgroup2 {} controller not available", ctrl),
);
}
}
if !Path::new("/sys/fs/cgroup/system.slice/cgroup.freeze").exists() {
self.sr_failed
.add(SysReq::Freezer, "cgroup2 freezer not available");
}
// IO controllers
self.check_iocost(self.enforce.io);
slices::check_other_io_controllers(&mut self.sr_failed);
// anon memory balance
match read_cgroup_flat_keyed_file("/proc/vmstat") {
Ok(stat) => {
if let None = stat.get("pgscan_anon") {
self.sr_failed.add(
SysReq::AnonBalance,
"/proc/vmstat doesn't contain pgscan_anon",
);
}
}
Err(e) => {
self.sr_failed.add(
SysReq::AnonBalance,
&format!("failed to read /proc/vmstat ({:#})", &e),
);
}
}
// scratch and root filesystems
let mi = self.check_one_fs(&self.scr_path.clone(), "Scratch dir", self.enforce.fs);
if mi.is_none() || mi.unwrap().dest != AsRef::<Path>::as_ref("/") {
self.check_one_fs("/", "Root fs", self.enforce.fs);
}
if self.scr_dev.starts_with("md") || self.scr_dev.starts_with("dm") {
if self.scr_dev_forced {
warn!(
"cfg: Composite device {:?} overridden with --dev, IO isolation likely won't work",
&self.scr_dev
);
} else {
self.sr_failed.add(
SysReq::NoCompositeStorage,
&format!(
"Scratch dir {:?} is on a composite dev {:?}, specify the real one with --dev",
&self.scr_path, &self.scr_dev
),
);
}
}
// mq-deadline scheduler
if self.enforce.io {
if let Ok(v) = read_iosched(&self.scr_dev) {
self.sr_iosched = Some(v);
}
if let Err(e) = set_iosched(&self.scr_dev, "mq-deadline") {
self.sr_failed.add(
SysReq::IoSched,
&format!(
"Failed to set mq-deadline iosched on {:?} ({})",
&self.scr_dev, &e
),
);
}
}
let scr_dev_iosched = match read_iosched(&self.scr_dev) {
Ok(v) => {
if v != "mq-deadline" {
self.sr_failed.add(
SysReq::IoSched,
&format!(
"cfg: iosched on {:?} is {} instead of mq-deadline",
&self.scr_dev, v
),
);
}
v
}
Err(e) => {
self.sr_failed.add(
SysReq::IoSched,
&format!("Failed to read iosched for {:?} ({})", &self.scr_dev, &e),
);
"UNKNOWN".into()
}
};
// wbt should be disabled
let wbt_path = format!("/sys/block/{}/queue/wbt_lat_usec", &self.scr_dev);
if let Ok(line) = read_one_line(&wbt_path) {
let wbt = line.trim().parse::<u64>()?;
if wbt != 0 {
if self.enforce.io {
info!("cfg: wbt is enabled on {:?}, disabling", &self.scr_dev);
if let Err(e) = write_one_line(&wbt_path, "0") {
self.sr_failed.add(
SysReq::NoWbt,
&format!("Failed to disable wbt on {:?} ({})", &self.scr_dev, &e),
);
}
self.sr_wbt = Some(wbt);
self.sr_wbt_path = Some(wbt_path);
} else {
self.sr_failed.add(
SysReq::NoWbt,
&format!("wbt is enabled on {:?}", &self.scr_dev),
);
}
}
}
// swap should be on the same device as scratch
for swap_dev in swap_devnames()?.iter() {
let dev = swap_dev.to_str().unwrap_or_default().to_string();
if dev != self.scr_dev {
if self.scr_dev_forced {
let det_scr_dev = path_to_devname(&self.scr_path).unwrap_or_default();
if dev != det_scr_dev.to_str().unwrap_or_default() {
warn!(
"cfg: Swap backing dev {:?} is different from forced scratch dev {:?}",
&swap_dev, &self.scr_dev
);
}
} else {
self.sr_failed.add(
SysReq::SwapOnScratch,
&format!(
"Swap backing dev {:?} is different from scratch backing dev {:?}",
&swap_dev, self.scr_dev
),
);
}
}
}
// swap configuration check
let swap_total = total_swap();
let swap_avail = swap_total - sys.used_swap() as usize * 1024;
if (swap_total as f64) < (total_memory() as f64 * 0.3) {
self.sr_failed.add(
SysReq::Swap,
&format!(
"Swap {:.2}G is smaller than 1/3 of memory {:.2}G",
to_gb(swap_total),
to_gb(total_memory() / 3)
),
);
}
if (swap_avail as f64) < (total_memory() as f64 * 0.3).min((31 << 30) as f64) {
self.sr_failed.add(
SysReq::Swap,
&format!(
"Available swap {:.2}G is smaller than min(1/3 of memory {:.2}G, 32G)",
to_gb(swap_avail),
to_gb(total_memory() / 3)
),
);
}
if let Ok(swappiness) = read_swappiness() {
if self.enforce.mem {
self.sr_swappiness = Some(swappiness);
}
if swappiness < 60 {
if self.enforce.mem {
info!(
"cfg: Swappiness {} is smaller than default 60, updating to 60",
swappiness
);
if let Err(e) = write_one_line(SWAPPINESS_PATH, "60") {
self.sr_failed.add(
SysReq::Swap,
&format!("Failed to update swappiness ({})", &e),
);
}
} else {
self.sr_failed.add(
SysReq::Swap,
&format!("Swappiness {} is smaller than default 60", swappiness),
);
}
}
}
if let Ok(zswap_enabled) = read_zswap_enabled() {
if self.enforce.mem {
self.sr_zswap_enabled = Some(zswap_enabled);
}
}
// do we have oomd?
if let Err(e) = &self.oomd_bin {
self.sr_failed.add(
SysReq::Oomd,
&format!(
"Failed to find oomd ({:#}), see https://github.com/facebookincubator/oomd",
&e
),
);
}
// make sure oomd or earlyoom isn't gonna interfere
if let Some(oomd_sys_svc) = &self.oomd_sys_svc {
if let Ok(svc) = systemd::Unit::new_sys(oomd_sys_svc.clone()) {
if svc.state == systemd::UnitState::Running && self.enforce.oomd {
self.sr_oomd_sys_svc = Some(svc);
let svc = self.sr_oomd_sys_svc.as_mut().unwrap();
info!("cfg: Stopping {:?} while resctl-demo is running", &svc.name);
let _ = svc.stop();
}
}
}
if let Ok(mut svc) = systemd::Unit::new_sys(OOMD_SVC_NAME.into()) {
let _ = svc.stop();
}
// Gotta re-read sysinfo to avoid reading cached oomd pid from
// before stopping it.
let sys = sysinfo::System::new();
let procs = sys.processes();
for (pid, proc) in procs {
let exe = proc
.exe()
.file_name()
.unwrap_or_default()
.to_str()
.unwrap_or_default();
match exe {
"oomd" | "earlyoom" => {
self.sr_failed.add(
SysReq::NoSysOomd,
&format!("{:?} detected (pid {}): disable", &exe, pid),
);
}
_ => {}
}
}
// base dependencies
for dep in &["python3"] {
if find_bin(dep, Option::<&str>::None).is_none() {
self.sr_failed.add(
SysReq::DepsBase,
&format!("Base dependency {:?} is missing", dep),
);
}
}
// support binaries for iocost_coef_gen.py
for dep in &["findmnt", "dd", "fio", "stdbuf"] {
if find_bin(dep, Option::<&str>::None).is_none() {
self.sr_failed.add(
SysReq::DepsIoCostCoefGen,
&format!("iocost_coef_gen.py dependency {:?} is missing", dep),
);
}
}
// hostcriticals - ones which can be restarted for relocation
for svc_name in ["systemd-journald.service", "sshd.service", "sssd.service"].iter() {
if let Err(e) =
Self::check_one_hostcritical_service(svc_name, true, self.enforce.crit_mem_prot)
{
self.sr_failed
.add(SysReq::HostCriticalServices, &format!("{}", &e));
}
}
// and the ones which can't
for svc_name in ["dbus.service", "dbus-broker.service"].iter() {
if let Err(e) =
Self::check_one_hostcritical_service(svc_name, false, self.enforce.crit_mem_prot)
{
self.sr_failed
.add(SysReq::HostCriticalServices, &format!("{}", &e));
}
}
// sideload checks
side::startup_checks(self);
let (scr_dev_model, scr_dev_fwrev, scr_dev_size) =
match devname_to_model_fwrev_size(&self.scr_dev) {
Ok(v) => v,
Err(e) => bail!(
"failed to determine model, fwrev and size of {:?} ({})",
&self.scr_dev,
&e
),
};
SysReqsReport {
satisfied: &*ALL_SYSREQS_SET ^ &self.sr_failed.map.keys().copied().collect(),
missed: self.sr_failed.clone(),
kernel_version: sys.kernel_version().expect("Failed to read kernel version"),
agent_version: FULL_VERSION.to_string(),
hashd_version,
nr_cpus: nr_cpus(),
total_memory: total_memory(),
total_swap: total_swap(),
scr_dev: self.scr_dev.clone(),
scr_devnr: self.scr_devnr,
scr_dev_model,
scr_dev_fwrev,
scr_dev_size,
scr_dev_iosched,
enforce: self.enforce.clone(),
}
.save(&self.sysreqs_path)?;
if self.sr_failed.map.is_empty() {
Ok(())
} else {
Err(anyhow!(
"{} startup checks failed",
self.sr_failed.map.len()
))
}
}