fn startup_checks()

in rd-agent/src/main.rs [620:1025]


    fn startup_checks(&mut self) -> Result<()> {
        let sys = sysinfo::System::new();

        // Obtain rd-hashd version.
        let output = Command::new(&self.hashd_paths[0].bin)
            .arg("--version")
            .output()
            .expect("cfg: \"rd-hashd --version\" failed");
        let hashd_version = String::from_utf8(output.stdout)
            .unwrap()
            .lines()
            .next()
            .expect("cfg: Failed to read \"rd-hashd --version\" output")
            .trim_start_matches("rd-hashd ")
            .to_string();

        // check cgroup2 & controllers
        match path_to_mountpoint("/sys/fs/cgroup") {
            Ok(mi) => {
                if mi.fstype != "cgroup2" {
                    self.sr_failed
                        .add(SysReq::Controllers, "/sys/fs/cgroup is not cgroup2 fs");
                }

                if !mi.options.contains(&"memory_recursiveprot".to_string()) {
                    if self.enforce.mem {
                        match Command::new("mount")
                            .arg("-o")
                            .arg("remount,memory_recursiveprot")
                            .arg(&mi.dest)
                            .spawn()
                            .and_then(|mut x| x.wait())
                        {
                            Ok(rc) if rc.success() => {
                                info!("cfg: enabled memcg recursive protection")
                            }
                            Ok(rc) => {
                                self.sr_failed.add(
                                    SysReq::MemCgRecursiveProt,
                                    &format!(
                                        "failed to enable memcg recursive protection ({:?})",
                                        &rc
                                    ),
                                );
                            }
                            Err(e) => {
                                self.sr_failed.add(
                                    SysReq::MemCgRecursiveProt,
                                    &format!(
                                        "failed to enable memcg recursive protection ({:?})",
                                        &e
                                    ),
                                );
                            }
                        }
                    } else {
                        self.sr_failed.add(
                            SysReq::MemCgRecursiveProt,
                            "memcg recursive protection not enabled",
                        );
                    }
                }
            }
            Err(e) => {
                self.sr_failed.add(
                    SysReq::Controllers,
                    &format!("failed to obtain mountinfo for /sys/fs/cgroup ({:#})", &e),
                );
            }
        }

        let mut buf = String::new();
        fs::File::open("/sys/fs/cgroup/cgroup.controllers")
            .and_then(|mut f| f.read_to_string(&mut buf))?;
        for ctrl in ["cpu", "memory", "io"].iter() {
            if !buf.contains(ctrl) {
                self.sr_failed.add(
                    SysReq::Controllers,
                    &format!("cgroup2 {} controller not available", ctrl),
                );
            }
        }

        if !Path::new("/sys/fs/cgroup/system.slice/cgroup.freeze").exists() {
            self.sr_failed
                .add(SysReq::Freezer, "cgroup2 freezer not available");
        }

        // IO controllers
        self.check_iocost(self.enforce.io);
        slices::check_other_io_controllers(&mut self.sr_failed);

        // anon memory balance
        match read_cgroup_flat_keyed_file("/proc/vmstat") {
            Ok(stat) => {
                if let None = stat.get("pgscan_anon") {
                    self.sr_failed.add(
                        SysReq::AnonBalance,
                        "/proc/vmstat doesn't contain pgscan_anon",
                    );
                }
            }
            Err(e) => {
                self.sr_failed.add(
                    SysReq::AnonBalance,
                    &format!("failed to read /proc/vmstat ({:#})", &e),
                );
            }
        }

        // scratch and root filesystems
        let mi = self.check_one_fs(&self.scr_path.clone(), "Scratch dir", self.enforce.fs);

        if mi.is_none() || mi.unwrap().dest != AsRef::<Path>::as_ref("/") {
            self.check_one_fs("/", "Root fs", self.enforce.fs);
        }

        if self.scr_dev.starts_with("md") || self.scr_dev.starts_with("dm") {
            if self.scr_dev_forced {
                warn!(
                    "cfg: Composite device {:?} overridden with --dev, IO isolation likely won't work",
                    &self.scr_dev
                );
            } else {
                self.sr_failed.add(
                    SysReq::NoCompositeStorage,
                    &format!(
                        "Scratch dir {:?} is on a composite dev {:?}, specify the real one with --dev",
                        &self.scr_path, &self.scr_dev
                    ),
                );
            }
        }

        // mq-deadline scheduler
        if self.enforce.io {
            if let Ok(v) = read_iosched(&self.scr_dev) {
                self.sr_iosched = Some(v);
            }
            if let Err(e) = set_iosched(&self.scr_dev, "mq-deadline") {
                self.sr_failed.add(
                    SysReq::IoSched,
                    &format!(
                        "Failed to set mq-deadline iosched on {:?} ({})",
                        &self.scr_dev, &e
                    ),
                );
            }
        }

        let scr_dev_iosched = match read_iosched(&self.scr_dev) {
            Ok(v) => {
                if v != "mq-deadline" {
                    self.sr_failed.add(
                        SysReq::IoSched,
                        &format!(
                            "cfg: iosched on {:?} is {} instead of mq-deadline",
                            &self.scr_dev, v
                        ),
                    );
                }
                v
            }
            Err(e) => {
                self.sr_failed.add(
                    SysReq::IoSched,
                    &format!("Failed to read iosched for {:?} ({})", &self.scr_dev, &e),
                );
                "UNKNOWN".into()
            }
        };

        // wbt should be disabled
        let wbt_path = format!("/sys/block/{}/queue/wbt_lat_usec", &self.scr_dev);
        if let Ok(line) = read_one_line(&wbt_path) {
            let wbt = line.trim().parse::<u64>()?;
            if wbt != 0 {
                if self.enforce.io {
                    info!("cfg: wbt is enabled on {:?}, disabling", &self.scr_dev);
                    if let Err(e) = write_one_line(&wbt_path, "0") {
                        self.sr_failed.add(
                            SysReq::NoWbt,
                            &format!("Failed to disable wbt on {:?} ({})", &self.scr_dev, &e),
                        );
                    }
                    self.sr_wbt = Some(wbt);
                    self.sr_wbt_path = Some(wbt_path);
                } else {
                    self.sr_failed.add(
                        SysReq::NoWbt,
                        &format!("wbt is enabled on {:?}", &self.scr_dev),
                    );
                }
            }
        }

        // swap should be on the same device as scratch
        for swap_dev in swap_devnames()?.iter() {
            let dev = swap_dev.to_str().unwrap_or_default().to_string();
            if dev != self.scr_dev {
                if self.scr_dev_forced {
                    let det_scr_dev = path_to_devname(&self.scr_path).unwrap_or_default();
                    if dev != det_scr_dev.to_str().unwrap_or_default() {
                        warn!(
                            "cfg: Swap backing dev {:?} is different from forced scratch dev {:?}",
                            &swap_dev, &self.scr_dev
                        );
                    }
                } else {
                    self.sr_failed.add(
                        SysReq::SwapOnScratch,
                        &format!(
                            "Swap backing dev {:?} is different from scratch backing dev {:?}",
                            &swap_dev, self.scr_dev
                        ),
                    );
                }
            }
        }

        // swap configuration check
        let swap_total = total_swap();
        let swap_avail = swap_total - sys.used_swap() as usize * 1024;

        if (swap_total as f64) < (total_memory() as f64 * 0.3) {
            self.sr_failed.add(
                SysReq::Swap,
                &format!(
                    "Swap {:.2}G is smaller than 1/3 of memory {:.2}G",
                    to_gb(swap_total),
                    to_gb(total_memory() / 3)
                ),
            );
        }
        if (swap_avail as f64) < (total_memory() as f64 * 0.3).min((31 << 30) as f64) {
            self.sr_failed.add(
                SysReq::Swap,
                &format!(
                    "Available swap {:.2}G is smaller than min(1/3 of memory {:.2}G, 32G)",
                    to_gb(swap_avail),
                    to_gb(total_memory() / 3)
                ),
            );
        }

        if let Ok(swappiness) = read_swappiness() {
            if self.enforce.mem {
                self.sr_swappiness = Some(swappiness);
            }
            if swappiness < 60 {
                if self.enforce.mem {
                    info!(
                        "cfg: Swappiness {} is smaller than default 60, updating to 60",
                        swappiness
                    );
                    if let Err(e) = write_one_line(SWAPPINESS_PATH, "60") {
                        self.sr_failed.add(
                            SysReq::Swap,
                            &format!("Failed to update swappiness ({})", &e),
                        );
                    }
                } else {
                    self.sr_failed.add(
                        SysReq::Swap,
                        &format!("Swappiness {} is smaller than default 60", swappiness),
                    );
                }
            }
        }

        if let Ok(zswap_enabled) = read_zswap_enabled() {
            if self.enforce.mem {
                self.sr_zswap_enabled = Some(zswap_enabled);
            }
        }

        // do we have oomd?
        if let Err(e) = &self.oomd_bin {
            self.sr_failed.add(
                SysReq::Oomd,
                &format!(
                    "Failed to find oomd ({:#}), see https://github.com/facebookincubator/oomd",
                    &e
                ),
            );
        }

        // make sure oomd or earlyoom isn't gonna interfere
        if let Some(oomd_sys_svc) = &self.oomd_sys_svc {
            if let Ok(svc) = systemd::Unit::new_sys(oomd_sys_svc.clone()) {
                if svc.state == systemd::UnitState::Running && self.enforce.oomd {
                    self.sr_oomd_sys_svc = Some(svc);
                    let svc = self.sr_oomd_sys_svc.as_mut().unwrap();
                    info!("cfg: Stopping {:?} while resctl-demo is running", &svc.name);
                    let _ = svc.stop();
                }
            }
        }

        if let Ok(mut svc) = systemd::Unit::new_sys(OOMD_SVC_NAME.into()) {
            let _ = svc.stop();
        }

        // Gotta re-read sysinfo to avoid reading cached oomd pid from
        // before stopping it.
        let sys = sysinfo::System::new();
        let procs = sys.processes();
        for (pid, proc) in procs {
            let exe = proc
                .exe()
                .file_name()
                .unwrap_or_default()
                .to_str()
                .unwrap_or_default();
            match exe {
                "oomd" | "earlyoom" => {
                    self.sr_failed.add(
                        SysReq::NoSysOomd,
                        &format!("{:?} detected (pid {}): disable", &exe, pid),
                    );
                }
                _ => {}
            }
        }

        // base dependencies
        for dep in &["python3"] {
            if find_bin(dep, Option::<&str>::None).is_none() {
                self.sr_failed.add(
                    SysReq::DepsBase,
                    &format!("Base dependency {:?} is missing", dep),
                );
            }
        }

        // support binaries for iocost_coef_gen.py
        for dep in &["findmnt", "dd", "fio", "stdbuf"] {
            if find_bin(dep, Option::<&str>::None).is_none() {
                self.sr_failed.add(
                    SysReq::DepsIoCostCoefGen,
                    &format!("iocost_coef_gen.py dependency {:?} is missing", dep),
                );
            }
        }

        // hostcriticals - ones which can be restarted for relocation
        for svc_name in ["systemd-journald.service", "sshd.service", "sssd.service"].iter() {
            if let Err(e) =
                Self::check_one_hostcritical_service(svc_name, true, self.enforce.crit_mem_prot)
            {
                self.sr_failed
                    .add(SysReq::HostCriticalServices, &format!("{}", &e));
            }
        }

        // and the ones which can't
        for svc_name in ["dbus.service", "dbus-broker.service"].iter() {
            if let Err(e) =
                Self::check_one_hostcritical_service(svc_name, false, self.enforce.crit_mem_prot)
            {
                self.sr_failed
                    .add(SysReq::HostCriticalServices, &format!("{}", &e));
            }
        }

        // sideload checks
        side::startup_checks(self);

        let (scr_dev_model, scr_dev_fwrev, scr_dev_size) =
            match devname_to_model_fwrev_size(&self.scr_dev) {
                Ok(v) => v,
                Err(e) => bail!(
                    "failed to determine model, fwrev and size of {:?} ({})",
                    &self.scr_dev,
                    &e
                ),
            };

        SysReqsReport {
            satisfied: &*ALL_SYSREQS_SET ^ &self.sr_failed.map.keys().copied().collect(),
            missed: self.sr_failed.clone(),
            kernel_version: sys.kernel_version().expect("Failed to read kernel version"),
            agent_version: FULL_VERSION.to_string(),
            hashd_version,
            nr_cpus: nr_cpus(),
            total_memory: total_memory(),
            total_swap: total_swap(),
            scr_dev: self.scr_dev.clone(),
            scr_devnr: self.scr_devnr,
            scr_dev_model,
            scr_dev_fwrev,
            scr_dev_size,
            scr_dev_iosched,
            enforce: self.enforce.clone(),
        }
        .save(&self.sysreqs_path)?;

        if self.sr_failed.map.is_empty() {
            Ok(())
        } else {
            Err(anyhow!(
                "{} startup checks failed",
                self.sr_failed.map.len()
            ))
        }
    }