in resctl-bench/src/bench/protection/mem_hog.rs [170:279]
/// Runs a single memory-hog protection pass and gathers its reports.
///
/// The sequence is: warm hashd up at `hashd_load`, optionally take a
/// baseline hold, start the "mem-hog" sysload selected by `hog_speed`, and
/// monitor hashd and the hog until `is_done` or `super::ws_status` signals
/// completion or the monitor gives up with an error.
///
/// * `rctx` - run context used to drive the workloads and read agent files.
/// * `run_name` - label that only appears in log messages.
/// * `hashd_load` - hashd load level to stabilize at before the hog starts.
/// * `hog_speed` - selects the sideload definition the hog is started from.
/// * `do_base_hold` - when true, `super::baseline_hold` is run and its
///   result is returned as the second tuple element; otherwise that element
///   is `None`.
/// * `timeout` - monitoring timeout in seconds; forced to 10 when
///   `rctx.test` is set.
/// * `is_done` - invoked from the monitor's status callback whenever both
///   the hog's usage entry and its hog report are available; returning
///   `true` ends the monitoring.
///
/// A monitoring error does not fail the call. It is recorded in
/// `MemHogRun::failed` as `(timeout, elapsed seconds capped at timeout,
/// error message)` and whatever data was collected is still returned.
///
/// # Errors
///
/// Fails when warming up hashd, the baseline hold or starting the sysload
/// fails, when no first hog report was captured (swap usage never became
/// non-zero outside test mode), or when the final hog report cannot be
/// read. The "mem-hog" sysload is stopped before the latter two errors are
/// returned.
fn run_one_int<F>(
    rctx: &mut RunCtx,
    run_name: &str,
    hashd_load: f64,
    hog_speed: MemHogSpeed,
    do_base_hold: bool,
    mut timeout: f64,
    mut is_done: F,
) -> Result<(MemHogRun, Option<(u64, u64)>)>
where
    F: FnMut(&AgentFiles, &rd_agent_intf::UsageReport, &BanditMemHogReport) -> bool,
{
    info!(
        "protection: Stabilizing hashd at {}% for {}",
        format_pct(hashd_load),
        run_name
    );
    super::warm_up_hashd(rctx, hashd_load).context("Warming up hashd")?;

    let base_period = if do_base_hold {
        Some(super::baseline_hold(rctx)?)
    } else {
        None
    };

    info!("protection: Starting memory hog");
    let hog_started_at = unix_now();
    if rctx.test {
        // Test runs use a short, fixed monitoring window.
        timeout = 10.0;
    }

    rctx.start_sysload("mem-hog", hog_speed.to_sideload_name())?;
    let hog_svc_name = rd_agent_intf::sysload_svc_name(Self::NAME);
    let mut first_hog_rep = Err(anyhow!("swap usage stayed zero"));
    // Most recent hog memory usage samples, newest at the front, capped at
    // `Self::MEM_AVG_PERIOD` entries.
    let mut hog_mem_rec = VecDeque::<usize>::new();
    let mut failed = None;

    // Memory hog is running. Monitor it until it dies or the
    // timeout expires. Record the memory hog report when the swap
    // usage is first seen, which will be used as the baseline for
    // calculating the total amount of needed and performed IOs.
    if let Err(e) = WorkloadMon::default()
        .hashd()
        .sysload("mem-hog")
        .timeout(Duration::from_secs_f64(timeout))
        .monitor_with_status(
            rctx,
            |wm: &WorkloadMon, af: &AgentFiles| -> Result<(bool, String)> {
                let rep = &af.report.data;
                let mut done = false;
                if let (Some(usage), Ok(hog_rep)) =
                    (rep.usages.get(&hog_svc_name), Self::read_hog_rep(rep))
                {
                    done = is_done(af, &usage, &hog_rep);
                    if first_hog_rep.is_err() {
                        // In test mode the baseline is accepted even
                        // without any swap usage.
                        if usage.swap_bytes > 0 || rctx.test {
                            first_hog_rep = Ok(hog_rep);
                        }
                    } else {
                        // Memory samples are only collected once the
                        // baseline report has been captured.
                        hog_mem_rec.push_front(usage.mem_bytes as usize);
                        hog_mem_rec.truncate(Self::MEM_AVG_PERIOD);
                    }
                }
                let (ws_done, status) = super::ws_status(wm, af)?;
                Ok((done || ws_done, status))
            },
        )
    {
        // This usually happens when IO isolation isn't good enough to
        // keep the agent and hashd healthy. Rather than failing the
        // whole run, collect the available data and mark when and how
        // it failed.
        failed = Some((
            timeout,
            ((unix_now() - hog_started_at) as f64).min(timeout),
            e.to_string(),
        ));
    }

    // Memory hog is dead. Read the last report while the sysload is still
    // registered, but hold on to the result instead of bailing out right
    // away so that the sysload is stopped on the error paths as well.
    let last_hog_rep =
        rctx.access_agent_files::<_, Result<_>>(|af| Self::read_hog_rep(&af.report.data));
    let last_hog_mem = if hog_mem_rec.is_empty() {
        0
    } else {
        hog_mem_rec.iter().sum::<usize>() / hog_mem_rec.len()
    };

    rctx.stop_sysload("mem-hog");

    // Unwrap the first and last reports, which are used to calculate the
    // delta. The first report's error takes precedence.
    let first_hog_rep = first_hog_rep?;
    let last_hog_rep = last_hog_rep?;

    if failed.is_none() {
        info!(
            "protection: Memory hog terminated after {}, {} finished",
            format_duration((unix_now() - hog_started_at) as f64),
            run_name,
        );
    }

    Ok((
        MemHogRun {
            failed,
            first_hog_rep,
            last_hog_rep,
            last_hog_mem,
        },
        base_period,
    ))
}