in resctl-bench/src/bench/iocost_tune/merge.rs [28:97]
/// Merges a set of iocost model parameter sets into a single representative
/// model.
///
/// Every model is flattened into its six `f64` parameters with
/// `model_to_array()`. When more than three models are given, each parameter
/// is tested for outliers and a model is rejected if any of its parameters is
/// flagged. The merged model is built with `model_from_array()` from the
/// per-parameter median of the surviving models; for an even number of
/// survivors the upper of the two middle values is used, so every merged
/// parameter is a value that actually occurred in some input model.
///
/// Returns the merged model together with a map telling, for each input
/// model, whether it was rejected as an outlier.
///
/// Because different parameters can flag different models, it is possible
/// for every model to end up flagged. In that case nothing would be left to
/// merge, so the outlier verdict is discarded: all models are reported as
/// non-outliers and all of them take part in the median.
///
/// # Panics
///
/// Panics if `models` is empty, or if a model parameter is NaN and therefore
/// cannot be ordered.
fn merge_model(
    models: HashSet<IoCostModelParams>,
) -> (IoCostModelParams, HashMap<IoCostModelParams, bool>) {
    // There is no meaningful merged model for an empty set. Fail with a clear
    // message instead of an out-of-bounds index further down.
    assert!(!models.is_empty(), "merge_model: no models to merge");

    // The bool indicates whether the model is an outlier.
    let mut models: Vec<(IoCostModelParams, bool)> =
        models.into_iter().map(|model| (model, false)).collect();

    // Convert to arrays of f64's: param_sets[pi][mi] is parameter `pi` of
    // models[mi].
    let mut param_sets: [Vec<f64>; 6] = Default::default();
    for (model, _) in models.iter() {
        for (i, v) in model_to_array(model).iter().enumerate() {
            param_sets[i].push(*v);
        }
    }

    // Filter out outliers if there are more than three models.
    if models.len() > 3 {
        let means: Vec<f64> = param_sets
            .iter()
            .map(|set| statistical::mean(set))
            .collect();
        let stdevs: Vec<f64> = param_sets
            .iter()
            .map(|set| statistical::standard_deviation(set, None))
            .collect();
        trace!("merge_model: means={:?} stdevs={:?}", &means, &stdevs);

        // Apply Chauvenet's criterion on each model parameter to detect and
        // reject outliers. We reject models with any parameter determined to be
        // an outlier.
        //
        // NOTE(review): only the upper tail is examined here, so a value far
        // below the mean is never flagged. Chauvenet's criterion is usually
        // stated with the two-tailed probability - confirm whether the
        // one-sided test is intentional.
        for (pi, (&mean, &stdev)) in means.iter().zip(stdevs.iter()).enumerate() {
            // Parameters for which no distribution can be constructed are
            // skipped and flag nothing.
            if let Ok(dist) = Normal::new(mean, stdev) {
                for (mi, &val) in param_sets[pi].iter().enumerate() {
                    let is_outlier = (1.0 - dist.cdf(val)) * (models.len() as f64) < 0.5;
                    trace!(
                        "merge_model: pi={} mean={} stdev={} mi={} val={} is_outlier={}",
                        pi,
                        mean,
                        stdev,
                        mi,
                        val,
                        is_outlier
                    );
                    models[mi].1 |= is_outlier;
                }
            }
        }
    }

    // If every single model got rejected there is nothing left to take the
    // median of. Drop the outlier verdict and merge all models instead.
    if models.iter().all(|(_, outlier)| *outlier) {
        trace!(
            "merge_model: all {} models flagged as outliers, ignoring outlier detection",
            models.len()
        );
        for (_, outlier) in models.iter_mut() {
            *outlier = false;
        }
    }

    // Determine the median model parameters over the non-outlier models.
    let medians: Vec<f64> = param_sets
        .iter()
        .map(|set| {
            let mut kept: Vec<f64> = set
                .iter()
                .zip(models.iter())
                .filter(|(_, model)| !model.1)
                .map(|(v, _)| *v)
                .collect();
            kept.sort_by(|a, b| {
                a.partial_cmp(b)
                    .expect("merge_model: NaN model parameter")
            });
            // Upper median for even counts.
            kept[kept.len() / 2]
        })
        .collect();

    let model_is_outlier: HashMap<IoCostModelParams, bool> = models.into_iter().collect();

    (model_from_array(&medians), model_is_outlier)
}