fn merge_model()

in resctl-bench/src/bench/iocost_tune/merge.rs [28:97]


/// Merges a set of iocost model parameters into one representative model.
///
/// Every model is flattened into six `f64` parameters with `model_to_array`.
/// When more than three models are supplied, a Chauvenet-style test is run on
/// each parameter and any model with at least one outlying parameter, in
/// either direction, is rejected. The merged model is built with
/// `model_from_array` from the per-parameter medians of the retained models
/// (the upper middle element when their count is even).
///
/// If the test ends up rejecting every model it carries no information, so
/// the rejections are dropped and all models are merged.
///
/// Returns the merged model together with a map from each input model to a
/// flag telling whether it was rejected as an outlier.
///
/// # Panics
///
/// Panics if `models` is empty, or if a parameter is NaN and therefore cannot
/// be ordered while computing the median.
fn merge_model(
    models: HashSet<IoCostModelParams>,
) -> (IoCostModelParams, HashMap<IoCostModelParams, bool>) {
    // The bool indicates whether an outlier.
    let mut models: Vec<(IoCostModelParams, bool)> =
        models.into_iter().map(|model| (model, false)).collect();
    let nr_models = models.len() as f64;

    // Convert to arrays of f64's. `param_sets[pi][mi]` is parameter `pi` of
    // `models[mi]`.
    let mut param_sets: [Vec<f64>; 6] = Default::default();
    for (model, _) in models.iter() {
        for (pi, v) in model_to_array(model).iter().enumerate() {
            param_sets[pi].push(*v);
        }
    }

    // Filter out outliers if there are more than three models.
    if models.len() > 3 {
        let means: Vec<f64> = param_sets
            .iter()
            .map(|set| statistical::mean(set))
            .collect();
        let stdevs: Vec<f64> = param_sets
            .iter()
            .map(|set| statistical::standard_deviation(set, None))
            .collect();

        trace!("merge_model: means={:?} stdevs={:?}", &means, &stdevs);

        // Apply Chauvenet's criterion on each model parameter to detect and
        // reject outliers. We reject models with any parameter determined to be
        // an outlier.
        for (pi, (&mean, &stdev)) in means.iter().zip(stdevs.iter()).enumerate() {
            // A distribution that cannot be constructed (e.g. all values are
            // identical) has no outliers to report for this parameter.
            if let Ok(dist) = Normal::new(mean, stdev) {
                for (mi, &val) in param_sets[pi].iter().enumerate() {
                    // Measure the tail on the side the value lies on. Using
                    // only the upper tail would never flag a value below the
                    // mean, because its upper-tail probability is >= 0.5.
                    let tail = if val >= mean {
                        1.0 - dist.cdf(val)
                    } else {
                        dist.cdf(val)
                    };
                    let is_outlier = tail * nr_models < 0.5;
                    trace!(
                        "merge_model: pi={} mean={} stdev={} mi={} val={} is_outlier={}",
                        pi,
                        mean,
                        stdev,
                        mi,
                        val,
                        is_outlier
                    );
                    models[mi].1 |= is_outlier;
                }
            }
        }

        // Different parameters can flag different models, so it is possible
        // for every model to be rejected. That would leave nothing to take a
        // median of; keep all of them instead.
        if models.iter().all(|(_, outlier)| *outlier) {
            trace!("merge_model: all models rejected as outliers, keeping all");
            for (_, outlier) in models.iter_mut() {
                *outlier = false;
            }
        }
    }

    // Determine the median model parameters from the retained models.
    let medians: Vec<f64> = param_sets
        .iter()
        .map(|set| {
            let mut kept: Vec<f64> = set
                .iter()
                .zip(models.iter())
                .filter(|(_, (_, outlier))| !*outlier)
                .map(|(v, _)| *v)
                .collect();
            kept.sort_by(|a, b| a.partial_cmp(b).unwrap());
            kept[kept.len() / 2]
        })
        .collect();

    let model_is_outlier: HashMap<IoCostModelParams, bool> = models.into_iter().collect();

    (model_from_array(&medians), model_is_outlier)
}