fn are_gpus_healthy()

in src/main.rs [450:481]


/// Decides whether every GPU in `burn_results` passed the burn run.
///
/// A GPU is reported when:
/// * its average flops fall below the acceptance floor, which sits
///   `tflops_tolerance` percent under the best average among all results; or
/// * it was throttled, unless the throttling is waived. The waiver applies
///   only when `tolerate_software_throttling` is set, the GPU was not also
///   flagged for low flops, and both hardware throttling counters are zero.
///
/// Returns `(healthy, reasons)`: `reasons` holds one message per detected
/// problem (a GPU may contribute two), and `healthy` is `true` exactly when
/// `reasons` is empty. An empty `burn_results` therefore yields `(true, [])`.
fn are_gpus_healthy(
    burn_results: Vec<BurnResult>,
    tflops_tolerance: f64,
    tolerate_software_throttling: bool,
) -> (bool, Vec<String>) {
    // Share of a GPU's average that is kept once the tolerance is removed.
    let kept_percent = 100. - tflops_tolerance;

    // The floor starts at zero and is raised to the highest scaled average.
    let mut flops_floor: f64 = 0.;
    for result in &burn_results {
        flops_floor = flops_floor.max(result.flops_avg() * kept_percent / 100.);
    }

    let mut reasons: Vec<String> = Vec::new();
    for result in &burn_results {
        let below_floor = result.flops_avg() < flops_floor;
        if below_floor {
            reasons.push(format!("GPU {} - ", result.gpu_idx) + GPU_FLOPS_REASON);
        }

        // Nothing more to check for a GPU that never throttled.
        if !result.is_throttled() {
            continue;
        }

        let hardware_throttled =
            result.throttling_thermal_hw != 0 || result.throttling_hw != 0;
        // Software-only throttling is forgiven on request, but never for a
        // GPU that already under-performed.
        let waived = tolerate_software_throttling && !below_floor && !hardware_throttled;
        if !waived {
            reasons.push(format!("GPU {} - ", result.gpu_idx) + GPU_THROTTLING_REASON);
        }
    }

    let healthy = reasons.is_empty();
    (healthy, reasons)
}