in pkg/gpu/nvidia/health_check/health_checker.go [41:61]
// NewGPUHealthChecker builds a GPUHealthChecker for the given devices.
//
// The devices map is copied entry by entry, so the checker holds its own map
// rather than the caller's. The health channel is stored as passed in. Every
// value in codes is registered as a health-critical Xid; Xid 48 is always
// registered, whether or not it appears in codes.
func NewGPUHealthChecker(devices map[string]pluginapi.Device, health chan pluginapi.Device, codes []int) *GPUHealthChecker {
	criticalXids := make(map[uint64]bool)
	for _, code := range codes {
		glog.Infof("reading code %v", code)
		// NOTE(review): a negative code wraps to a large uint64 here — confirm
		// callers only pass non-negative Xid values.
		criticalXids[uint64(code)] = true
	}
	// Double Bit ECC Error (Xid 48) is checked by default.
	criticalXids[48] = true

	// Work on a private copy of the device map to avoid interfering with the
	// device manager.
	ownDevices := make(map[string]pluginapi.Device, len(devices))
	for deviceID, device := range devices {
		ownDevices[deviceID] = device
	}

	return &GPUHealthChecker{
		devices:           ownDevices,
		nvmlDevices:       make(map[string]*nvml.Device),
		health:            health,
		stop:              make(chan bool),
		healthCriticalXid: criticalXids,
	}
}