in pkg/gpu/nvidia/server.go [164:178]
// ListAndWatch sends the current device list (m.devs) on the stream s and
// then blocks, reacting to two events:
//
//   - a receive on m.stop ends the call and returns nil;
//   - a device received on m.health is marked Unhealthy, logged, and the
//     full device list is sent again.
//
// The request argument e is not used. If any Send on the stream fails, the
// error is returned to the caller and the loop ends.
func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
	// Initial snapshot of the devices. A failed send is reported rather than
	// ignored, so we do not sit in the loop below on a stream that already
	// rejected a write.
	if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs}); err != nil {
		return err
	}

	for {
		select {
		case <-m.stop:
			return nil
		case d := <-m.health:
			// FIXME: there is no way to recover from the Unhealthy state.
			d.Health = pluginapi.Unhealthy
			log.Printf("device marked unhealthy: %s", d.ID)
			// Re-send the whole list so the receiver sees the updated health.
			if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: m.devs}); err != nil {
				return err
			}
		}
	}
}