in pkg/gpu/nvidia/server.go [185:236]
// Allocate validates the device IDs in every container request and returns one
// ContainerAllocateResponse per container, in request order.
//
// Each requested ID must be known to the plugin and reported as healthy;
// otherwise the whole request is rejected with an error and no response is
// returned. Requested (virtual) device IDs are mapped to physical device IDs
// via getPhysicalDeviceID, de-duplicated, and exposed to the container through
// the NVIDIA_VISIBLE_DEVICES environment variable as a comma-separated list.
//
// The set of physical devices is computed per container request, so a
// container only sees the physical GPUs backing the IDs it asked for. The list
// is ordered by first appearance in the request, which keeps the environment
// variable deterministic across calls.
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	devs := m.devs
	responses := pluginapi.AllocateResponse{}

	for _, req := range reqs.ContainerRequests {
		// Scoped to this container: sharing these across iterations would
		// leak devices of earlier containers into later ones.
		seen := make(map[string]bool, len(req.DevicesIDs))
		visibleDevs := make([]string, 0, len(req.DevicesIDs))

		for _, id := range req.DevicesIDs {
			if !deviceExists(devs, id) {
				return nil, fmt.Errorf("invalid allocation request: unknown device: %s", id)
			}
			dev := getDeviceById(devs, id)
			if dev == nil {
				return nil, fmt.Errorf("invalid allocation request: unknown device: %s", id)
			}
			if dev.Health != pluginapi.Healthy {
				return nil, fmt.Errorf("invalid allocation request with unhealthy device %s", id)
			}

			// Convert the virtual GPU device ID to its physical GPU device ID.
			// Several virtual IDs may resolve to the same physical device, so
			// record each physical ID only once.
			physicalID := getPhysicalDeviceID(id)
			if seen[physicalID] {
				continue
			}
			seen[physicalID] = true
			visibleDevs = append(visibleDevs, physicalID)
		}

		// Set physical GPU devices as container visible devices.
		response := pluginapi.ContainerAllocateResponse{
			Envs: map[string]string{
				"NVIDIA_VISIBLE_DEVICES": strings.Join(visibleDevs, ","),
			},
		}

		// TODO: set the MPS environment variables; the draft below is disabled
		// because it did not work and the cause has not been determined.
		// NOTE(review): in the draft, len(req.DevicesIDs) / len(m.devs) is an
		// integer division performed before the multiplication by 100, so the
		// percentage is 0 unless the request contains at least len(m.devs)
		// IDs — worth checking whether this is the cause.
		//
		//response.Envs["CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"] = fmt.Sprintf("%d", 100 * uint(len(req.DevicesIDs) / len(m.devs)))
		//response.Envs["CUDA_MPS_PIPE_DIRECTORY"] = "/tmp"
		//
		//mount := pluginapi.Mount{
		//	ContainerPath: "/tmp/nvidia-mps",
		//	HostPath: "/tmp/nvidia-mps",
		//}
		//response.Mounts = append(response.Mounts, &mount)

		responses.ContainerResponses = append(responses.ContainerResponses, &response)
	}

	return &responses, nil
}