partition_gpu/partition_gpu.go (257 lines of code) (raw):
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"os"
"os/exec"
"strings"
"syscall"
"github.com/golang/glog"
)
// Command-line flags.
var (
// nvidiaSmiPath is the nvidia-smi binary that every GPU query and MIG command in this file executes.
nvidiaSmiPath = flag.String("nvidia-smi-path", "/usr/local/nvidia/bin/nvidia-smi", "Path where nvidia-smi is installed.")
// gpuConfigFile is the JSON file that parseGPUConfig decodes into a GPUConfig.
// main exits without doing anything when this file does not exist.
gpuConfigFile = flag.String("gpu-config", "/etc/nvidia/gpu_config.json", "File with GPU configurations for device plugin")
)
// partitionSizeToProfileID maps a MIG partition size name (the value of
// GPUConfig.GPUPartitionSize) to the profile ID string that buildPartitionStr
// repeats and createGPUPartitions passes to `nvidia-smi mig -cgi`.
// A size that is not a key here is rejected by buildPartitionStr as invalid.
// Entries are grouped by the GPU model(s) named in the comment above each group.
var partitionSizeToProfileID = map[string]string{
// nvidia-tesla-a100
"1g.5gb": "19",
"2g.10gb": "14",
"3g.20gb": "9",
"4g.20gb": "5",
"7g.40gb": "0",
// nvidia-a100-80gb, nvidia-h100-80gb
"1g.10gb": "19",
"2g.20gb": "14",
"3g.40gb": "9",
"4g.40gb": "5",
"7g.80gb": "0",
// nvidia-h100-80gb
"1g.20gb": "15",
// nvidia-h200-141gb
"1g.18gb": "19",
"1g.35gb": "15",
"2g.35gb": "14",
"3g.71gb": "9",
"4g.71gb": "5",
"7g.141gb": "0",
// nvidia-b200, nvidia-gb200
"1g.23gb": "19",
// nvidia-b200
"1g.45gb": "15",
"2g.45gb": "14",
"3g.90gb": "9",
"4g.90gb": "5",
"7g.180gb": "0",
// nvidia-gb200
"1g.47gb": "15",
"2g.47gb": "14",
"3g.93gb": "9",
"4g.93gb": "5",
"7g.186gb": "0",
}
// partitionSizeMaxCount gives, for each MIG partition size name, how many
// copies of the profile ID buildPartitionStr puts in the comma-separated list
// handed to `nvidia-smi mig -cgi`.
// NOTE(review): presumably the maximum number of instances of that profile
// that fit on one GPU — confirm against NVIDIA's MIG profile tables.
// Keys are expected to mirror partitionSizeToProfileID; for a size missing
// here the lookup yields 0 and buildPartitionStr emits a single profile ID.
var partitionSizeMaxCount = map[string]int{
// nvidia-tesla-a100
"1g.5gb": 7,
"2g.10gb": 3,
"3g.20gb": 2,
"4g.20gb": 1,
"7g.40gb": 1,
// nvidia-a100-80gb, nvidia-h100-80gb
"1g.10gb": 7,
"2g.20gb": 3,
"3g.40gb": 2,
"4g.40gb": 1,
"7g.80gb": 1,
// nvidia-h100-80gb
"1g.20gb": 4,
// nvidia-h200-141gb
"1g.18gb": 7,
"1g.35gb": 4,
"2g.35gb": 3,
"3g.71gb": 2,
"4g.71gb": 1,
"7g.141gb": 1,
// nvidia-b200, nvidia-gb200
"1g.23gb": 7,
// nvidia-b200
"1g.45gb": 4,
"2g.45gb": 3,
"3g.90gb": 2,
"4g.90gb": 1,
"7g.180gb": 1,
// nvidia-gb200
"1g.47gb": 4,
"2g.47gb": 3,
"3g.93gb": 2,
"4g.93gb": 1,
"7g.186gb": 1,
}
const (
// SIGRTMIN is the base signal number from which rebootNode derives the
// SIGRTMIN+5 signal it sends to PID 1.
// NOTE(review): 34 is presumably the SIGRTMIN value seen by systemd on Linux — confirm.
SIGRTMIN = 34
// GPU model names. checkGpuType matches each of these as a prefix of the
// `nvidia-smi --query-gpu=gpu_name` output; the trailing comment on each
// line is the corresponding accelerator type name.
NvidiaGB200 = "NVIDIA GB200" // nvidia-gb200
NvidiaB200 = "NVIDIA B200" // nvidia-b200
Nvidia141gbH200 = "NVIDIA H200" // nvidia-h200-141gb
Nvidia80gbH100 = "NVIDIA H100 80GB HBM3" // nvidia-h100-80gb
Nvidia40gbA100 = "NVIDIA A100-SXM4-40GB" // nvidia-tesla-a100
Nvidia80gbA100 = "NVIDIA A100-SXM4-80GB" // nvidia-a100-80gb
)
// GPUConfig stores the settings used to configure the GPUs on a node.
// parseGPUConfig fills it by JSON-decoding the GPU config file.
type GPUConfig struct {
// GPUPartitionSize is the MIG partition size name to create on every GPU,
// e.g. "1g.5gb"; it must be a key of partitionSizeToProfileID.
// An empty value means no partitions are required and main exits early.
// The field has no struct tag, so its JSON key is the field name.
GPUPartitionSize string
}
func main() {
flag.Parse()
if _, err := os.Stat(*gpuConfigFile); os.IsNotExist(err) {
glog.Infof("No GPU config file given, nothing to do.")
return
}
gpuConfig, err := parseGPUConfig(*gpuConfigFile)
if err != nil {
glog.Infof("failed to parse GPU config file, taking no action.")
return
}
glog.Infof("Using gpu config: %v", gpuConfig)
if gpuConfig.GPUPartitionSize == "" {
glog.Infof("No GPU partitions are required, exiting")
return
}
if _, err := os.Stat(*nvidiaSmiPath); os.IsNotExist(err) {
glog.Errorf("nvidia-smi path %s not found: %v", *nvidiaSmiPath, err)
os.Exit(1)
}
migModeEnabled, err := currentMigMode()
if err != nil {
glog.Errorf("Failed to check if MIG mode is enabled: %v", err)
os.Exit(1)
}
if !migModeEnabled {
glog.Infof("MIG mode is not enabled. Enabling now.")
glog.Infof("Checking the GPU type now.")
gpuType, err := checkGpuType()
if err != nil {
glog.Errorf("Failed to check GPU Type: %v", err)
os.Exit(1)
}
glog.Infof("Got GPU type used: %s", gpuType)
if err := enableMigMode(); err != nil {
glog.Errorf("Failed to enable MIG mode: %v", err)
os.Exit(1)
}
// On NVIDIA Ampere GPUs, when MIG mode is enabled, the driver will attempt to reset the GPU so that MIG mode can take effect.
// Starting with the Hopper generation of GPUs, enabling MIG mode no longer requires a GPU reset to take effect.
// See https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#enable-mig-mode for more information
if gpuType == Nvidia40gbA100 || gpuType == Nvidia80gbA100 {
glog.Infof("Rebooting node to enable MIG mode")
if err := rebootNode(); err != nil {
glog.Errorf("Failed to trigger node reboot after enabling MIG mode: %v", err)
}
// Exit, since we cannot proceed until node has rebooted, for MIG changes to take effect on NVIDIA Ampere GPUs.
os.Exit(1)
}
}
glog.Infof("MIG mode is enabled on all GPUs, proceeding to create GPU partitions.")
glog.Infof("Cleaning up any existing GPU partitions")
if err := cleanupAllGPUPartitions(); err != nil {
glog.Errorf("Failed to cleanup GPU partitions: %v", err)
os.Exit(1)
}
glog.Infof("Creating new GPU partitions")
if err := createGPUPartitions(gpuConfig.GPUPartitionSize); err != nil {
glog.Errorf("Failed to create GPU partitions: %v", err)
os.Exit(1)
}
glog.Infof("Running %s", *nvidiaSmiPath)
out, err := exec.Command(*nvidiaSmiPath).Output()
if err != nil {
glog.Errorf("Failed to run nvidia-smi, output: %s, error: %v", string(out), err)
}
glog.Infof("Output:\n %s", string(out))
}
// parseGPUConfig reads the JSON file at gpuConfigFile and decodes it into a
// GPUConfig.
//
// On failure it returns the zero GPUConfig together with an error that wraps
// the underlying read or decode error, so callers can inspect the cause with
// errors.Is / errors.As.
func parseGPUConfig(gpuConfigFile string) (GPUConfig, error) {
	gpuConfigContent, err := ioutil.ReadFile(gpuConfigFile)
	if err != nil {
		return GPUConfig{}, fmt.Errorf("unable to read gpu config file %s: %w", gpuConfigFile, err)
	}
	var gpuConfig GPUConfig
	if err := json.Unmarshal(gpuConfigContent, &gpuConfig); err != nil {
		// Return the zero value rather than whatever was decoded before the
		// error, so callers never see a half-filled config.
		return GPUConfig{}, fmt.Errorf("failed to parse GPU config file contents: %s, error: %w", gpuConfigContent, err)
	}
	return gpuConfig, nil
}
// currentMigMode reports whether MIG mode is currently enabled on all GPUs
// attached to this node.
//
// It runs `nvidia-smi --query-gpu=mig.mode.current --format=csv,noheader`,
// which prints one line per GPU, and inspects every line:
//   - true when every GPU reports "Enabled";
//   - false when at least one GPU reports "Disabled";
//   - an error when nvidia-smi fails, prints nothing, or prints a line that
//     is neither "Enabled" nor "Disabled".
func currentMigMode() (bool, error) {
	out, err := exec.Command(*nvidiaSmiPath, "--query-gpu=mig.mode.current", "--format=csv,noheader").Output()
	if err != nil {
		return false, err
	}

	gpus := 0
	allEnabled := true
	for _, line := range strings.Split(string(out), "\n") {
		mode := strings.TrimSpace(line)
		switch {
		case mode == "":
			// Blank line (e.g. after the trailing newline): not a GPU row.
			continue
		case strings.HasPrefix(mode, "Enabled"):
		case strings.HasPrefix(mode, "Disabled"):
			// Keep scanning so malformed rows further down are still reported.
			allEnabled = false
		default:
			return false, fmt.Errorf("nvidia-smi returned invalid output: %s", out)
		}
		gpus++
	}
	if gpus == 0 {
		return false, fmt.Errorf("nvidia-smi returned invalid output: %s", out)
	}
	return allEnabled, nil
}
// enableMigMode enables MIG mode on all GPUs attached to the node by running
// `nvidia-smi -mig 1`.
//
// On Ampere GPUs the change needs a GPU reset to take effect, which main
// handles by rebooting the node; later generations need no reset.
//
// On failure the returned error wraps the exec error and includes whatever
// nvidia-smi printed to stdout and stderr, so the cause is visible in logs.
func enableMigMode() error {
	out, err := exec.Command(*nvidiaSmiPath, "-mig", "1").CombinedOutput()
	if err != nil {
		return fmt.Errorf("nvidia-smi -mig 1 failed: output: %s, error: %w", out, err)
	}
	return nil
}
// checkGpuType identifies the GPU model on this node.
//
// It runs `nvidia-smi --query-gpu=gpu_name --format=csv,noheader` and returns
// the first known model constant that is a prefix of the output. Only the
// start of the output is examined, so the first reported GPU decides the
// result. An error is returned when nvidia-smi fails or the output starts
// with none of the known model names.
func checkGpuType() (string, error) {
	out, err := exec.Command(*nvidiaSmiPath, "--query-gpu=gpu_name", "--format=csv,noheader").Output()
	if err != nil {
		return "", err
	}

	reported := string(out)
	// Checked in this order; the first matching prefix wins.
	knownModels := []string{
		NvidiaGB200,
		NvidiaB200,
		Nvidia141gbH200,
		Nvidia80gbH100,
		Nvidia40gbA100,
		Nvidia80gbA100,
	}
	for _, model := range knownModels {
		if strings.HasPrefix(reported, model) {
			return model, nil
		}
	}
	return "", fmt.Errorf("nvidia-smi returned invalid GPU type for MIG: %s", out)
}
// rebootNode asks the init process (PID 1) to reboot the node by sending it
// signal SIGRTMIN+5, and returns any error from delivering the signal.
func rebootNode() error {
	// systemd treats SIGRTMIN+5 as a graceful reboot request:
	// https://man7.org/linux/man-pages/man1/systemd.1.html#SIGNALS
	const initPID = 1
	rebootSignal := syscall.Signal(SIGRTMIN + 5)
	return syscall.Kill(initPID, rebootSignal)
}
// cleanupAllGPUPartitions removes every existing MIG partition by running
// `nvidia-smi mig -dci` (destroy compute instances) followed by
// `nvidia-smi mig -dgi` (destroy GPU instances), logging each command and its
// stdout.
//
// A failing command is tolerated when its stdout says there were no GPU or
// compute instances to destroy; any other failure aborts with an error.
func cleanupAllGPUPartitions() error {
	steps := []struct {
		flag   string // nvidia-smi mig sub-option to run
		target string // what is being destroyed, for the error message
	}{
		{flag: "-dci", target: "compute instance"},
		{flag: "-dgi", target: "gpu instance"},
	}

	for _, step := range steps {
		cmdArgs := []string{"mig", step.flag}
		glog.Infof("Running %s %s", *nvidiaSmiPath, strings.Join(cmdArgs, " "))
		raw, err := exec.Command(*nvidiaSmiPath, cmdArgs...).Output()
		text := string(raw)

		nothingToDestroy := strings.Contains(text, "No GPU instances found") ||
			strings.Contains(text, "No compute instances found")
		if err != nil && !nothingToDestroy {
			return fmt.Errorf("failed to destroy %s, nvidia-smi output: %s, error: %v ", step.target, text, err)
		}
		glog.Infof("Output:\n %s", text)
	}
	return nil
}
// createGPUPartitions creates MIG partitions of the given size.
//
// It builds the profile ID list with buildPartitionStr, creates the GPU
// instances with `nvidia-smi mig -cgi <list>`, then creates the compute
// instances with `nvidia-smi mig -cci`. Each command and its stdout are
// logged. It returns an error when partitionSize is unknown or when either
// nvidia-smi command fails.
func createGPUPartitions(partitionSize string) error {
	profileList, err := buildPartitionStr(partitionSize)
	if err != nil {
		return err
	}

	// runMig logs and executes one nvidia-smi invocation, returning its stdout.
	runMig := func(cmdArgs ...string) (string, error) {
		glog.Infof("Running %s %s", *nvidiaSmiPath, strings.Join(cmdArgs, " "))
		raw, runErr := exec.Command(*nvidiaSmiPath, cmdArgs...).Output()
		return string(raw), runErr
	}

	gpuInstOut, err := runMig("mig", "-cgi", profileList)
	if err != nil {
		return fmt.Errorf("failed to create GPU Instances: output: %s, error: %v", gpuInstOut, err)
	}
	glog.Infof("Output:\n %s", gpuInstOut)

	computeInstOut, err := runMig("mig", "-cci")
	if err != nil {
		return fmt.Errorf("failed to create compute instances: output: %s, error: %v", computeInstOut, err)
	}
	glog.Infof("Output:\n %s", computeInstOut)
	return nil
}
// buildPartitionStr returns the comma-separated profile ID list for the given
// partition size: the ID from partitionSizeToProfileID repeated
// partitionSizeMaxCount[partitionSize] times, and at least once.
//
// An empty partitionSize yields an empty string and no error; a size missing
// from partitionSizeToProfileID yields an error.
func buildPartitionStr(partitionSize string) (string, error) {
	if partitionSize == "" {
		return "", nil
	}
	profileID, known := partitionSizeToProfileID[partitionSize]
	if !known {
		return "", fmt.Errorf("%s is not a valid partition size", partitionSize)
	}

	copies := partitionSizeMaxCount[partitionSize]
	// Start with one ID, then append ",ID" until there are `copies` of them.
	list := []byte(profileID)
	for added := 1; added < copies; added++ {
		list = append(list, ',')
		list = append(list, profileID...)
	}
	return string(list), nil
}