func main()

in partition_gpu/partition_gpu.go [129:204]


func main() {
	flag.Parse()

	if _, err := os.Stat(*gpuConfigFile); os.IsNotExist(err) {
		glog.Infof("No GPU config file given, nothing to do.")
		return
	}
	gpuConfig, err := parseGPUConfig(*gpuConfigFile)
	if err != nil {
		glog.Infof("failed to parse GPU config file, taking no action.")
		return
	}
	glog.Infof("Using gpu config: %v", gpuConfig)
	if gpuConfig.GPUPartitionSize == "" {
		glog.Infof("No GPU partitions are required, exiting")
		return
	}

	if _, err := os.Stat(*nvidiaSmiPath); os.IsNotExist(err) {
		glog.Errorf("nvidia-smi path %s not found: %v", *nvidiaSmiPath, err)
		os.Exit(1)
	}

	migModeEnabled, err := currentMigMode()
	if err != nil {
		glog.Errorf("Failed to check if MIG mode is enabled: %v", err)
		os.Exit(1)
	}
	if !migModeEnabled {
		glog.Infof("MIG mode is not enabled. Enabling now.")
		glog.Infof("Checking the GPU type now.")
		gpuType, err := checkGpuType()
		if err != nil {
			glog.Errorf("Failed to check GPU Type: %v", err)
			os.Exit(1)
		}
		glog.Infof("Got GPU type used: %s", gpuType)
		if err := enableMigMode(); err != nil {
			glog.Errorf("Failed to enable MIG mode: %v", err)
			os.Exit(1)
		}
		// On NVIDIA Ampere GPUs, when MIG mode is enabled, the driver will attempt to reset the GPU so that MIG mode can take effect.
		// Starting with the Hopper generation of GPUs, enabling MIG mode no longer requires a GPU reset to take effect.
		// See https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#enable-mig-mode for more information
		if gpuType == Nvidia40gbA100 || gpuType == Nvidia80gbA100 {
			glog.Infof("Rebooting node to enable MIG mode")
			if err := rebootNode(); err != nil {
				glog.Errorf("Failed to trigger node reboot after enabling MIG mode: %v", err)
			}
			// Exit, since we cannot proceed until node has rebooted, for MIG changes to take effect on NVIDIA Ampere GPUs.
			os.Exit(1)
		}
	}

	glog.Infof("MIG mode is enabled on all GPUs, proceeding to create GPU partitions.")

	glog.Infof("Cleaning up any existing GPU partitions")
	if err := cleanupAllGPUPartitions(); err != nil {
		glog.Errorf("Failed to cleanup GPU partitions: %v", err)
		os.Exit(1)
	}

	glog.Infof("Creating new GPU partitions")
	if err := createGPUPartitions(gpuConfig.GPUPartitionSize); err != nil {
		glog.Errorf("Failed to create GPU partitions: %v", err)
		os.Exit(1)
	}

	glog.Infof("Running %s", *nvidiaSmiPath)
	out, err := exec.Command(*nvidiaSmiPath).Output()
	if err != nil {
		glog.Errorf("Failed to run nvidia-smi, output: %s, error: %v", string(out), err)
	}
	glog.Infof("Output:\n %s", string(out))

}