linux/cuda_installer/os_installers/debian.py (100 lines of code) (raw):

# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import pathlib import re from typing import Optional from decorators import checkpoint_decorator from logger import logger from os_installers import LinuxInstaller, RebootRequired, System from config import ( NVIDIA_DEB_REPO_KEYRING_URL, NVIDIA_DEB_REPO_KEYRING_GS_URI, NVIDIA_KEYRING_SHA256_SUMS, CUDA_TOOLKIT_VERSION_SHORT, ) class DebianInstaller(LinuxInstaller): KERNEL_IMAGE_PACKAGE = "linux-image-{version}" KERNEL_VERSION_FORMAT = "{major}.{minor}.{patch}-{micro}-cloud-amd64" KERNEL_HEADERS_PACKAGE = "linux-headers-{version}" KERNEL_PACKAGE_REGEX = r"linux-image-{major}.{minor}.([\d]+)-([\d]+)-cloud-amd64" def __init__(self): super().__init__() # To make sure we don't get stuck waiting for user input. os.environ["DEBIAN_FRONTEND"] = "noninteractive" @checkpoint_decorator("add_nvidia_repo", "NVIDIA repository already added.") def _add_nvidia_repo(self): """ Add the Nvidia repository to the system. Do nothing if already present. """ system, version = self._detect_linux_distro() assert system == System.Debian system = "debian" keyring = self.download_file( NVIDIA_DEB_REPO_KEYRING_URL.format(system=system, version=version), NVIDIA_KEYRING_SHA256_SUMS[system][version], NVIDIA_DEB_REPO_KEYRING_GS_URI.format(system=system, version=version), ) self.run(f"dpkg -i {keyring.absolute()}") self.run("apt-get update") @checkpoint_decorator("prerequisites", "System preparations already done.") def _install_prerequisites(self): """ Installs packages required for the proper driver installation on Debian. """ self.run("apt-get update") major, minor, *_ = self.kernel_version.split(".") kernel_package_regex = re.compile( self.KERNEL_PACKAGE_REGEX.format(major=major, minor=minor) ) # Find the newest version of kernel to update to, but staying with the same major version packages = self.run("apt-cache search linux-image").stdout patch, micro = max(kernel_package_regex.findall(packages)) wanted_kernel_version = self.KERNEL_VERSION_FORMAT.format( major=major, minor=minor, patch=patch, micro=micro ) wanted_kernel_package = self.KERNEL_IMAGE_PACKAGE.format( version=wanted_kernel_version ) wanted_kernel_headers = self.KERNEL_HEADERS_PACKAGE.format( version=wanted_kernel_version ) self.run( f"apt-get install -y make gcc {wanted_kernel_package} {wanted_kernel_headers} " f"software-properties-common pciutils gcc make dkms cmake" ) raise RebootRequired def lock_kernel_updates(self): """ Marks kernel related packages, so they don't get auto-updated. This would cause the driver to stop working. """ logger.info("Locking kernel updates...") self.run( f"apt-mark hold " f"linux-image-{self.kernel_version} " f"linux-headers-{self.kernel_version} " f"linux-image-cloud-amd64 " f"linux-headers-cloud-amd64" ) def unlock_kernel_updates(self): """ Allows the kernel related packages to be upgraded. """ logger.info("Unlocking kernel updates...") self.run( f"apt-mark unhold " f"linux-image-{self.kernel_version} " f"linux-headers-{self.kernel_version} " f"linux-image-cloud-amd64 " f"linux-headers-cloud-amd64" ) def _repo_install_driver( self, secure_boot_public_key: Optional[pathlib.Path] = None, secure_boot_private_key: Optional[pathlib.Path] = None, ): system, version = self._detect_linux_distro() assert system == System.Debian if version == "11": raise RuntimeError("The 'repo' mode is not available for Debian 11.") if secure_boot_public_key and secure_boot_private_key: if secure_boot_public_key.exists() and secure_boot_private_key.exists(): self.place_custom_dkms_signing_keys( secure_boot_public_key=secure_boot_public_key, secure_boot_private_key=secure_boot_private_key, ) try: logger.info("Installing GPU driver...") self.run("apt-get install -yq cuda-drivers") finally: if secure_boot_public_key and secure_boot_private_key: self.remove_custom_dkms_signing_keys() def _install_cuda_repo(self): """ Install CUDA Toolkit using DNF. """ self._add_nvidia_repo() major, minor = CUDA_TOOLKIT_VERSION_SHORT.split(".") logger.info(f"Installing CUDA Toolkit version {CUDA_TOOLKIT_VERSION_SHORT}") self.run(f"apt-get install -yq cuda-toolkit-{major}-{minor}")