linux/cuda_installer/os_installers/ubuntu.py (80 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pathlib
from typing import Optional
from config import (
NVIDIA_DEB_REPO_KEYRING_URL,
NVIDIA_KEYRING_SHA256_SUMS,
NVIDIA_DEB_REPO_KEYRING_GS_URI,
CUDA_TOOLKIT_VERSION_SHORT,
)
from decorators import checkpoint_decorator
from logger import logger
from os_installers import LinuxInstaller, RebootRequired, System
class UbuntuInstaller(LinuxInstaller):
    """
    Ubuntu-specific implementation of the GPU driver / CUDA Toolkit installer.

    All package management is done through `apt-get`/`dpkg`/`apt-mark` commands
    executed with `self.run`.
    """

    # Locations of the Machine Owner Key pair under /var/lib/shim-signed/mok.
    # NOTE(review): presumably consumed by the base class helpers that place and
    # remove custom DKMS signing keys - confirm against LinuxInstaller.
    DKMS_MOK_PUB = pathlib.Path("/var/lib/shim-signed/mok/MOK.der")
    DKMS_MOK_KEY = pathlib.Path("/var/lib/shim-signed/mok/MOK.priv")

    @checkpoint_decorator("add_nvidia_repo", "NVIDIA repository already added.")
    def _add_nvidia_repo(self) -> None:
        """
        Add the Nvidia repository to the system. Do nothing if already present.

        Downloads the NVIDIA repository keyring package matching the detected
        Ubuntu version, installs it with `dpkg` and refreshes the package index.

        Raises:
            KeyError: if there is no known keyring checksum for the detected
                Ubuntu version.
        """
        system, version = self._detect_linux_distro()
        # Internal invariant: this installer is only meant to run on Ubuntu.
        assert system == System.Ubuntu

        # The URL templates and the checksum table are keyed by the lower-case
        # distribution name and the version without the dot (e.g. "2204").
        distro_key = "ubuntu"
        version_key = version.replace(".", "")

        keyring = self.download_file(
            NVIDIA_DEB_REPO_KEYRING_URL.format(system=distro_key, version=version_key),
            NVIDIA_KEYRING_SHA256_SUMS[distro_key][version_key],
            NVIDIA_DEB_REPO_KEYRING_GS_URI.format(
                system=distro_key, version=version_key
            ),
        )

        self.run(f"dpkg -i {keyring.absolute()}")
        # `apt-get` is used instead of `apt`, since `apt` does not guarantee a
        # stable command line interface for scripts. It also keeps this method
        # consistent with the rest of the class.
        self.run("apt-get update")

    @checkpoint_decorator("prerequisites", "System preparations already done.")
    def _install_prerequisites(self) -> None:
        """
        Installs packages required for the proper driver installation on Ubuntu.

        Raises:
            RebootRequired: always, once the packages have been installed.
        """
        self.run("apt-get update")
        self.run(
            "apt-get install -y linux-image-gcp linux-headers-gcp "
            "gcc make dkms pciutils software-properties-common cmake"
        )
        raise RebootRequired

    def _kernel_package_names(self) -> str:
        """
        Returns the space separated list of kernel related packages that are
        held/unheld by `lock_kernel_updates` and `unlock_kernel_updates`.
        """
        return (
            "linux-image-gcp "
            "linux-headers-gcp "
            f"linux-image-{self.kernel_version} "
            f"linux-headers-{self.kernel_version}"
        )

    def lock_kernel_updates(self) -> None:
        """
        Marks kernel related packages, so they don't get auto-updated. This would cause the driver to stop working.
        """
        logger.info("Locking kernel updates...")
        self.run(f"apt-mark hold {self._kernel_package_names()}")

    def unlock_kernel_updates(self) -> None:
        """
        Allows the kernel related packages to be upgraded.
        """
        logger.info("Unlocking kernel updates...")
        self.run(f"apt-mark unhold {self._kernel_package_names()}")

    def _repo_install_driver(
        self,
        secure_boot_public_key: Optional[pathlib.Path] = None,
        secure_boot_private_key: Optional[pathlib.Path] = None,
    ) -> None:
        """
        Install the GPU driver from the `cuda-drivers` package using apt-get.

        Args:
            secure_boot_public_key: optional path to a public key for DKMS
                module signing. Used only together with the private key.
            secure_boot_private_key: optional path to a private key for DKMS
                module signing. Used only together with the public key.

        Raises:
            RuntimeError: if the detected Ubuntu version is not one of
                20.04, 22.04 or 24.04.
        """
        system, version = self._detect_linux_distro()
        # Internal invariant: this installer is only meant to run on Ubuntu.
        assert system == System.Ubuntu
        if version not in ("20.04", "22.04", "24.04"):
            raise RuntimeError(
                f"The 'repo' mode is not available for Ubuntu {version}."
            )

        # Remember whether the custom keys were really placed, so the cleanup
        # below only undoes work that was actually done.
        custom_keys_placed = False
        if secure_boot_public_key and secure_boot_private_key:
            if secure_boot_public_key.exists() and secure_boot_private_key.exists():
                self.place_custom_dkms_signing_keys(
                    secure_boot_public_key=secure_boot_public_key,
                    secure_boot_private_key=secure_boot_private_key,
                )
                custom_keys_placed = True
            else:
                logger.warning(
                    "Secure boot key files were provided, but at least one of "
                    "them does not exist. Custom DKMS signing keys will not be used."
                )

        try:
            logger.info("Installing GPU driver...")
            self.run("apt-get install -yq cuda-drivers")
        finally:
            # Clean up even if the installation failed, but never try to remove
            # keys that were not placed by this method.
            if custom_keys_placed:
                self.remove_custom_dkms_signing_keys()

    def _install_cuda_repo(self) -> None:
        """
        Install CUDA Toolkit using apt-get.

        Makes sure the Nvidia repository is configured, then installs the
        `cuda-toolkit-<major>-<minor>` package for CUDA_TOOLKIT_VERSION_SHORT.
        """
        self._add_nvidia_repo()
        major, minor = CUDA_TOOLKIT_VERSION_SHORT.split(".")
        self.run(f"apt-get install -yq cuda-toolkit-{major}-{minor}")