# linux/cuda_installer/os_installers/dnf_system.py (80 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import configparser
import pathlib
import shutil
from config import NVIDIA_RHEL_REPO_URL, CUDA_TOOLKIT_VERSION_SHORT
from decorators import checkpoint_decorator
from logger import logger
from os_installers import LinuxInstaller, System
class DNFSystemInstaller(LinuxInstaller, metaclass=abc.ABCMeta):
    """
    An abstract class providing implementation of DNF kernel locking methods.

    Kernel updates are blocked by adding ``kernel*`` to the ``exclude`` option
    in the ``[main]`` section of /etc/dnf/dnf.conf, and unblocked by removing
    it again. Before the file is rewritten, a copy is stored at
    /etc/dnf/dnf.conf_backup and restored if the write fails.

    NOTE: the file is rewritten through ``configparser``, so comments present
    in the original dnf.conf are not preserved.
    """

    BASHRC_PATH = pathlib.Path("/etc/bashrc")

    # Location of the DNF configuration file and of the safety copy made
    # right before it is rewritten.
    _DNF_CONF_PATH = pathlib.Path("/etc/dnf/dnf.conf")
    _DNF_CONF_BACKUP_PATH = pathlib.Path("/etc/dnf/dnf.conf_backup")

    # Glob added to / removed from the `exclude` list to (un)block kernel updates.
    _KERNEL_PATTERN = "kernel*"

    @checkpoint_decorator("add_nvidia_repo", "NVIDIA repository already added.")
    def _add_nvidia_repo(self):
        """
        Add the Nvidia repository to the system. Do nothing if already done.

        The "already done" short-circuit is presumably provided by
        ``checkpoint_decorator`` (defined outside this file).
        """
        system, version = self._detect_linux_distro()
        assert system in (System.RHEL, System.Rocky)
        # Only the major release number is substituted into the repo URL.
        major_release = version.split(".")[0]
        repo_url = NVIDIA_RHEL_REPO_URL.format(version=major_release)
        self.run(f"dnf config-manager --add-repo {repo_url}")
        self.run("dnf clean all")

    def _install_cuda_repo(self):
        """
        Install CUDA Toolkit using DNF.

        The NVIDIA repository is added first, then the ``cuda-toolkit-X-Y``
        package matching ``CUDA_TOOLKIT_VERSION_SHORT`` ("X.Y") is installed.
        """
        self._add_nvidia_repo()
        major, minor = CUDA_TOOLKIT_VERSION_SHORT.split(".")
        self.run(f"dnf install -y cuda-toolkit-{major}-{minor}")

    def _load_dnf_conf(self):
        """Parse /etc/dnf/dnf.conf and return the resulting ConfigParser."""
        # Interpolation is disabled so that a literal `%` in a value is kept
        # as-is instead of raising an interpolation error.
        conf_parser = configparser.ConfigParser(interpolation=None)
        conf_parser.read(self._DNF_CONF_PATH, encoding="utf-8")
        return conf_parser

    @staticmethod
    def _parse_exclude_list(raw_value):
        """
        Split an `exclude` option value into its individual patterns.

        DNF list options may be separated by commas and/or whitespace, so both
        are treated as delimiters; empty items are dropped.
        """
        return raw_value.replace(",", " ").split()

    def _save_dnf_conf(self, conf_parser):
        """
        Write ``conf_parser`` to /etc/dnf/dnf.conf, keeping a backup copy.

        The current file is first copied to /etc/dnf/dnf.conf_backup. If the
        write fails, the backup is copied back and the original exception is
        re-raised.
        """
        shutil.copyfile(self._DNF_CONF_PATH, self._DNF_CONF_BACKUP_PATH)
        try:
            with open(self._DNF_CONF_PATH, mode="w", encoding="utf-8") as dnf_conf_file:
                conf_parser.write(dnf_conf_file)
        except Exception as e:
            logger.error(
                f"Failed to update /etc/dnf/dnf.conf due to {e}. Restoring config file from backup."
            )
            shutil.copyfile(self._DNF_CONF_BACKUP_PATH, self._DNF_CONF_PATH)
            # Bare raise keeps the original traceback intact.
            raise

    def lock_kernel_updates(self):
        """
        Make sure no kernel updates are installed.

        Adds ``kernel*`` to the ``exclude`` list in the ``[main]`` section of
        /etc/dnf/dnf.conf, creating the section or the option when missing.
        Does nothing if ``kernel*`` is already one of the excluded patterns.
        """
        logger.info("Attempting to update /etc/dnf/dnf.conf to block kernel updates.")
        conf_parser = self._load_dnf_conf()
        if not conf_parser.has_section("main"):
            conf_parser.add_section("main")

        excluded = self._parse_exclude_list(conf_parser["main"].get("exclude", ""))
        # Exact match on a list item: a substring test would also match
        # unrelated patterns such as `kernel*-foo`.
        if self._KERNEL_PATTERN in excluded:
            logger.info("Kernel updates are already blocked in /etc/dnf/dnf.conf")
            return

        excluded.append(self._KERNEL_PATTERN)
        conf_parser["main"]["exclude"] = ", ".join(excluded)
        self._save_dnf_conf(conf_parser)
        logger.info("Kernel updates blocked by `exclude` entry in /etc/dnf/dnf.conf")

    def unlock_kernel_updates(self):
        """
        Remove `kernel*` from exclusion list in /etc/dnf/dnf.conf

        Every occurrence of ``kernel*`` is removed. If that leaves the list
        empty, the ``exclude`` option itself is dropped instead of writing an
        empty value. Does nothing if ``kernel*`` is not currently excluded.
        """
        logger.info("Attempting to update /etc/dnf/dnf.conf to unblock kernel updates.")
        conf_parser = self._load_dnf_conf()
        # has_option() is also False when the [main] section itself is missing.
        if not conf_parser.has_option("main", "exclude"):
            logger.info("Kernel updates are not blocked in /etc/dnf/dnf.conf")
            return

        excluded = self._parse_exclude_list(conf_parser["main"]["exclude"])
        if self._KERNEL_PATTERN not in excluded:
            logger.info("Kernel updates are not blocked in /etc/dnf/dnf.conf")
            return

        remaining = [
            pattern for pattern in excluded if pattern != self._KERNEL_PATTERN
        ]
        if remaining:
            conf_parser["main"]["exclude"] = ", ".join(remaining)
        else:
            conf_parser.remove_option("main", "exclude")
        self._save_dnf_conf(conf_parser)
        logger.info("Kernel updates unblocked in /etc/dnf/dnf.conf")