in nvidia-persistenced-installer/nvidia_persistenced_installer.go [45:83]
func main() {
	flag.Parse()
	ctx := context.Background()
	// Only run the persistence daemon on confidential GPU nodes.
	enabled, err := checkConfidentialGPUEnablement(ctx)
	if err != nil {
		glog.ExitContextf(ctx, "checkConfidentialGPUEnablement failed: %v", err)
	}
	if enabled {
		// Needed so that nvidia-smi can be run from this container to set the GPU to a ready state.
		if err := updateContainerLdCache(); err != nil {
			glog.ExitContextf(ctx, "updateContainerLdCache failed: %v", err)
		}
		if err := enablePersistenceMode(ctx); err != nil {
			glog.ExitContextf(ctx, "failed to start persistence mode: %v", err)
		}
		// Wait briefly before setting the ready state. If the workload starts too soon after
		// the persistence daemon comes up, errors can occasionally occur.
		time.Sleep(time.Duration(*readyDelay) * time.Millisecond)
		if err := setGPUReadyState(ctx); err != nil {
			glog.ExitContextf(ctx, "failed to set gpu to ready state: %v", err)
		}
	} else {
		glog.InfoContext(ctx, "Confidential GPU is NOT enabled, skipping nvidia persistenced enablement.")
		// Don't exit: this runs as a sidecar, and exiting would cause it to restart indefinitely.
	}
	// Keep the container running so that the nvidia persistence daemon keeps running.
	// Create a channel to listen for termination signals (SIGINT and SIGTERM).
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	// Block indefinitely until a signal is received.
	sig := <-sigChan
	fmt.Printf("Received signal: %v. Shutting down...\n", sig)
}
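
The helper functions called above (checkConfidentialGPUEnablement, updateContainerLdCache, enablePersistenceMode, setGPUReadyState) are defined elsewhere in this file and are not part of this excerpt. As a rough, minimal sketch of what the GPU-facing helpers might look like, assuming the container image provides ldconfig, nvidia-persistenced, and nvidia-smi on its PATH, and that `nvidia-smi conf-compute -srs 1` is the command used to set the ready state (these are assumptions, not confirmed by the excerpt):

// Hypothetical sketches of the helpers invoked from main above; the actual
// implementations live elsewhere in nvidia_persistenced_installer.go.
package main

import (
	"context"
	"fmt"
	"os/exec"

	"github.com/golang/glog"
)

// updateContainerLdCache rebuilds the loader cache so the driver libraries
// mounted into the container (and nvidia-smi) can be resolved.
// Assumption: a plain ldconfig run is sufficient here.
func updateContainerLdCache() error {
	if out, err := exec.Command("ldconfig").CombinedOutput(); err != nil {
		return fmt.Errorf("ldconfig: %v: %s", err, out)
	}
	return nil
}

// enablePersistenceMode launches the NVIDIA persistence daemon.
// Assumption: the nvidia-persistenced binary is present in the image and
// daemonizes itself after startup.
func enablePersistenceMode(ctx context.Context) error {
	if out, err := exec.CommandContext(ctx, "nvidia-persistenced").CombinedOutput(); err != nil {
		return fmt.Errorf("nvidia-persistenced: %v: %s", err, out)
	}
	glog.InfoContext(ctx, "nvidia-persistenced started")
	return nil
}

// setGPUReadyState marks the confidential GPU as ready to accept workloads.
// Assumption: `nvidia-smi conf-compute -srs 1` is the command used.
func setGPUReadyState(ctx context.Context) error {
	if out, err := exec.CommandContext(ctx, "nvidia-smi", "conf-compute", "-srs", "1").CombinedOutput(); err != nil {
		return fmt.Errorf("nvidia-smi conf-compute -srs 1: %v: %s", err, out)
	}
	glog.InfoContext(ctx, "GPU ready state set")
	return nil
}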