fn configure_tcpxo_for_workload()

in tools/gpudirect-webhook/src/webhook.rs [258:469]


/// Mutates `pod` for GPUDirect-TCPXO networking: adds the device-injection
/// and multi-NIC annotations, the host-path volumes, the `tcpxo-daemon`
/// sidecar container, and — on every GPU-requesting container — the
/// aperture-devices mount plus the NCCL/FasTrak environment variables.
///
/// The spec is modified in place and the updated `Pod` is returned.
///
/// # Errors
/// Fails if the interface annotation cannot be built from `machine_config`
/// or if any serde serialization step fails.
fn configure_tcpxo_for_workload(
    mut pod: Pod,
    machine_config: MachineConfig,
) -> anyhow::Result<Pod> {
    // Device list handed to the GKE device injector: the 8 GPU nodes plus the
    // control, UVM, and dmabuf-helper devices.
    let mut devices: Vec<HashMap<&str, String>> = (0..8)
        .map(|i| {
            let mut d = HashMap::new();
            d.insert("path", format!("/dev/nvidia{}", i));
            d
        })
        .collect();
    for path in ["/dev/nvidiactl", "/dev/nvidia-uvm", "/dev/dmabuf_import_helper"] {
        let mut d = HashMap::new();
        d.insert("path", path.to_string());
        devices.push(d);
    }

    // Multi-network interface list derived from the machine config and any
    // annotations already present on the pod.
    let interfaces = utils::build_net_interfaces_annotation_value(
        &machine_config,
        pod.annotations(),
        &machine_config.gpu_direct_type,
    )?;

    let annotations = pod.annotations_mut();
    // NOTE(review): this key targets a container named "tcpx-daemon" while the
    // sidecar injected below is named "tcpxo-daemon" — confirm the device
    // injector matches on this key as intended, otherwise the devices are
    // never injected into the daemon.
    annotations.insert(
        "devices.gke.io/container.tcpx-daemon".to_string(),
        serde_yml::to_string(&devices)?,
    );
    annotations.insert(
        "networking.gke.io/default-interface".to_string(),
        "eth0".to_string(),
    );
    annotations.insert(
        "networking.gke.io/interfaces".to_string(),
        serde_json::to_string(&interfaces)?,
    );
    if let Some(spec) = pod.spec.as_mut() {
        // BUGFIX: previously these volumes were only appended when the pod
        // already declared a `volumes` list; a pod without one ended up with
        // containers referencing volumes that don't exist. Create the list on
        // demand instead.
        let volumes = spec.volumes.get_or_insert_with(Vec::new);
        for (name, path) in [
            ("libraries", "/home/kubernetes/bin/nvidia/lib64"),
            ("sys", "/sys"),
            ("proc-sys", "/proc/sys"),
            ("aperture-devices", "/dev/aperture_devices"),
        ] {
            volumes.push(Volume {
                name: name.to_string(),
                host_path: Some(HostPathVolumeSource {
                    path: path.to_string(),
                    ..Default::default()
                }),
                ..Default::default()
            });
        }
        // The RxDM sidecar that services FasTrak traffic for the pod.
        let tcpx_daemon: Container = serde_json::from_value(json!({
            "args": [
                "set -ex\nchmod 755 /fts/entrypoint_rxdm_container.sh\n/fts/entrypoint_rxdm_container.sh --num_hops=2 --num_nics=8 --uid= --alsologtostderr\n"
            ],
            "command": [
                "/bin/sh",
                "-c"
            ],
            "env": [
                {
                    "name": "LD_LIBRARY_PATH",
                    "value": "/usr/local/nvidia/lib64"
                }
            ],
            "image": "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.14",
            "imagePullPolicy": "Always",
            "name": "tcpxo-daemon",
            "resources": {},
            "securityContext": {
                "capabilities": {
                    "add": [
                        "NET_ADMIN",
                        "NET_BIND_SERVICE"
                    ]
                }
            },
            "terminationMessagePath": "/dev/termination-log",
            "terminationMessagePolicy": "File",
            "volumeMounts": [
                {
                    "mountPath": "/usr/local/nvidia",
                    "name": "libraries"
                },
                {
                    "mountPath": "/hostsysfs",
                    "name": "sys"
                },
                {
                    "mountPath": "/hostprocsysfs",
                    "name": "proc-sys"
                }
            ]
        }))?;
        spec.containers.push(tcpx_daemon);
        // Wire up every GPU-requesting container. The daemon pushed above
        // declares empty resources, so `is_gpu_pod` is presumed to skip it —
        // TODO confirm against utils::is_gpu_pod.
        for container in spec.containers.iter_mut() {
            if let Some(resources) = &container.resources {
                if utils::is_gpu_pod(resources) {
                    // LLCM requires the aperture devices to be visible inside
                    // the workload container.
                    container
                        .volume_mounts
                        .get_or_insert_with(Vec::new)
                        .push(VolumeMount {
                            name: "aperture-devices".to_string(),
                            mount_path: "/dev/aperture_devices".to_string(),
                            ..Default::default()
                        });
                    let ld_config = EnvVar {
                        name: "LD_LIBRARY_PATH".to_string(),
                        value: Some("/usr/local/nvidia/lib64".to_string()),
                        ..Default::default()
                    };
                    let fastrak_llcm = EnvVar {
                        name: "NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY".to_string(),
                        value: Some("/dev/aperture_devices".to_string()),
                        ..Default::default()
                    };
                    // Recommended NCCL tuning for GPUDirect-TCPXO (A3 Mega).
                    let mut nccl_config = HashMap::new();
                    nccl_config.insert("NCCL_FASTRAK_CTRL_DEV", "eth0");
                    nccl_config.insert(
                        "NCCL_FASTRAK_IFNAME",
                        "eth1,eth2,eth3,eth4,eth5,eth6,eth7,eth8",
                    );
                    nccl_config.insert("NCCL_SOCKET_IFNAME", "eth0");
                    nccl_config.insert("NCCL_CROSS_NIC", "0");
                    nccl_config.insert("NCCL_ALGO", "Ring,Tree");
                    nccl_config.insert("NCCL_PROTO", "Simple");
                    nccl_config.insert("NCCL_MIN_NCHANNELS", "4");
                    nccl_config.insert("NCCL_TUNER_PLUGIN", "libnccl-tuner.so");
                    nccl_config.insert(
                        "NCCL_TUNER_CONFIG_PATH",
                        "/usr/local/nvidia/lib64/a3plus_tuner_config.textproto",
                    );
                    nccl_config.insert(
                        "NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE",
                        "/usr/local/nvidia/lib64/a3plus_guest_config.textproto",
                    );
                    nccl_config.insert("NCCL_DYNAMIC_CHUNK_SIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NET_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_PCI_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NVL_CHUNKSIZE", "1048576");
                    nccl_config.insert("NCCL_FASTRAK_NUM_FLOWS", "2");
                    nccl_config.insert("NCCL_FASTRAK_USE_SNAP", "1");
                    nccl_config.insert("NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS", "600000");
                    nccl_config.insert("NCCL_FASTRAK_ENABLE_CONTROL_CHANNEL", "0");
                    nccl_config.insert("NCCL_BUFFSIZE", "8388608");
                    nccl_config.insert("CUDA_VISIBLE_DEVICES", "0,1,2,3,4,5,6,7");
                    nccl_config.insert("NCCL_NET_GDR_LEVEL", "PIX");
                    nccl_config.insert("NCCL_FASTRAK_ENABLE_HOTPATH_LOGGING", "0");
                    nccl_config.insert("NCCL_FASTRAK_USE_LLCM", "1");
                    nccl_config.insert("NCCL_NVLS_ENABLE", "0");
                    // recommended, to log NCCL errors
                    nccl_config.insert("NCCL_DEBUG", "WARN");
                    nccl_config.insert("NCCL_DEBUG_SUBSYS", "INIT,NET,ENV,COLL,GRAPH");
                    let nccl_env_vars = utils::build_pod_env_vars(nccl_config);
                    // Append to any existing env list rather than replacing it.
                    let env_vars = container.env.get_or_insert_with(Vec::new);
                    env_vars.push(ld_config);
                    env_vars.push(fastrak_llcm);
                    env_vars.extend_from_slice(&nccl_env_vars);
                }
            }
        }
    }
    debug!("{}", serde_json::to_string_pretty(&pod)?);
    // BUGFIX: an admission webhook may see objects without a namespace set;
    // don't panic in the logging path.
    info!(
        "configured {:?} for pod: {}/{}",
        machine_config.gpu_direct_type,
        pod.namespace().unwrap_or_default(),
        pod.name_any()
    );
    Ok(pod)
}