fn configure_tcpx_for_workload()

in tools/gpudirect-webhook/src/webhook.rs [65:256]


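/// Mutate a GPU workload pod for GPUDirect-TCPX: expose the NVIDIA device nodes
/// to the tcpx-daemon sidecar, add the GKE device and multi-NIC annotations,
/// mount the host NVIDIA libraries and the tcpx socket volume, inject the
/// tcpx-daemon container, and set NCCL environment variables on every
/// GPU-requesting container.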
fn configure_tcpx_for_workload(mut pod: Pod, machine_config: MachineConfig) -> anyhow::Result<Pod> {
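    // Device nodes the tcpx-daemon sidecar needs: one entry per GPU (A3 VMs
    // expose eight), plus nvidiactl and nvidia-uvm appended below.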
    let mut devices = vec![];

    for i in 0..8 {
        let mut data = HashMap::new();
        data.insert("path", format!("/dev/nvidia{}", i));
        devices.push(data);
    }

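    // Resolve the per-NIC interface list for the networking.gke.io/interfaces
    // annotation from the machine config and any existing pod annotations.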
    let interfaces = utils::build_net_interfaces_annotation_value(
        &machine_config,
        pod.annotations(),
        &machine_config.gpu_direct_type,
    )?;

    let mut nvidia_ctl = HashMap::new();
    nvidia_ctl.insert("path", "/dev/nvidiactl".to_string());
    devices.push(nvidia_ctl);
    let mut nvidia_uvm = HashMap::new();
    nvidia_uvm.insert("path", "/dev/nvidia-uvm".to_string());
    devices.push(nvidia_uvm);

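    // Annotations consumed by GKE: the device list for the tcpx-daemon
    // container and the multi-network interface configuration.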
    let annotations = pod.annotations_mut();
    annotations.insert(
        "devices.gke.io/container.tcpx-daemon".to_string(),
        serde_yml::to_string(&devices)?,
    );
    annotations.insert(
        "networking.gke.io/default-interface".to_string(),
        "eth0".to_string(),
    );
    annotations.insert(
        "networking.gke.io/interfaces".to_string(),
        serde_json::to_string(&interfaces)?,
    );
    if let Some(spec) = pod.spec.as_mut() {
        // Host and shared volumes referenced by the tcpx-daemon sidecar and the
        // GPU containers; create the volume list if the pod spec has none yet so
        // the volume mounts added below never dangle.
        let volumes = spec.volumes.get_or_insert_with(Vec::new);
        volumes.push(Volume {
            name: "libraries".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/home/kubernetes/bin/nvidia/lib64".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "sys".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/sys".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "proc-sys".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/proc/sys".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "tcpx-socket".to_string(),
            empty_dir: Some(EmptyDirVolumeSource::default()),
            ..Default::default()
        });
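        // Inject the tcpx-daemon sidecar (tcpgpudmarxd) with NET_ADMIN, the host
        // NVIDIA libraries, and its UNIX-domain-socket path under /run/tcpx.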
        let tcpx_daemon: Container = serde_json::from_value(json!({
          "name": "tcpx-daemon",
          "image": "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/tcpgpudmarxd-dev:v2.0.12",
          "imagePullPolicy": "Always",
          "command": [
            "/tcpgpudmarxd/build/app/tcpgpudmarxd",
            "--gpu_nic_preset",
            "a3vm",
            "--gpu_shmem_type",
            "fd",
            "--uds_path",
            "/run/tcpx",
            "--setup_param",
            "\\\"--verbose 128 2 0 \\\""
          ],
          "env": [
            {
              "name": "LD_LIBRARY_PATH",
              "value": "/usr/local/nvidia/lib64"
            }
          ],
          "securityContext": {
            "capabilities": {
              "add": [
                "NET_ADMIN"
              ]
            }
          },
          "volumeMounts": [
            {
              "mountPath": "/usr/local/nvidia/lib64",
              "name": "libraries"
            },
            {
              "mountPath": "/run/tcpx",
              "name": "tcpx-socket"
            },
            {
              "mountPath": "/hostsysfs",
              "name": "sys"
            },
            {
              "mountPath": "/hostprocsysfs",
              "name": "proc-sys"
            }
          ]
        }))?;
        spec.containers.push(tcpx_daemon);
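        // Patch every GPU-requesting container with the tcpx socket and NVIDIA
        // library mounts plus the NCCL / GPUDirect-TCPX environment variables.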
        for container in spec.containers.iter_mut() {
            if let Some(resources) = &container.resources {
                if utils::is_gpu_pod(resources) {
                    let vms = vec![
                        VolumeMount {
                            name: "tcpx-socket".to_string(),
                            mount_path: "/tmp".to_string(),
                            ..Default::default()
                        },
                        VolumeMount {
                            name: "libraries".to_string(),
                            mount_path: "/usr/local/nvidia/lib64".to_string(),
                            ..Default::default()
                        },
                    ];
                    container
                        .volume_mounts
                        .get_or_insert_with(Vec::new)
                        .extend_from_slice(&vms);
                    let ld_config = EnvVar {
                        name: "LD_LIBRARY_PATH".to_string(),
                        value: Some("/usr/local/nvidia/lib64:/usr/local/tcpx/lib64".to_string()),
                        ..Default::default()
                    };
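                    // NCCL settings for GPUDirect-TCPX: eth0 carries control
                    // traffic, eth1-eth4 carry GPU data, with chunk sizes,
                    // buffer sizes, and CPU core bindings for the TX/RX threads.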
                    let mut nccl_config = HashMap::new();
                    nccl_config.insert("NCCL_SOCKET_IFNAME", "eth0");
                    nccl_config.insert("NCCL_ALGO", "Ring");
                    nccl_config.insert("NCCL_PROTO", "Simple");
                    nccl_config.insert("NCCL_CROSS_NIC", "0");
                    nccl_config.insert("NCCL_NET_GDR_LEVEL", "PIX");
                    nccl_config.insert("NCCL_P2P_PXN_LEVEL", "0");
                    nccl_config.insert("NCCL_GPUDIRECTTCPX_SOCKET_IFNAME", "eth1,eth2,eth3,eth4");
                    nccl_config.insert("NCCL_GPUDIRECTTCPX_CTRL_DEV", "eth0");
                    nccl_config.insert("NCCL_DYNAMIC_CHUNK_SIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NET_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_PCI_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NVL_CHUNKSIZE", "1048576");
                    nccl_config.insert("NCCL_BUFFSIZE", "4194304");
                    nccl_config.insert("NCCL_NSOCKS_PERTHREAD", "4");
                    nccl_config.insert("NCCL_SOCKET_NTHREADS", "1");
                    nccl_config.insert("NCCL_GPUDIRECTTCPX_TX_BINDINGS", "eth1:8-21,112-125;eth2:8-21,112-125;eth3:60-73,164-177;eth4:60-73,164-177");
                    nccl_config.insert("NCCL_GPUDIRECTTCPX_RX_BINDINGS", "eth1:22-35,126-139;eth2:22-35,126-139;eth3:74-87,178-191;eth4:74-87,178-191");
                    nccl_config.insert(
                        "NCCL_GPUDIRECTTCPX_PROGRAM_FLOW_STEERING_WAIT_MICROS",
                        "500000",
                    );
                    let nccl_env_vars = utils::build_pod_env_vars(nccl_config);
                    let env_vars = container.env.get_or_insert_with(Vec::new);
                    env_vars.push(ld_config);
                    env_vars.extend_from_slice(&nccl_env_vars);
                }
            }
        }
    }
    debug!("{}", serde_json::to_string_pretty(&pod)?);
    info!(
        "configured {:?} for pod: {}/{}",
        machine_config.gpu_direct_type,
        pod.namespace().unwrap_or_default(),
        pod.name_any()
    );
    Ok(pod)
}
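
For context, the devices list assembled above is serialized with serde_yml into the devices.gke.io/container.tcpx-daemon annotation. Below is a minimal standalone sketch of that serialization, assuming only the serde_yml dependency the function itself uses:

use std::collections::HashMap;

fn main() -> Result<(), serde_yml::Error> {
    // Rebuild the same device list the webhook stores in the
    // devices.gke.io/container.tcpx-daemon annotation.
    let mut devices: Vec<HashMap<&str, String>> = (0..8)
        .map(|i| HashMap::from([("path", format!("/dev/nvidia{}", i))]))
        .collect();
    devices.push(HashMap::from([("path", "/dev/nvidiactl".to_string())]));
    devices.push(HashMap::from([("path", "/dev/nvidia-uvm".to_string())]));

    // Prints one `- path: /dev/...` entry per device node.
    println!("{}", serde_yml::to_string(&devices)?);
    Ok(())
}

The annotation value is therefore a YAML sequence with one path entry per device node, which GKE uses to inject those devices into the tcpx-daemon container.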