in tools/gpudirect-webhook/src/webhook.rs [65:256]
fn configure_tcpx_for_workload(mut pod: Pod, machine_config: MachineConfig) -> anyhow::Result<Pod> {
let mut devices = vec![];
for i in 0..8 {
let mut data = HashMap::new();
data.insert("path", format!("/dev/nvidia{}", i));
devices.push(data);
}
let interfaces = utils::build_net_interfaces_annotation_value(
&machine_config,
pod.annotations(),
&machine_config.gpu_direct_type,
)?;
let mut nvidia_ctl = HashMap::new();
nvidia_ctl.insert("path", "/dev/nvidiactl".to_string());
devices.push(nvidia_ctl);
let mut nvidia_uvm = HashMap::new();
nvidia_uvm.insert("path", "/dev/nvidia-uvm".to_string());
devices.push(nvidia_uvm);
let annotations = pod.annotations_mut();
annotations.insert(
"devices.gke.io/container.tcpx-daemon".to_string(),
serde_yml::to_string(&devices)?,
);
annotations.insert(
"networking.gke.io/default-interface".to_string(),
"eth0".to_string(),
);
annotations.insert(
"networking.gke.io/interfaces".to_string(),
serde_json::to_string(&interfaces)?,
);
if let Some(spec) = pod.spec.as_mut() {
if let Some(volumes) = spec.volumes.as_mut() {
volumes.push(Volume {
name: "libraries".to_string(),
host_path: Some(HostPathVolumeSource {
path: "/home/kubernetes/bin/nvidia/lib64".to_string(),
..Default::default()
}),
..Default::default()
});
volumes.push(Volume {
name: "sys".to_string(),
host_path: Some(HostPathVolumeSource {
path: "/sys".to_string(),
..Default::default()
}),
..Default::default()
});
volumes.push(Volume {
name: "proc-sys".to_string(),
host_path: Some(HostPathVolumeSource {
path: "/proc/sys".to_string(),
..Default::default()
}),
..Default::default()
});
volumes.push(Volume {
name: "tcpx-socket".to_string(),
empty_dir: Some(EmptyDirVolumeSource {
..Default::default()
}),
..Default::default()
});
}
let tcpx_daemon: Container = serde_json::from_value(json!({
"name": "tcpx-daemon",
"image": "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/tcpgpudmarxd-dev:v2.0.12",
"imagePullPolicy": "Always",
"command": [
"/tcpgpudmarxd/build/app/tcpgpudmarxd",
"--gpu_nic_preset",
"a3vm",
"--gpu_shmem_type",
"fd",
"--uds_path",
"/run/tcpx",
"--setup_param",
"\\\"--verbose 128 2 0 \\\""
],
"env": [
{
"name": "LD_LIBRARY_PATH",
"value": "/usr/local/nvidia/lib64"
}
],
"securityContext": {
"capabilities": {
"add": [
"NET_ADMIN"
]
}
},
"volumeMounts": [
{
"mountPath": "/usr/local/nvidia/lib64",
"name": "libraries"
},
{
"mountPath": "/run/tcpx",
"name": "tcpx-socket"
},
{
"mountPath": "/hostsysfs",
"name": "sys"
},
{
"mountPath": "/hostprocsysfs",
"name": "proc-sys"
}
]
}))?;
spec.containers.push(tcpx_daemon);
for container in spec.containers.iter_mut() {
if let Some(resources) = &container.resources {
if utils::is_gpu_pod(resources) {
let mut vms = vec![];
vms.push(VolumeMount {
name: "tcpx-socket".to_string(),
mount_path: "/tmp".to_string(),
..Default::default()
});
vms.push(VolumeMount {
name: "libraries".to_string(),
mount_path: "/usr/local/nvidia/lib64".to_string(),
..Default::default()
});
match container.volume_mounts.as_mut() {
Some(volume_mounts) => {
volume_mounts.extend_from_slice(&vms);
}
None => {
container.volume_mounts = Some(vms);
}
}
let ld_config = EnvVar {
name: "LD_LIBRARY_PATH".to_string(),
value: Some("/usr/local/nvidia/lib64:/usr/local/tcpx/lib64".to_string()),
..Default::default()
};
let mut nccl_config = HashMap::new();
nccl_config.insert("NCCL_SOCKET_IFNAME", "eth0");
nccl_config.insert("NCCL_ALGO", "Ring");
nccl_config.insert("NCCL_PROTO", "Simple");
nccl_config.insert("NCCL_CROSS_NIC", "0");
nccl_config.insert("NCCL_NET_GDR_LEVEL", "PIX");
nccl_config.insert("NCCL_P2P_PXN_LEVEL", "0");
nccl_config.insert("NCCL_GPUDIRECTTCPX_SOCKET_IFNAME", "eth1,eth2,eth3,eth4");
nccl_config.insert("NCCL_GPUDIRECTTCPX_CTRL_DEV", "eth0");
nccl_config.insert("NCCL_DYNAMIC_CHUNK_SIZE", "524288");
nccl_config.insert("NCCL_P2P_NET_CHUNKSIZE", "524288");
nccl_config.insert("NCCL_P2P_PCI_CHUNKSIZE", "524288");
nccl_config.insert("NCCL_P2P_NVL_CHUNKSIZE", "1048576");
nccl_config.insert("NCCL_BUFFSIZE", "4194304");
nccl_config.insert("NCCL_NSOCKS_PERTHREAD", "4");
nccl_config.insert("NCCL_SOCKET_NTHREADS", "1");
nccl_config.insert("NCCL_GPUDIRECTTCPX_TX_BINDINGS", "\"eth1:8-21,112-125;eth2:8-21,112-125;eth3:60-73,164-177;eth4:60-73,164-177\"");
nccl_config.insert("NCCL_GPUDIRECTTCPX_RX_BINDINGS", "\"eth1:22-35,126-139;eth2:22-35,126-139;eth3:74-87,178-191;eth4:74-87,178-191\"");
nccl_config.insert(
"NCCL_GPUDIRECTTCPX_PROGRAM_FLOW_STEERING_WAIT_MICROS",
"500000",
);
let nccl_env_vars = utils::build_pod_env_vars(nccl_config);
match container.env.as_mut() {
Some(env_vars) => {
env_vars.push(ld_config);
env_vars.extend_from_slice(&nccl_env_vars);
}
None => {
let mut env_vars = vec![];
env_vars.push(ld_config);
env_vars.extend_from_slice(&nccl_env_vars);
container.env = Some(env_vars);
}
}
}
}
}
}
debug!("{}", serde_json::to_string_pretty(&pod)?);
info!(
"configured {:?} for pod: {}/{}",
machine_config.gpu_direct_type,
pod.namespace().unwrap(),
pod.name_any()
);
Ok(pod)
}