in tools/gpudirect-webhook/src/webhook.rs [258:469]
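/// Mutate the Pod so its GPU workload can use GPUDirect-TCPXO: add the
/// device-list and multi-NIC annotations, the required host-path volumes,
/// the tcpxo-daemon (RxDM) sidecar container, and the recommended NCCL
/// environment variables on every container that requests GPUs.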
fn configure_tcpxo_for_workload(
    mut pod: Pod,
    machine_config: MachineConfig,
) -> anyhow::Result<Pod> {
    // Device list for the TCPXO daemon: the eight GPU character devices plus
    // the control, UVM, and dmabuf-import-helper devices added below.
    let mut devices = vec![];
    for i in 0..8 {
        let mut data = HashMap::new();
        data.insert("path", format!("/dev/nvidia{}", i));
        devices.push(data);
    }
    // Multi-NIC interface list derived from the machine configuration and the
    // pod's existing annotations.
    let interfaces = utils::build_net_interfaces_annotation_value(
        &machine_config,
        pod.annotations(),
        &machine_config.gpu_direct_type,
    )?;
    let mut nvidia_ctl = HashMap::new();
    nvidia_ctl.insert("path", "/dev/nvidiactl".to_string());
    devices.push(nvidia_ctl);
    let mut nvidia_uvm = HashMap::new();
    nvidia_uvm.insert("path", "/dev/nvidia-uvm".to_string());
    devices.push(nvidia_uvm);
    let mut dmabuf = HashMap::new();
    dmabuf.insert("path", "/dev/dmabuf_import_helper".to_string());
    devices.push(dmabuf);
    let annotations = pod.annotations_mut();
    // Device list consumed by the RxDM sidecar; the annotation key embeds the
    // daemon container's name ("tcpxo-daemon").
    annotations.insert(
        "devices.gke.io/container.tcpxo-daemon".to_string(),
        serde_yml::to_string(&devices)?,
    );
    annotations.insert(
        "networking.gke.io/default-interface".to_string(),
        "eth0".to_string(),
    );
    annotations.insert(
        "networking.gke.io/interfaces".to_string(),
        serde_json::to_string(&interfaces)?,
    );
    if let Some(spec) = pod.spec.as_mut() {
        // Host-path volumes mounted by the daemon and GPU containers below.
        // Create the volume list if the workload did not define one.
        let volumes = spec.volumes.get_or_insert_with(Vec::new);
        volumes.push(Volume {
            name: "libraries".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/home/kubernetes/bin/nvidia/lib64".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "sys".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/sys".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "proc-sys".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/proc/sys".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        volumes.push(Volume {
            name: "aperture-devices".to_string(),
            host_path: Some(HostPathVolumeSource {
                path: "/dev/aperture_devices".to_string(),
                ..Default::default()
            }),
            ..Default::default()
        });
        // TCPXO daemon (RxDM) sidecar that services GPUDirect-TCPXO traffic
        // for the pod; it needs the NVIDIA libraries and the host's sysfs and
        // procfs mounts defined above.
        let tcpxo_daemon: Container = serde_json::from_value(json!({
            "args": [
                "set -ex\nchmod 755 /fts/entrypoint_rxdm_container.sh\n/fts/entrypoint_rxdm_container.sh --num_hops=2 --num_nics=8 --uid= --alsologtostderr\n"
            ],
            "command": [
                "/bin/sh",
                "-c"
            ],
            "env": [
                {
                    "name": "LD_LIBRARY_PATH",
                    "value": "/usr/local/nvidia/lib64"
                }
            ],
            "image": "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.14",
            "imagePullPolicy": "Always",
            "name": "tcpxo-daemon",
            "resources": {},
            "securityContext": {
                "capabilities": {
                    "add": [
                        "NET_ADMIN",
                        "NET_BIND_SERVICE"
                    ]
                }
            },
            "terminationMessagePath": "/dev/termination-log",
            "terminationMessagePolicy": "File",
            "volumeMounts": [
                {
                    "mountPath": "/usr/local/nvidia",
                    "name": "libraries"
                },
                {
                    "mountPath": "/hostsysfs",
                    "name": "sys"
                },
                {
                    "mountPath": "/hostprocsysfs",
                    "name": "proc-sys"
                }
            ]
        }))?;
        spec.containers.push(tcpxo_daemon);
        // Inject the GPUDirect-TCPXO mounts and NCCL environment into every
        // container that requests GPUs; other containers are left untouched.
        for container in spec.containers.iter_mut() {
            if let Some(resources) = &container.resources {
                if utils::is_gpu_pod(resources) {
                    // The LLCM aperture devices must be visible inside the GPU container.
                    container
                        .volume_mounts
                        .get_or_insert_with(Vec::new)
                        .push(VolumeMount {
                            name: "aperture-devices".to_string(),
                            mount_path: "/dev/aperture_devices".to_string(),
                            ..Default::default()
                        });
                    let ld_config = EnvVar {
                        name: "LD_LIBRARY_PATH".to_string(),
                        value: Some("/usr/local/nvidia/lib64".to_string()),
                        ..Default::default()
                    };
                    let fastrak_llcm = EnvVar {
                        name: "NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY".to_string(),
                        value: Some("/dev/aperture_devices".to_string()),
                        ..Default::default()
                    };
                    // NCCL tuning for GPUDirect-TCPXO (FasTrak) on the 8-NIC topology.
                    let mut nccl_config = HashMap::new();
                    nccl_config.insert("NCCL_FASTRAK_CTRL_DEV", "eth0");
                    nccl_config.insert(
                        "NCCL_FASTRAK_IFNAME",
                        "eth1,eth2,eth3,eth4,eth5,eth6,eth7,eth8",
                    );
                    nccl_config.insert("NCCL_SOCKET_IFNAME", "eth0");
                    nccl_config.insert("NCCL_CROSS_NIC", "0");
                    nccl_config.insert("NCCL_ALGO", "Ring,Tree");
                    nccl_config.insert("NCCL_PROTO", "Simple");
                    nccl_config.insert("NCCL_MIN_NCHANNELS", "4");
                    nccl_config.insert("NCCL_TUNER_PLUGIN", "libnccl-tuner.so");
                    nccl_config.insert(
                        "NCCL_TUNER_CONFIG_PATH",
                        "/usr/local/nvidia/lib64/a3plus_tuner_config.textproto",
                    );
                    nccl_config.insert(
                        "NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE",
                        "/usr/local/nvidia/lib64/a3plus_guest_config.textproto",
                    );
                    nccl_config.insert("NCCL_DYNAMIC_CHUNK_SIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NET_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_PCI_CHUNKSIZE", "524288");
                    nccl_config.insert("NCCL_P2P_NVL_CHUNKSIZE", "1048576");
                    nccl_config.insert("NCCL_FASTRAK_NUM_FLOWS", "2");
                    nccl_config.insert("NCCL_FASTRAK_USE_SNAP", "1");
                    nccl_config.insert("NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS", "600000");
                    nccl_config.insert("NCCL_FASTRAK_ENABLE_CONTROL_CHANNEL", "0");
                    nccl_config.insert("NCCL_BUFFSIZE", "8388608");
                    nccl_config.insert("CUDA_VISIBLE_DEVICES", "0,1,2,3,4,5,6,7");
                    nccl_config.insert("NCCL_NET_GDR_LEVEL", "PIX");
                    nccl_config.insert("NCCL_FASTRAK_ENABLE_HOTPATH_LOGGING", "0");
                    nccl_config.insert("NCCL_FASTRAK_USE_LLCM", "1");
                    nccl_config.insert("NCCL_NVLS_ENABLE", "0");
                    // Recommended: surface NCCL warnings and errors in the logs.
                    nccl_config.insert("NCCL_DEBUG", "WARN");
                    nccl_config.insert("NCCL_DEBUG_SUBSYS", "INIT,NET,ENV,COLL,GRAPH");
                    let nccl_env_vars = utils::build_pod_env_vars(nccl_config);
                    // Append to the container's env, creating it if absent.
                    let env_vars = container.env.get_or_insert_with(Vec::new);
                    env_vars.push(ld_config);
                    env_vars.push(fastrak_llcm);
                    env_vars.extend_from_slice(&nccl_env_vars);
                }
            }
        }
    }
    debug!("{}", serde_json::to_string_pretty(&pod)?);
    info!(
        "configured {:?} for pod: {}/{}",
        machine_config.gpu_direct_type,
        pod.namespace().unwrap(),
        pod.name_any()
    );
    Ok(pod)
}