in use-cases/model-fine-tuning-pipeline/fine-tuning/pytorch/src/fine_tune.py [0:0]
def get_current_node_id_and_rank():
    """Return (node_id, global_rank, gpu_per_node) for the current process.

    Assumes `torch`, `dist` (torch.distributed), `logger`, and the module-level
    `accelerator` (accelerate.Accelerator) are defined elsewhere in fine_tune.py.
    """
    # GPUs visible on this node; guard against CPU-only runs to avoid division by zero.
    gpu_per_node = max(torch.cuda.device_count(), 1)
    logger.info(f"gpu_per_node: {gpu_per_node}")
    if dist.is_initialized():
        logger.info("Distributed training enabled; calculating node id.")
        global_rank = dist.get_rank()  # Global rank of this process.
        logger.info(f"global_rank: {global_rank}")
        total_gpus = accelerator.state.num_processes
        logger.info(f"total_gpus: {total_gpus}")
        total_nodes = total_gpus // gpu_per_node
        logger.info(f"total_nodes: {total_nodes}")
        node_id = global_rank // gpu_per_node
    else:
        logger.info("Distributed training not enabled; defaulting to node 0, rank 0.")
        node_id = 0
        global_rank = 0
    logger.info(f"node_id: {node_id}")
    return (node_id, global_rank, gpu_per_node)
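

# Illustrative usage sketch (an assumption, not taken from the original file):
# the returned tuple lets each node elect one "local leader" process, e.g. the
# process whose global rank is a multiple of gpu_per_node, for node-local work
# such as downloading the base model once per node or writing node-level logs.
#
#   node_id, global_rank, gpu_per_node = get_current_node_id_and_rank()
#   if global_rank % gpu_per_node == 0:
#       logger.info(f"node {node_id}: rank {global_rank} is the local leader")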