def get_amd_rocm_env()

in optimum/amd/cli.py [0:0]


def get_amd_rocm_env():
    """
    Returns a dictionary of environment variables that are optimized for AMD's ROCm platform.

    The target environment variables are:
    - `ROCR_VISIBLE_DEVICES`: The list of devices to use (maximizing the average bandwidth between them).
    """

    from .topology_utils import extract_max_avg_bandwidth_cluster, get_bandwidth_matrix

    # extract the number of devices to use
    if "--nproc_per_node" in sys.argv:
        # torchrun style
        nproc_per_node_index = sys.argv.index("--nproc_per_node")
        num_devices = int(sys.argv[nproc_per_node_index + 1])
    elif "--ngpus" in sys.argv:
        # accelerate/deepspeed style
        ngpus_index = sys.argv.index("--ngpus")
        num_devices = int(sys.argv[ngpus_index + 1])
    else:
        # early exit if we can't find the number of devices
        return {}

    bandwidth_matrix = get_bandwidth_matrix()
    max_avg_bandwidth_cluster, max_avg_bandwidth = extract_max_avg_bandwidth_cluster(bandwidth_matrix, num_devices)

    # lowest level isolation env var on AMD GPUs
    ROCR_VISIBLE_DEVICES = ",".join(list(map(str, max_avg_bandwidth_cluster)))

    logger.info("AMD ROCm environment variables:")
    logger.info(f"- ROCR_VISIBLE_DEVICES: {ROCR_VISIBLE_DEVICES}")

    return {"ROCR_VISIBLE_DEVICES": ROCR_VISIBLE_DEVICES}