in optimum/amd/cli.py [0:0]
def get_amd_zentorch_env():
    """
    Build the environment variables used to run a process with the AMD ZenTorch plugin.

    The returned dictionary contains the following keys (all values are strings):

    - `OMP_NUM_THREADS`: The number of OpenMP threads to use (the detected CPU count).
    - `OMP_DYNAMIC`: Whether or not OpenMP threads are dynamically allocated.
    - `OMP_WAIT_POLICY`: The OpenMP wait policy.
    - `ZENDNN_GEMM_ALGO`: The GEMM algorithm to use.
    - `GOMP_CPU_AFFINITY`: The CPU affinity for OpenMP threads (`0-<cpu_count - 1>`).
    - `LD_PRELOAD`: The path to the Jemalloc library.
    - `MALLOC_CONF`: The Jemalloc configuration.

    Side effects: a warning is logged when the default Jemalloc path does not exist and
    `LD_PRELOAD` is not among the user overrides; every variable is logged at INFO level.
    `LD_PRELOAD` is placed in the result even when the library file is missing.

    Returns:
        A new `dict` mapping environment variable names to their string values.
    """
    # TODO: how to handle NUMA nodes and socket affinity?
    # `os.cpu_count()` may return None when the count cannot be determined;
    # fall back to a single CPU instead of failing on `None - 1`.
    cpu_count = os.cpu_count() or 1
    jemalloc_path = "/usr/lib/x86_64-linux-gnu/libjemalloc.so"

    env = {
        "OMP_NUM_THREADS": f"{cpu_count}",
        "OMP_DYNAMIC": "False",
        "OMP_WAIT_POLICY": "ACTIVE",
        "ZENDNN_GEMM_ALGO": "4",
        "GOMP_CPU_AFFINITY": f"0-{cpu_count - 1}",
        "LD_PRELOAD": jemalloc_path,
        "MALLOC_CONF": (
            "oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
        ),
    }

    # Only warn when the default path is missing AND `LD_PRELOAD` is not in the overrides.
    # NOTE(review): presumably `get_env_vars_overrides()` returns the user-supplied
    # `NAME=value` overrides from the `amdrun` command line; confirm against its definition.
    if not os.path.exists(jemalloc_path) and "LD_PRELOAD" not in get_env_vars_overrides():
        logger.warning(
            f"Jemalloc not found at {jemalloc_path} either because it's not installed or because the path is incorrect. "
            "Make sure it's installed and/or override `LD_PRELOAD` manually: `amdrun LD_PRELOAD=/path/to/libjemalloc.so python script.py script_args`"
        )

    logger.info("AMD ZenTorch environment variables:")
    # The dict preserves insertion order, so the variables are logged in the order defined above.
    for name, value in env.items():
        logger.info(f"- {name}: {value}")

    return env