in train/comms/pt/comms_utils.py [0:0]
def checkArgs(self, args) -> None:
    """Validate some basic/common arguments for all PARAM-Comm benchmarks.

    Performs three steps, in order:

    1. Checks ``args.nw_stack``, ``args.data_type`` and ``args.num_tpu_cores``
       against the values supported by this instance. On a mismatch an error
       is logged and ``gracefulExit()`` is called.
    2. Resolves ``args.log`` to a numeric ``logging`` level and configures
       logging via ``logging.basicConfig``; the log format embeds the
       ``"global_rank"`` entry returned by ``read_comms_env_vars()``.
    3. Reconciles the master address/port between the command-line values
       and the ``MASTER_ADDR``/``MASTER_PORT`` environment variables,
       writing the chosen values back to ``os.environ``.

    Args:
        args: Parsed benchmark arguments. This method reads ``nw_stack``,
            ``data_type``, ``num_tpu_cores``, ``log``, ``master_ip`` and
            ``master_port``.

    Raises:
        ValueError: If ``args.log`` does not name an integer attribute of
            the ``logging`` module (e.g. ``"INFO"``, ``"DEBUG"``).
    """

    def _sync_master_env(flag, env_name, cli_value, default_value):
        """Reconcile one command-line option with its environment variable.

        Precedence:
          1) a command-line value that differs from the pre-defined default,
          2) the value already present in the environment,
          3) the command-line value (i.e. the default) when the environment
             variable is not set.

        A command-line value equal to the default cannot be told apart from
        "not passed", so in that case an existing environment value wins.
        """
        env_value = os.environ.get(env_name)
        if env_value is None:
            os.environ[env_name] = cli_value
        elif cli_value not in (default_value, env_value):
            logger.warning(
                f"{flag}={cli_value} while {env_name}={env_value}, "
                f"use {flag}={cli_value} and continue..."
            )
            os.environ[env_name] = cli_value
        else:
            logger.info(
                f"From environment variables, using {env_name}={env_value}"
            )

    if args.nw_stack not in self.supportedNwstacks:
        logger.error(
            f"Specified backend: {args.nw_stack} is not one of the supported backends: {self.supportedNwstacks}. Make sure the input is using the correct case."
        )
        gracefulExit()

    if args.data_type not in self.supportedDtype:
        logger.error(
            f"Specified dtype: {args.data_type} is not one of the supported dtypes: {self.supportedDtype}"
        )
        gracefulExit()

    if args.num_tpu_cores not in self.supported_tpu_core_valuses:
        logger.error(
            f"TPU core value: {args.num_tpu_cores} is not one of the supported values: {self.supported_tpu_core_valuses}"
        )
        gracefulExit()

    # Check and set the log level: the level name is looked up as an
    # attribute of the logging module (e.g. "info" -> logging.INFO).
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {args.log}")

    comms_env_params = read_comms_env_vars()
    # The rank is baked into the format string once, so every record emitted
    # afterwards carries it.
    logging.basicConfig(
        level=numeric_level,
        format="[%(asctime)s][%(name)s][%(levelname)s][Rank{:3}] - %(message)s".format(
            comms_env_params["global_rank"]
        ),
    )

    # Check master-ip and master-port with the following logic:
    # 1) prefer the values passed to PARAM, i.e., through --master-ip and --master-port
    # 2) check and use the env. variables, i.e., MASTER_ADDR and MASTER_PORT
    # 3) if both #1 and #2 are not set, pre-defined default values will be used
    _sync_master_env("--master-ip", "MASTER_ADDR", args.master_ip, default_master_ip)
    _sync_master_env(
        "--master-port", "MASTER_PORT", args.master_port, default_master_port
    )