in src/sagemaker_training/environment.py [0:0]
def num_neurons(instance_type):  # type: (str) -> int
    """Return the number of Neuron cores available in the current container.

    For instance types whose name contains ``trn2.48xlarge`` the count is
    hard-coded to 64 and no external tool is invoked. For every other
    instance type the count is the sum of the ``nc_count`` field over the
    entries printed by ``neuron-ls -j``.

    Detection is best-effort: every failure mode is logged and reported as
    0 cores instead of raising.

    Args:
        instance_type (str): Name of the instance type the container runs
            on. A value that is not a string (for example ``None``) is
            treated as an unknown instance type and falls back to probing
            with ``neuron-ls``.

    Returns:
        int: Number of Neuron cores available in the current container, or
        0 when ``neuron-ls`` is missing, exits with an error, or prints
        output that cannot be interpreted.
    """
    no_neurons_msg = "No Neurons detected (normal if no neurons installed)"

    # Guard the substring test so a missing instance type cannot raise TypeError.
    if isinstance(instance_type, str) and "trn2.48xlarge" in instance_type:
        neuron_cores = 64
    else:
        try:
            raw_output = subprocess.check_output(shlex.split("neuron-ls -j"))
        except OSError:
            # The neuron-ls executable could not be started (e.g. not installed).
            logger.info(no_neurons_msg)
            return 0
        except subprocess.CalledProcessError as e:
            # neuron-ls ran but exited non-zero; surface whatever follows
            # "error=" in its captured output when there is any.
            if isinstance(e.output, bytes):
                detail = e.output.decode("utf-8", errors="replace").partition("error=")[2]
                logger.info(
                    "No Neurons detected (normal if no neurons installed). "
                    "If neuron installed then {}".format(detail)
                )
            else:
                logger.info(no_neurons_msg)
            return 0

        try:
            devices = json.loads(raw_output.decode("utf-8"))
            neuron_cores = sum(device.get("nc_count", 0) for device in devices)
        except (ValueError, TypeError, AttributeError) as e:
            # ValueError covers both invalid UTF-8 and invalid JSON; the other
            # two cover JSON that is not an iterable of objects with an
            # integer "nc_count".
            logger.warning(
                "Could not interpret `neuron-ls -j` output (%s); assuming no Neurons", e
            )
            return 0

    logger.info(f"Found {neuron_cores} neurons on this instance")
    return neuron_cores