in pytorch/sagemakercv/training/optimizers/mlperf_fp16_optimizer.py [0:0]
def __init__(self,
             init_optimizer,
             static_loss_scale=1.0,
             dynamic_loss_scale=False,
             dynamic_loss_args=None,
             verbose=True):
    # is_available is a function; it must be called, otherwise the check always passes
    if not torch.cuda.is_available():
        raise SystemError("Cannot use fp16 without CUDA.")
    self.optimizer = init_optimizer
    # create list for params in fp16
    self.fp16_params = []
    # maintain fp32 master weights for params in fp16
    self.fp32_from_fp16_params = []
    # create list for params in fp32
    self.fp32_params = []
    # iterate over param_groups
    for param_group in self.optimizer.param_groups:
        fp16_params = []
        fp32_from_fp16_params = []
        fp32_params = []
        # separate fp16/fp32 params into two groups
        for p in param_group['params']:
            if p.dtype == torch.float16:  # fp16
                fp16_params.append(p)
                fp32_from_fp16_params.append(p.clone().float().detach())
            elif p.dtype == torch.float32:  # fp32
                fp32_params.append(p)
        self.fp16_params.append(fp16_params)
        self.fp32_from_fp16_params.append(fp32_from_fp16_params)
        self.fp32_params.append(fp32_params)
    if multi_tensor_applier.available:
        # use apex's fused multi-tensor L2-norm kernel (amp_C) for gradient-norm
        # computation; overflow_buf flags inf/nan gradients on the GPU
        import amp_C
        self.overflow_buf = torch.cuda.IntTensor([0])
        self.multi_tensor_l2norm = amp_C.multi_tensor_l2norm
    else:
        raise RuntimeError('FP16_Optimizer requires cuda extensions')
    if dynamic_loss_scale:
        if dynamic_loss_args is not None:
            raise SystemError("Dynamic loss scale args are not supported for now.")
        self.dynamic_loss_scale = True
        self.cur_scale = 2**16          # initial loss scale
        self.cur_iter = 0
        self.last_overflow_iter = -1    # iteration of the most recent overflow
        self.scale_factor = 2           # divide/multiply the scale by this factor
        self.scale_window = 1000        # grow the scale after this many overflow-free iterations
    else:
        self.dynamic_loss_scale = False
        self.cur_iter = 0
        self.cur_scale = static_loss_scale
    self.verbose = verbose
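
Minimal construction sketch for reference. Assumptions not confirmed by the excerpt: the enclosing class is named FP16_Optimizer (inferred from the error message above) and is importable from this module, and the model has been partially cast to half precision so the base optimizer's param_groups contain a mix of fp16 and fp32 tensors.

import torch
# assumed import path and class name, inferred from the file path and error messages
from sagemakercv.training.optimizers.mlperf_fp16_optimizer import FP16_Optimizer

# toy model: fp16 backbone plus fp32 head, so the wrapped optimizer
# sees both dtypes across its param_groups
backbone = torch.nn.Linear(128, 64).cuda().half()
head = torch.nn.Linear(64, 10).cuda()

base_optimizer = torch.optim.SGD(
    [{'params': backbone.parameters()},
     {'params': head.parameters()}],
    lr=0.02, momentum=0.9)

# static loss scaling: gradients are produced from a loss multiplied by 128.0
optimizer = FP16_Optimizer(base_optimizer, static_loss_scale=128.0)

# dynamic loss scaling: starts at 2**16, is cut by scale_factor (2) on overflow,
# and grows again after scale_window (1000) overflow-free iterations
optimizer = FP16_Optimizer(base_optimizer, dynamic_loss_scale=True)

Keeping the fp16 params, their fp32 master copies, and the native fp32 params in parallel per-group lists presumably lets the rest of the class apply apex's multi-tensor kernels to whole parameter groups at once rather than looping per parameter.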