in ludwig/modules/optimization_modules.py
import tensorflow as tf


def clip_optimizer(optimizer, clipglobalnorm, clipnorm, clipvalue,
                   horovod=None):
    class _ClippedOptimizer(tf.keras.optimizers.Optimizer):
        def __init__(self, **kwargs):
            self.clipglobalnorm = clipglobalnorm
            self.clipnorm = clipnorm
            self.clipvalue = clipvalue
            self.horovod = horovod
            # self.__class__ is the dynamically built subclass created
            # below, so this resolves to the wrapped optimizer's __init__.
            super(self.__class__, self).__init__(**kwargs)

        def minimize_with_tape(self, tape, loss, variables):
            if self.horovod:
                # Average gradients across Horovod workers before clipping.
                tape = self.horovod.DistributedGradientTape(tape)
            gradients = tape.gradient(loss, variables)
            if self.clipglobalnorm:
                # Rescale all gradients jointly so that their global norm
                # does not exceed the threshold.
                gradients, _ = tf.clip_by_global_norm(gradients,
                                                      self.clipglobalnorm)
            if self.clipnorm:
                # Clip the norm of each gradient tensor individually.
                gradients = [tf.clip_by_norm(g, self.clipnorm)
                             for g in gradients]
            if self.clipvalue:
                # Clip each gradient elementwise to [min, max].
                gradients = [
                    tf.clip_by_value(g,
                                     clip_value_min=self.clipvalue[0],
                                     clip_value_max=self.clipvalue[1])
                    for g in gradients
                ]
            self.apply_gradients(zip(gradients, variables))

        def set_learning_rate(self, learning_rate):
            self.lr.assign(learning_rate)

    # Build a subclass of the wrapped optimizer's class that mixes in the
    # methods above, then reconstruct the optimizer from its own config so
    # all original hyperparameters are preserved.
    cls = type(optimizer.__class__.__name__, (optimizer.__class__,),
               dict(_ClippedOptimizer.__dict__))
    return cls.from_config(optimizer.get_config())
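
For context, here is a minimal usage sketch of the wrapper above, assuming a stock Keras optimizer and a toy regression step; the model, data, and clipping thresholds are hypothetical stand-ins, not values taken from Ludwig:

import tensorflow as tf

# Wrap Adam so every minimize_with_tape call clips gradients first.
# The threshold settings below are purely illustrative.
optimizer = clip_optimizer(
    tf.keras.optimizers.Adam(learning_rate=1e-3),
    clipglobalnorm=5.0,  # joint clip on the global gradient norm
    clipnorm=None,       # per-tensor norm clipping disabled
    clipvalue=None,      # elementwise value clipping disabled
)

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
x = tf.random.normal((8, 4))
y = tf.random.normal((8, 1))

# Record the forward pass, then let the wrapper compute, clip, and apply
# the gradients in one call.
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))
optimizer.minimize_with_tape(tape, loss, model.trainable_variables)

# Learning-rate schedules can adjust the step size through the mixin.
optimizer.set_learning_rate(5e-4)

Because the function rebuilds the optimizer with from_config(optimizer.get_config()), the returned object keeps all of the original optimizer's hyperparameters while gaining the clipping logic, which is why the same wrapper works for arbitrary Keras optimizer classes.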