in cm/train_util.py
def run_loop(self):
    saved = False
    # Keep training while LR annealing is disabled (lr_anneal_steps is falsy)
    # or either step budget has not yet been exhausted.
    while (
        not self.lr_anneal_steps
        or self.step < self.lr_anneal_steps
        or self.global_step < self.total_training_steps
    ):
        # Fetch the next batch and its conditioning, then take one training step.
        batch, cond = next(self.data)
        self.run_step(batch, cond)

        saved = False
        # Periodically write a checkpoint.
        if (
            self.global_step
            and self.save_interval != -1
            and self.global_step % self.save_interval == 0
        ):
            self.save()
            saved = True
            th.cuda.empty_cache()
            # Run for a finite amount of time in integration tests.
            if os.environ.get("DIFFUSION_TRAINING_TEST", "") and self.step > 0:
                return

        # Periodically flush accumulated logger key/value pairs.
        if self.global_step % self.log_interval == 0:
            logger.dumpkvs()

    # Save the last checkpoint if it wasn't already saved.
    if not saved:
        self.save()
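
The loop above keeps running while any of three conditions holds: lr_anneal_steps is unset (falsy), self.step is below lr_anneal_steps, or self.global_step is below total_training_steps. The saved flag is reset on every iteration and set only when an interval checkpoint is written, so the trailing "if not saved" writes a final checkpoint exactly when the last iteration did not already do so. Below is a minimal, self-contained sketch of that same interval-save / final-save pattern; TOTAL_STEPS, SAVE_INTERVAL, and the stubbed run_step/save are illustrative stand-ins, not names from cm/train_util.py.

# Minimal sketch of the checkpointing pattern used by run_loop.
# All names here are illustrative placeholders, not part of the cm codebase.

TOTAL_STEPS = 10
SAVE_INTERVAL = 4


def run_step(step):
    # Stand-in for one optimization step.
    print(f"step {step}")


def save(step):
    # Stand-in for writing a checkpoint.
    print(f"checkpoint at step {step}")


def run_loop():
    saved = False
    step = 0
    while step < TOTAL_STEPS:
        run_step(step)
        step += 1

        # Reset the flag each iteration; set it only when a checkpoint is written.
        saved = False
        if step % SAVE_INTERVAL == 0:
            save(step)
            saved = True

    # Save the last checkpoint if it wasn't already saved.
    if not saved:
        save(step)


if __name__ == "__main__":
    run_loop()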