# def run_training()
#
# in experiment.py [0:0]

def run_training(cfg):
    """
    Execute the full training procedure described by ``cfg``.

    Pins the process to the configured GPU, seeds the RNGs for
    reproducibility, builds the datasets and loaders, restores or
    initializes the model/optimizer, then iterates training epochs with
    optional validation, test evaluation and checkpointing.

    Returns the final test-set evaluation result, or ``None`` when no
    test set is configured.
    """

    # GPU environment must be configured before any CUDA initialization
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.gpu_idx)
    if cfg.model_zoo is not None:
        os.environ["TORCH_MODEL_ZOO"] = cfg.model_zoo

    # experiment output directory
    os.makedirs(cfg.exp_dir, exist_ok=True)

    # reproducibility: fixed seeds + deterministic cudnn kernels
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # persist the resolved config alongside the experiment outputs
    dump_config(cfg)

    # datasets for the three splits (val/test may be None)
    dset_train, dset_val, dset_test = dataset_zoo(**cfg.DATASET)

    def _loader(dset, shuffle):
        # shared DataLoader construction for all splits
        return torch.utils.data.DataLoader(dset,
                                           num_workers=cfg.num_workers,
                                           pin_memory=True,
                                           batch_size=cfg.batch_size,
                                           shuffle=shuffle)

    trainloader = _loader(dset_train, True)

    valloader = _loader(dset_val, False) if dset_val is not None else None

    if dset_test is None:
        testloader = None
        eval_vars = None
    else:
        testloader = _loader(dset_test, False)
        _, _, eval_vars = eval_zoo(cfg.DATASET.dataset_name)

    # model, training stats and (optionally restored) optimizer state
    model, stats, optimizer_state = init_model(cfg, add_log_vars=eval_vars)
    start_epoch = stats.epoch + 1

    if torch.cuda.is_available():
        model.cuda()

    optimizer, scheduler = init_optimizer(
        model, optimizer_state=optimizer_state, **cfg.SOLVER)

    # resume the LR schedule where the restored stats left off
    scheduler.last_epoch = start_epoch

    for epoch in range(start_epoch, cfg.SOLVER.max_epochs):
        # `stats` handles new_epoch bookkeeping / plotting on entry
        with stats:

            print("scheduler lr = %1.2e" % float(scheduler.get_lr()[-1]))

            # training pass
            trainvalidate(model, stats, epoch, trainloader, optimizer, False,
                          visdom_env_root=get_visdom_env(cfg), **cfg)

            # validation pass (only when a validation set exists)
            if valloader is not None:
                trainvalidate(model, stats, epoch, valloader, optimizer, True,
                              visdom_env_root=get_visdom_env(cfg), **cfg)

            # per-epoch test evaluation (only when a test set exists)
            if testloader is not None:
                eval_result = run_eval(cfg, model, testloader, stats=stats)
                dump_eval_result(cfg, eval_result)

            assert stats.epoch == epoch, "inconsistent stats!"

            # optionally drop checkpoints older than the purge window
            if cfg.store_checkpoints_purge > 0 and cfg.store_checkpoints:
                for prev_epoch in range(epoch - cfg.store_checkpoints_purge):
                    purge_epoch(cfg.exp_dir, prev_epoch)

            # checkpoint the current epoch
            if cfg.store_checkpoints:
                save_model(model, stats, get_checkpoint(cfg.exp_dir, epoch),
                           optimizer=optimizer)

            scheduler.step()

    # final evaluation on the test set, if one is configured
    if testloader is None:
        return None
    eval_result = run_eval(cfg, model, testloader, stats=None)
    dump_eval_result(cfg, eval_result)
    return eval_result