# _train()
# Source: pytorch_code/classifier/classifier.py


def _train(args):
    """Train a resnet18 image classifier with fastai and emit scrapeable metrics.

    Reads from ``args``: ``hosts``, ``dist_backend``, ``data_dir``,
    ``workers``, ``batch_size``, ``epochs``, ``lr``, ``model_dir``.
    Prints ``METRIC_ACCURACY=`` / ``METRIC_VAL_LOSS=`` lines (parsed by the
    training platform's metric regex) and returns the result of
    ``_save_model`` for the trained learner.
    """
    # Distributed setup is detected but deliberately not initialized here.
    is_distributed = len(args.hosts) > 1 and args.dist_backend is not None
    print("Distributed training - {}".format(is_distributed))

    if is_distributed:
        print("Ignoring distributed training")

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Device Type: {}".format(device))

    print("Loading dataset")
    DATA = Path(args.data_dir)
    tfms = get_transforms(flip_vert=True, max_lighting=None, max_warp=None)
    data = ImageDataBunch.from_folder(DATA, ds_tfms=tfms, size=224,
                                      num_workers=args.workers, bs=args.batch_size)
    # This prints the DataBunch, not a model — message label fixed accordingly.
    print("Data loaded: {0}".format(str(data)))
    learn = create_cnn(data, models.resnet18, metrics=accuracy)

    if torch.cuda.device_count() > 1:
        print("Gpu count: {}".format(torch.cuda.device_count()))

    # Track the most recent validation loss and accuracy over training.
    cb_val_loss = TrackerCallback(learn, monitor='val_loss')
    cb_accuracy = TrackerCallback(learn, monitor='accuracy')
    learn.fit_one_cycle(args.epochs, max_lr=args.lr, callbacks=[cb_val_loss, cb_accuracy])

    accuracy_val = cb_accuracy.get_monitor_value().item()
    # float() unwraps a 0-dim tensor (and passes a plain float through), so the
    # METRIC_VAL_LOSS line below always prints a bare parseable number instead
    # of e.g. "tensor(0.1234)".
    loss_val = float(cb_val_loss.get_monitor_value())

    print('Finished Training')
    print("METRIC_ACCURACY={0}".format(str(accuracy_val)))
    print("METRIC_VAL_LOSS={0}".format(str(loss_val)))
    return _save_model(learn, args.model_dir)