in pytorch_code/classifier/classifier.py [0:0]
# Imports assumed for this excerpt (fastai v1 API; not shown in the original snippet).
import torch
from pathlib import Path
from fastai.vision import ImageDataBunch, get_transforms, create_cnn, models
from fastai.metrics import accuracy
from fastai.callbacks import TrackerCallback


def _train(args):
    is_distributed = len(args.hosts) > 1 and args.dist_backend is not None
    print("Distributed training - {}".format(is_distributed))
    if is_distributed:
        # Distributed training is detected but not implemented; train on a single node.
        print("Ignoring distributed training")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Device Type: {}".format(device))

    # Build an ImageDataBunch from a folder layout with one sub-directory per class.
    print("Loading dataset")
    DATA = Path(args.data_dir)
    tfms = get_transforms(flip_vert=True, max_lighting=None, max_warp=None)
    data = ImageDataBunch.from_folder(DATA, ds_tfms=tfms, size=224,
                                      num_workers=args.workers, bs=args.batch_size)
print("Model loaded: {0}".format(str(data)))
    # create_cnn is the fastai v1 name (renamed cnn_learner in later releases).
    learn = create_cnn(data, models.resnet18, metrics=accuracy)
    if torch.cuda.device_count() > 1:
        print("Gpu count: {}".format(torch.cuda.device_count()))

    # Track validation loss and accuracy so the final values can be read back
    # after training via get_monitor_value().
    cb_val_loss = TrackerCallback(learn, monitor='val_loss')
    cb_accuracy = TrackerCallback(learn, monitor='accuracy')
    learn.fit_one_cycle(args.epochs, max_lr=args.lr, callbacks=[cb_val_loss, cb_accuracy])

    accuracy_val = cb_accuracy.get_monitor_value().item()
    loss_val = cb_val_loss.get_monitor_value()
    print('Finished Training')
    print("METRIC_ACCURACY={0}".format(str(accuracy_val)))
    print("METRIC_VAL_LOSS={0}".format(str(loss_val)))
    return _save_model(learn, args.model_dir)
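

For context, a minimal sketch of how _train might be invoked. The flag names mirror
the attribute accesses above (hosts, dist_backend, data_dir, workers, batch_size,
epochs, lr, model_dir); the defaults and the _save_model body are assumptions for
illustration, not part of the original file.

import argparse

def _save_model(learn, model_dir):
    # Assumed helper: one plausible implementation that exports the learner
    # for later inference; the original body is not shown in this excerpt.
    learn.export(Path(model_dir) / 'export.pkl')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--hosts', type=str, nargs='+', default=['localhost'])
    parser.add_argument('--dist-backend', dest='dist_backend', default=None)
    parser.add_argument('--data-dir', dest='data_dir', required=True)
    parser.add_argument('--model-dir', dest='model_dir', default='.')
    parser.add_argument('--workers', type=int, default=4)
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=64)
    parser.add_argument('--epochs', type=int, default=4)
    parser.add_argument('--lr', type=float, default=1e-3)
    _train(parser.parse_args())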