in torchbenchmark/models/nvidia_deeprecommender/nvtrain.py [0:0]
def TrainInit(self, device="cpu", jit=False, batch_size=256, processCommandLine=False):
  # Force the test to run in toy mode: a single pass of synthetic data through the model.
  self.toytest = True
  self.toybatch = batch_size

  # Number of movies in the Netflix training set.
  self.toyvocab = 197951

  self.toyinputs = torch.randn(self.toybatch, self.toyvocab)
  if processCommandLine:
    self.args = getTrainCommandLineArgs()
  else:
    self.args = getTrainBenchmarkArgs()
if device == "cpu":
forcecuda = False
elif device == "cuda":
forcecuda = True
else:
# unknown device string, quit init
return
self.args.forcecuda = forcecuda
self.args.forcecpu = not forcecuda
self.args = processTrainArgState(self.args)
  if not self.toytest:
    self.logger = Logger(self.args.logdir)

  self.params = dict()
  self.params['batch_size'] = self.args.batch_size
  self.params['data_dir'] = self.args.path_to_train_data
  self.params['major'] = 'users'
  self.params['itemIdInd'] = 1
  self.params['userIdInd'] = 0
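
  # Real Netflix training/eval data is only loaded when not in toy mode; toy mode
  # drives the model with the synthetic inputs created above instead.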
  if not self.toytest:
    if not self.args.silent:
      print("Loading training data")

    self.data_layer = input_layer.UserItemRecDataProvider(params=self.params)

    if not self.args.silent:
      print("Data loaded")
      print("Total items found: {}".format(len(self.data_layer.data.keys())))
      print("Vector dim: {}".format(self.data_layer.vector_dim))
      print("Loading eval data")

  self.eval_params = copy.deepcopy(self.params)
  # must set eval batch size to 1 to make sure no examples are missed
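  # In toy mode the autoencoder input width is the synthetic vocabulary size;
  # otherwise it is taken from the loaded training data.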
  if self.toytest:
    self.rencoder = model.AutoEncoder(layer_sizes=[self.toyvocab] + [int(l) for l in self.args.hidden_layers.split(',')],
                                      nl_type=self.args.non_linearity_type,
                                      is_constrained=self.args.constrained,
                                      dp_drop_prob=self.args.drop_prob,
                                      last_layer_activations=not self.args.skip_last_layer_nl)
  else:
    self.eval_params['data_dir'] = self.args.path_to_eval_data
    self.eval_data_layer = input_layer.UserItemRecDataProvider(params=self.eval_params,
                                                               user_id_map=self.data_layer.userIdMap,  # the mappings are provided
                                                               item_id_map=self.data_layer.itemIdMap)
    self.eval_data_layer.src_data = self.data_layer.data

    self.rencoder = model.AutoEncoder(layer_sizes=[self.data_layer.vector_dim] + [int(l) for l in self.args.hidden_layers.split(',')],
                                      nl_type=self.args.non_linearity_type,
                                      is_constrained=self.args.constrained,
                                      dp_drop_prob=self.args.drop_prob,
                                      last_layer_activations=not self.args.skip_last_layer_nl)
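
  # Resume from a previously saved checkpoint if one exists in the log directory.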
  os.makedirs(self.args.logdir, exist_ok=True)
  self.model_checkpoint = self.args.logdir + "/model"
  self.path_to_model = Path(self.model_checkpoint)
  if self.path_to_model.is_file():
    print("Loading model from: {}".format(self.model_checkpoint))
    self.rencoder.load_state_dict(torch.load(self.model_checkpoint))
  if not self.args.silent:
    print('######################################################')
    print('######################################################')
    print('############# AutoEncoder Model: #####################')
    print(self.rencoder)
    print('######################################################')
    print('######################################################')
  if self.args.use_cuda:
    gpu_ids = [int(g) for g in self.args.gpu_ids.split(',')]
    if not self.args.silent:
      print('Using GPUs: {}'.format(gpu_ids))

    # Wrap the model in DataParallel when more than one GPU is requested.
    if len(gpu_ids) > 1:
      self.rencoder = nn.DataParallel(self.rencoder,
                                      device_ids=gpu_ids)

    self.rencoder = self.rencoder.cuda()
    self.toyinputs = self.toyinputs.to(device)
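
  # Build the optimizer named in the args; the momentum (SGD) variant also attaches
  # a step-decay learning-rate schedule.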
if self.args.optimizer == "adam":
self.optimizer = optim.Adam(self.rencoder.parameters(),
lr=self.args.lr,
weight_decay=self.args.weight_decay)
elif self.args.optimizer == "adagrad":
self.optimizer = optim.Adagrad(self.rencoder.parameters(),
lr=self.args.lr,
weight_decay=self.args.weight_decay)
elif self.args.optimizer == "momentum":
self.optimizer = optim.SGD(self.rencoder.parameters(),
lr=self.args.lr, momentum=0.9,
weight_decay=self.args.weight_decay)
self.scheduler = MultiStepLR(self.optimizer, milestones=[24, 36, 48, 66, 72], gamma=0.5)
elif args.optimizer == "rmsprop":
self.optimizer = optim.RMSprop(self.rencoder.parameters(),
lr=self.args.lr, momentum=0.9,
weight_decay=self.args.weight_decay)
else:
raise ValueError('Unknown optimizer kind')
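
  # Running-loss accumulators and the global step counter used during training.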
  self.t_loss = 0.0
  self.t_loss_denom = 0.0
  self.denom = 0.0
  self.total_epoch_loss = 0.0
  self.global_step = 0

  # Optional dropout layer used for noise injection when noise_prob > 0.
  if self.args.noise_prob > 0.0:
    self.dp = nn.Dropout(p=self.args.noise_prob)