def TrainInit()

in torchbenchmark/models/nvidia_deeprecommender/nvtrain.py [0:0]


  def TrainInit(self, device="cpu", jit=False, batch_size=256, processCommandLine=False):
    """Initialize training state: args, data layers, autoencoder model and optimizer.

    Args:
      device: "cpu" or "cuda"; any other string aborts initialization early
        (the method simply returns).
      jit: unused in this method; kept for interface compatibility with the
        benchmark harness.
      batch_size: batch size for the synthetic ("toy") input tensor.
      processCommandLine: if True, parse training args from the command line;
        otherwise use the built-in benchmark defaults and apply the `device`
        override.

    Raises:
      ValueError: if ``self.args.optimizer`` names an unknown optimizer.
    """
    # Force test to run in toy mode: a single call of fake data to the model.
    self.toytest = True
    self.toybatch = batch_size

    # number of movies in netflix training set.
    self.toyvocab = 197951

    self.toyinputs = torch.randn(self.toybatch, self.toyvocab)

    if processCommandLine:
      self.args = getTrainCommandLineArgs()
    else:
      self.args = getTrainBenchmarkArgs()

      # The device override only applies to benchmark-default args;
      # command-line args carry their own device flags.
      if device == "cpu":
        forcecuda = False
      elif device == "cuda":
        forcecuda = True
      else:
        # unknown device string, quit init
        return

      self.args.forcecuda = forcecuda
      self.args.forcecpu = not forcecuda

    self.args = processTrainArgState(self.args)

    # Toy mode skips the logger and the real data pipeline entirely.
    if not self.toytest:
      self.logger = Logger(self.args.logdir)
    self.params = dict()
    self.params['batch_size'] = self.args.batch_size
    self.params['data_dir'] = self.args.path_to_train_data
    self.params['major'] = 'users'
    self.params['itemIdInd'] = 1
    self.params['userIdInd'] = 0

    if not self.toytest:
      if not self.args.silent:
        print("Loading training data")

      self.data_layer = input_layer.UserItemRecDataProvider(params=self.params)
      if not self.args.silent:
        print("Data loaded")
        print("Total items found: {}".format(len(self.data_layer.data.keys())))
        print("Vector dim: {}".format(self.data_layer.vector_dim))

        print("Loading eval data")

    self.eval_params = copy.deepcopy(self.params)

    # must set eval batch size to 1 to make sure no examples are missed
    if self.toytest:
      # Toy model sized by the fixed toy vocabulary instead of real data.
      self.rencoder = model.AutoEncoder(layer_sizes=[self.toyvocab] + [int(l) for l in self.args.hidden_layers.split(',')],
                                        nl_type=self.args.non_linearity_type,
                                        is_constrained=self.args.constrained,
                                        dp_drop_prob=self.args.drop_prob,
                                        last_layer_activations=not self.args.skip_last_layer_nl)
    else:
      self.eval_params['data_dir'] = self.args.path_to_eval_data
      self.eval_data_layer = input_layer.UserItemRecDataProvider(params=self.eval_params,
                                                                 user_id_map=self.data_layer.userIdMap, # the mappings are provided
                                                                 item_id_map=self.data_layer.itemIdMap)
      self.eval_data_layer.src_data = self.data_layer.data
      self.rencoder = model.AutoEncoder(layer_sizes=[self.data_layer.vector_dim] + [int(l) for l in self.args.hidden_layers.split(',')],
                                        nl_type=self.args.non_linearity_type,
                                        is_constrained=self.args.constrained,
                                        dp_drop_prob=self.args.drop_prob,
                                        last_layer_activations=not self.args.skip_last_layer_nl)

      # Resume from a checkpoint in the log directory, if one exists.
      os.makedirs(self.args.logdir, exist_ok=True)
      self.model_checkpoint = self.args.logdir + "/model"
      self.path_to_model = Path(self.model_checkpoint)
      if self.path_to_model.is_file():
        print("Loading model from: {}".format(self.model_checkpoint))
        self.rencoder.load_state_dict(torch.load(self.model_checkpoint))

    if not self.args.silent:
      print('######################################################')
      print('######################################################')
      print('############# AutoEncoder Model: #####################')
      print(self.rencoder)
      print('######################################################')
      print('######################################################')

    if self.args.use_cuda:
      gpu_ids = [int(g) for g in self.args.gpu_ids.split(',')]
      if not self.args.silent:
        print('Using GPUs: {}'.format(gpu_ids))

      # Wrap in DataParallel only when more than one GPU was requested.
      if len(gpu_ids) > 1:
        self.rencoder = nn.DataParallel(self.rencoder,
                                        device_ids=gpu_ids)

      self.rencoder = self.rencoder.cuda()
      self.toyinputs = self.toyinputs.to(device)

    if self.args.optimizer == "adam":
      self.optimizer = optim.Adam(self.rencoder.parameters(),
                                  lr=self.args.lr,
                                  weight_decay=self.args.weight_decay)
    elif self.args.optimizer == "adagrad":
      self.optimizer = optim.Adagrad(self.rencoder.parameters(),
                                     lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
    elif self.args.optimizer == "momentum":
      self.optimizer = optim.SGD(self.rencoder.parameters(),
                                 lr=self.args.lr, momentum=0.9,
                                 weight_decay=self.args.weight_decay)
      self.scheduler = MultiStepLR(self.optimizer, milestones=[24, 36, 48, 66, 72], gamma=0.5)
    # BUG FIX: was `elif args.optimizer == "rmsprop":` — the bare name `args`
    # is undefined here and raised NameError whenever this branch was reached.
    elif self.args.optimizer == "rmsprop":
      self.optimizer = optim.RMSprop(self.rencoder.parameters(),
                                     lr=self.args.lr, momentum=0.9,
                                     weight_decay=self.args.weight_decay)
    else:
      raise ValueError('Unknown optimizer kind')

    # Running loss/step counters used by the training loop.
    self.t_loss = 0.0
    self.t_loss_denom = 0.0
    self.denom = 0.0
    self.total_epoch_loss = 0.0
    self.global_step = 0

    # Optional input-corruption dropout used for denoising-style training.
    if self.args.noise_prob > 0.0:
      self.dp = nn.Dropout(p=self.args.noise_prob)