# torchbenchmark/models/nvidia_deeprecommender/nvinfer.py

# Copyright (c) 2017 NVIDIA Corporation

# parameters to run benchmark on cpu
# --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_TEST --hidden_layers 512,512,1024 --non_linearity_type selu --save_path model_save/model.epoch_0 --drop_prob 0.8 --predictions_path preds.txt --nooutput --forcecpu
#
# parameters to run benchmark on cuda
# --path_to_train_data Netflix/N1W_TRAIN --path_to_eval_data Netflix/N1W_TEST --hidden_layers 512,512,1024 --non_linearity_type selu --save_path model_save/model.epoch_0 --drop_prob 0.8 --predictions_path preds.txt --nooutput --forcecuda

import torch
import argparse
import copy
import time
import os

from .reco_encoder.data import input_layer
from .reco_encoder.model import model
from torch.autograd import Variable
from pathlib import Path
import torch.autograd.profiler as profiler


def getCommandLineArgs():
  parser = argparse.ArgumentParser(description='RecoEncoder')
  parser.add_argument('--drop_prob', type=float, default=0.0, metavar='N',
                      help='dropout drop probability')
  parser.add_argument('--constrained', action='store_true',
                      help='constrained autoencoder')
  parser.add_argument('--skip_last_layer_nl', action='store_true',
                      help="if present, decoder's last layer will not apply non-linearity function")
  parser.add_argument('--hidden_layers', type=str, default="1024,512,512,128", metavar='N',
                      help='hidden layer sizes, comma-separated')
  parser.add_argument('--path_to_train_data', type=str, default="", metavar='N',
                      help='Path to training data')
  parser.add_argument('--path_to_eval_data', type=str, default="", metavar='N',
                      help='Path to evaluation data')
  parser.add_argument('--non_linearity_type', type=str, default="selu", metavar='N',
                      help='type of the non-linearity used in activations')
  parser.add_argument('--save_path', type=str, default="autorec.pt", metavar='N',
                      help='where to save model')
  parser.add_argument('--predictions_path', type=str, default="out.txt", metavar='N',
                      help='where to save predictions')
  parser.add_argument('--batch_size', type=int, default=1, metavar='N',
                      help='inference batch size')
  parser.add_argument('--jit', action='store_true',
                      help='jit-ify model before running')
  parser.add_argument('--forcecuda', action='store_true',
                      help='force cuda use')
  parser.add_argument('--forcecpu', action='store_true',
                      help='force cpu use')
  parser.add_argument('--nooutput', action='store_true',
                      help='disable writing output to file')
  parser.add_argument('--silent', action='store_true',
                      help='disable output messages')
  parser.add_argument('--profile', action='store_true',
                      help='enable profiler and stat print')
  args = parser.parse_args()
  return args


def getBenchmarkArgs(forceCuda):
  # Canned configuration mirroring the command lines documented at the top
  # of this file, so the benchmark can run without parsing sys.argv.
  class Args:
    pass

  args = Args()
  args.drop_prob = 0.8
  args.constrained = False
  args.skip_last_layer_nl = False
  args.hidden_layers = '512,512,1024'
  args.path_to_train_data = os.path.dirname(__file__) + '/Netflix/N1W_TRAIN'
  args.path_to_eval_data = os.path.dirname(__file__) + '/Netflix/N1W_TEST'
  args.non_linearity_type = 'selu'
  args.save_path = 'model_save/model.epoch_0'
  args.predictions_path = 'preds.txt'
  args.batch_size = 1
  args.jit = False
  args.forcecuda = forceCuda
  args.forcecpu = not forceCuda
  args.nooutput = True
  args.silent = True
  args.profile = False
  return args


def processArgState(args):
  if not args.silent:
    print(args)

  if args.forcecpu and args.forcecuda:
    print("Error, force cpu and cuda cannot both be set")
    quit()

  args.use_cuda = torch.cuda.is_available()  # global flag
  if not args.silent:
    if args.use_cuda:
      print('GPU is available.')
    else:
      print('GPU is not available.')

  if args.use_cuda and args.forcecpu:
    args.use_cuda = False

  if not args.silent:
    if args.use_cuda:
      print('Running On GPU')
    else:
      print('Running On CPU')

    if args.profile:
      print('Profiler Enabled')

  return args
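

# Minimal sketch of how the helpers above compose (illustrative only; this
# function is an assumption added for documentation and is never called by
# the benchmark). getBenchmarkArgs builds the canned config and
# processArgState resolves the effective device flags.
def _example_arg_flow():
  args = processArgState(getBenchmarkArgs(forceCuda=False))
  # With forceCuda=False the config sets forcecpu=True, so use_cuda ends up
  # False even on a machine where torch.cuda.is_available() is True.
  return args.use_cuda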


class DeepRecommenderInferenceBenchmark:
  def __init__(self, device='cpu', jit=False, batch_size=256, usecommandlineargs=False):
    self.toytest = True
    self.batch_size = batch_size

    # number of movies in netflix training set.
    self.node_count = 197951

    if self.toytest:
      self.toyinputs = torch.randn(self.batch_size, self.node_count).to(device)

    if usecommandlineargs:
      self.args = getCommandLineArgs()
    else:
      if device == "cpu":
        forcecuda = False
      elif device == "cuda":
        forcecuda = True
      else:
        # unknown device string, quit init
        return

      self.args = getBenchmarkArgs(forcecuda)

    args = processArgState(self.args)

    self.params = dict()
    self.params['batch_size'] = self.args.batch_size
    self.params['data_dir'] = self.args.path_to_train_data
    self.params['major'] = 'users'
    self.params['itemIdInd'] = 1
    self.params['userIdInd'] = 0

    if not self.args.silent:
      print("Loading training data")

    if self.toytest == False:
      self.data_layer = input_layer.UserItemRecDataProvider(params=self.params)
      if not self.args.silent:
        print("Data loaded")
        print("Total items found: {}".format(len(self.data_layer.data.keys())))
        print("Vector dim: {}".format(self.data_layer.vector_dim))
        print("Loading eval data")

    self.eval_params = copy.deepcopy(self.params)

    # must set eval batch size to 1 to make sure no examples are missed
    self.eval_params['batch_size'] = 1
    self.eval_params['data_dir'] = self.args.path_to_eval_data

    if self.toytest:
      self.rencoder = model.AutoEncoder(
          layer_sizes=[self.node_count] + [int(l) for l in self.args.hidden_layers.split(',')],
          nl_type=self.args.non_linearity_type,
          is_constrained=self.args.constrained,
          dp_drop_prob=self.args.drop_prob,
          last_layer_activations=not self.args.skip_last_layer_nl)
    else:
      self.eval_data_layer = input_layer.UserItemRecDataProvider(
          params=self.eval_params,
          user_id_map=self.data_layer.userIdMap,
          item_id_map=self.data_layer.itemIdMap)
      self.rencoder = model.AutoEncoder(
          layer_sizes=[self.data_layer.vector_dim] + [int(l) for l in self.args.hidden_layers.split(',')],
          nl_type=self.args.non_linearity_type,
          is_constrained=self.args.constrained,
          dp_drop_prob=self.args.drop_prob,
          last_layer_activations=not self.args.skip_last_layer_nl)

    self.path_to_model = Path(self.args.save_path)
    if self.path_to_model.is_file():
      print("Loading model from: {}".format(self.path_to_model))
      self.rencoder.load_state_dict(torch.load(self.args.save_path))

    if not self.args.silent:
      print('######################################################')
      print('######################################################')
      print('############# AutoEncoder Model: #####################')
      print(self.rencoder)
      print('######################################################')
      print('######################################################')

    self.rencoder.eval()

    if self.args.use_cuda:
      self.rencoder = self.rencoder.cuda()

    if self.toytest == False:
      self.inv_userIdMap = {v: k for k, v in self.data_layer.userIdMap.items()}
      self.inv_itemIdMap = {v: k for k, v in self.data_layer.itemIdMap.items()}
      self.eval_data_layer.src_data = self.data_layer.data
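
  # Illustrative helper (an assumption added for documentation; the original
  # benchmark has no such method): reconstructs the encoder layer sizes the
  # same way __init__ does for the toy-test path.
  def toy_layer_sizes(self):
    return [self.node_count] + [int(l) for l in self.args.hidden_layers.split(',')]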

  def eval(self, niter=1):
    for iteration in range(niter):
      if self.toytest:
        out = self.rencoder(self.toyinputs)
        continue

      for i, ((out, src), majorInd) in enumerate(self.eval_data_layer.iterate_one_epoch_eval(for_inf=True)):
        inputs = Variable(src.cuda().to_dense() if self.args.use_cuda else src.to_dense())
        targets_np = out.to_dense().numpy()[0, :]

        out = self.rencoder(inputs)

        if not self.args.nooutput:
          self.outputs = out.cpu().data.numpy()[0, :]
          non_zeros = targets_np.nonzero()[0].tolist()
          major_key = self.inv_userIdMap[majorInd]

          with open(self.args.predictions_path, 'w') as outf:
            for ind in non_zeros:
              outf.write("{}\t{}\t{}\t{}\n".format(
                  major_key, self.inv_itemIdMap[ind], self.outputs[ind], targets_np[ind]))
            if i % 10000 == 0:
              print("Done: {}".format(i))

    return out

  def TimedInferenceRun(self):
    print('Timed Inference Start')

    e_start_time = time.time()

    if self.args.profile:
      with profiler.profile(record_shapes=True, use_cuda=True) as prof:
        with profiler.record_function("Inference"):
          self.eval()
    else:
      self.eval()

    e_end_time = time.time()

    print('Timed Inference Complete')
    print('Inference finished in {} seconds'.format(e_end_time - e_start_time))

    if self.args.profile:
      print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
      prof.export_chrome_trace("trace.json")


def main():
  benchmarkCuda = DeepRecommenderInferenceBenchmark(device='cuda')
  benchmarkCuda.TimedInferenceRun()

  benchmarkCPU = DeepRecommenderInferenceBenchmark(device='cpu')
  benchmarkCPU.TimedInferenceRun()


if __name__ == '__main__':
  main()
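
# Example usage (assumed invocation; the relative imports require running in
# package context, and a non-toy run needs the Netflix data plus a trained
# checkpoint at the paths configured in getBenchmarkArgs):
#
#   python -m torchbenchmark.models.nvidia_deeprecommender.nvinfer
#
# or programmatically:
#
#   bench = DeepRecommenderInferenceBenchmark(device='cpu')
#   bench.eval(niter=1)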