in torchbenchmark/models/nvidia_deeprecommender/reco_encoder/data/input_layer.py [0:0]
def __init__(self, params, user_id_map=None, item_id_map=None):
    """Read every rating file in the data directory into ``self.data``.

    Each line of each file is split on the configured delimiter and
    interpreted as one (item id, user id, rating) record. Records are
    grouped by the "major" id, so ``self.data`` maps a mapped major id to
    a list of ``(mapped minor id, rating)`` tuples.

    Args:
        params: Settings container supporting ``in`` and ``[]``.
            Required keys: ``'data_dir'`` (directory to scan) and
            ``'batch_size'``. Optional keys and their defaults:
            ``'extension'`` (``".txt"``), ``'itemIdInd'`` (0),
            ``'userIdInd'`` (1), ``'ratingInd'`` (2),
            ``'major'`` (``'items'``), ``'delimiter'`` (tab).
        user_id_map: Lookup from raw integer user id to the id stored in
            ``self.data``. Used only when ``item_id_map`` is given too.
        item_id_map: Lookup from raw integer item id to the id stored in
            ``self.data``. Used only when ``user_id_map`` is given too.

    Raises:
        ValueError: If ``'major'`` is neither ``'items'`` nor ``'users'``,
            or a line has fewer fields than the configured column
            indices require (never fewer than three).

    Note:
        Lookups into the id maps are not guarded: an id that is missing
        from a map propagates whatever the map raises (``KeyError`` for a
        plain dict).
    """
    self._params = params
    cfg = self.params

    def option(name, default):
        # Membership test followed by indexing, exactly as the settings
        # were read before, so no ``.get`` method is required of ``cfg``.
        return cfg[name] if name in cfg else default

    self._data_dir = cfg['data_dir']
    self._extension = option('extension', ".txt")
    self._i_id = option('itemIdInd', 0)
    self._u_id = option('userIdInd', 1)
    self._r_id = option('ratingInd', 2)
    self._major = option('major', 'items')
    if self._major not in ('items', 'users'):
        raise ValueError("Major must be 'users' or 'items', but got {}".format(self._major))

    items_major = self._major == 'items'
    self._major_ind = self._i_id if items_major else self._u_id
    self._minor_ind = self._u_id if items_major else self._i_id
    self._delimiter = option('delimiter', '\t')

    if user_id_map is None or item_id_map is None:
        # A single supplied map is ignored: both are rebuilt together.
        # NOTE(review): _build_maps is defined outside this block; it is
        # presumably what sets self._user_id_map / self._item_id_map,
        # which are read right below -- confirm.
        self._build_maps()
    else:
        self._user_id_map = user_id_map
        self._item_id_map = item_id_map

    major_map = self._item_id_map if items_major else self._user_id_map
    minor_map = self._user_id_map if items_major else self._item_id_map
    self._vector_dim = len(minor_map)

    src_files = [path.join(self._data_dir, f)
                 for f in listdir(self._data_dir)
                 if path.isfile(path.join(self._data_dir, f)) and f.endswith(self._extension)]

    self._batch_size = cfg['batch_size']

    # A line must be long enough for every configured column index. The
    # historical minimum of three fields is kept as a floor so inputs that
    # were rejected before are still rejected.
    min_fields = max(3, self._i_id + 1, self._u_id + 1, self._r_id + 1)

    self.data = dict()
    for source_file in src_files:
        with open(source_file, 'r') as src:
            # Iterate the file object directly so the whole file is never
            # held in memory as a list of lines.
            for line in src:
                parts = line.strip().split(self._delimiter)
                if len(parts) < min_fields:
                    raise ValueError('Encountered badly formatted line in {}'.format(source_file))
                key = major_map[int(parts[self._major_ind])]
                value = minor_map[int(parts[self._minor_ind])]
                rating = float(parts[self._r_id])
                self.data.setdefault(key, []).append((value, rating))