in torchbenchmark/models/dlrm/dlrm_data_pytorch.py [0:0]
def __getitem__(self, index):
if isinstance(index, slice):
return [
self[idx] for idx in range(
index.start or 0, index.stop or len(self), index.step or 1
)
]
if self.memory_map:
if self.split == 'none' or self.split == 'train':
# check if need to swicth to next day and load data
if index == self.offset_per_file[self.day]:
# print("day_boundary switch", index)
self.day_boundary = self.offset_per_file[self.day]
fi = self.npzfile + "_{0}_reordered.npz".format(
self.day
)
# print('Loading file: ', fi)
with np.load(fi) as data:
self.X_int = data["X_int"] # continuous feature
self.X_cat = data["X_cat"] # categorical feature
self.y = data["y"] # target
self.day = (self.day + 1) % self.max_day_range
i = index - self.day_boundary
elif self.split == 'test' or self.split == 'val':
# only a single day is used for testing
i = index + (0 if self.split == 'test' else self.test_size)
else:
sys.exit("ERROR: dataset split is neither none, nor train or test.")
else:
i = index
if self.max_ind_range > 0:
return self.X_int[i], self.X_cat[i] % self.max_ind_range, self.y[i]
else:
return self.X_int[i], self.X_cat[i], self.y[i]