in dlrm_data_caffe2.py [0:0]
def __getitem__(self, index):
    """Return one ``(X_int, X_cat, y)`` sample, or a list of samples for a slice.

    Args:
        index: An integer position or a ``slice``. A slice is resolved
            against ``len(self)`` with standard sequence semantics
            (negative bounds, clamping, explicit ``0`` bounds) and each
            resulting position is fetched individually through this method.

    Returns:
        For an integer index, the tuple ``(self.X_int[i], self.X_cat[i],
        self.y[i])`` where ``i`` is the split-local row; when
        ``self.max_ind_range > 0`` the categorical part is reduced modulo
        ``self.max_ind_range``. For a slice, a list of such tuples.

    Raises:
        ValueError: If a slice with a step of zero is given.
        SystemExit: If ``self.split`` is not one of ``"none"``, ``"train"``,
            ``"test"`` or ``"val"``.

    Note:
        In the ``"none"``/``"train"`` splits a new per-day ``.npz`` file is
        loaded only when ``index`` is exactly equal to
        ``self.offset_per_file[self.day]``; this presumably relies on the
        caller visiting indices in increasing order -- confirm against the
        data loader configuration.
    """
    if isinstance(index, slice):
        # slice.indices() normalises start/stop/step against the dataset
        # length, so an explicit stop of 0 yields an empty result and
        # negative bounds count from the end, instead of being treated
        # as "missing" by an `or` fallback.
        return [self[idx] for idx in range(*index.indices(len(self)))]

    if self.split == "none" or self.split == "train":
        # check if we need to switch to the next day and load its data
        if index == self.offset_per_file[self.day]:
            self.day_boundary = self.offset_per_file[self.day]
            fi = self.npzfile + "_{0}_reordered.npz".format(self.day)
            with np.load(fi) as data:
                self.X_int = data["X_int"]  # continuous feature
                self.X_cat = data["X_cat"]  # categorical feature
                self.y = data["y"]  # target
            # advance to the next day, wrapping around after the last one
            self.day = (self.day + 1) % self.max_day_range

        # row position relative to the start of the currently loaded day
        i = index - self.day_boundary
    elif self.split == "test" or self.split == "val":
        # only a single day is used for testing; "val" rows are read
        # starting at an offset of self.test_size within the loaded arrays
        i = index + (0 if self.split == "test" else self.test_size)
    else:
        sys.exit("ERROR: dataset split is neither none, nor train or test.")

    if self.max_ind_range > 0:
        return self.X_int[i], self.X_cat[i] % self.max_ind_range, self.y[i]
    else:
        return self.X_int[i], self.X_cat[i], self.y[i]