in ludwig/data/dataset.py [0:0]
def get(self, feature_name, idx=None):
    """Return the values of ``feature_name`` for the requested rows.

    Args:
        feature_name: Key of the feature in ``self.dataset`` and
            ``self.features``.
        idx: Index (or collection of indices) applied to the feature's
            column in ``self.dataset``. Defaults to ``range(self.size)``,
            i.e. every row.

    Returns:
        ``self.dataset[feature_name][idx]`` when the feature is served from
        memory. Otherwise those values are used as row numbers into the
        ``feature_name + '_data'`` dataset of the HDF5 file at
        ``self.data_hdf5_fp``, and the rows read from disk are returned in
        the same order as the request.
    """
    if idx is None:
        idx = range(self.size)

    # A feature is served from memory when there is no HDF5 file at all, or
    # when its preprocessing section is missing, has no 'in_memory' entry,
    # or sets 'in_memory' to a truthy value.
    if self.data_hdf5_fp is None:
        return self.dataset[feature_name][idx]
    preprocessing = self.features[feature_name].get('preprocessing', {})
    if preprocessing.get('in_memory', True):
        return self.dataset[feature_name][idx]

    # Disk-backed feature: the column holds row numbers into the HDF5 file.
    row_ids = np.asarray(self.dataset[feature_name][idx], dtype=np.int64)

    # h5py point selections must be strictly increasing, so sorting alone is
    # not enough when a row is requested more than once. np.unique yields
    # the sorted distinct rows plus the mapping that rebuilds the request
    # (original order and duplicates) from them.
    unique_rows, inverse = np.unique(row_ids, return_inverse=True)

    with h5py.File(self.data_hdf5_fp, 'r') as h5_file:
        # NOTE(review): the two trailing slices assume the stored dataset is
        # 3-D (presumably image data) - confirm against the writer.
        im_data = h5_file[feature_name + '_data'][unique_rows, :, :]

    return im_data[inverse]