in benchmarks/dlrm/ootb/dlrm_data_pytorch.py [0:0]
def __getitem__(self, index):
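    """Return one mini-batch at position ``index``.

    Slices are expanded element by element. For an integer index the method
    returns a tuple ``(X, lS_o, lS_i, T)``: dense features, per-table sparse
    offsets, sparse indices, and click-probability targets.
    """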
    if isinstance(index, slice):
        return [
            self[idx] for idx in range(
                index.start or 0, index.stop or len(self), index.step or 1
            )
        ]
    # WARNING: reset seed on access to first element
    # (e.g. if same random samples needed across epochs)
    if self.reset_seed_on_access and index == 0:
        self.reset_numpy_seed(self.rand_seed)
    # number of data points in this batch
    # (the last batch may be smaller than mini_batch_size)
    n = min(self.mini_batch_size, self.data_size - (index * self.mini_batch_size))
    # generate a batch of dense and sparse features
    cache_key = None  # stays None unless the cached "random" path below sets it
    if self.data_generation == "random":
        if self.cache_size is None:
            # caching disabled: call the undecorated generator directly
            Gen = generate_dist_input_batch.__wrapped__
        else:
            # caching enabled: use the memoized generator, keyed by slot in the cache
            Gen = generate_dist_input_batch
            cache_key = index % self.cache_size
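        # NOTE: ``__wrapped__`` exposes the undecorated function, so the uncached
        # path regenerates data on every access, while the cached path reuses
        # memoized batches for the same ``cache_key``. This assumes both generator
        # functions are wrapped by a functools-style caching decorator elsewhere
        # in this file.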
        (X, lS_o, lS_i) = Gen(
            self.m_den,
            tuple(self.ln_emb.tolist()),
            n,
            self.num_indices_per_lookup,
            self.num_indices_per_lookup_fixed,
            rand_data_dist=self.rand_data_dist,
            rand_data_min=self.rand_data_min,
            rand_data_max=self.rand_data_max,
            rand_data_mu=self.rand_data_mu,
            rand_data_sigma=self.rand_data_sigma,
            cache_key=cache_key,
        )
    elif self.data_generation == "synthetic":
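        # trace-driven path: lookup indices presumably come from self.trace_file;
        # this path never uses the cache, so cache_key stays None and the targets
        # below are generated uncached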
        (X, lS_o, lS_i) = generate_synthetic_input_batch(
            self.m_den,
            self.ln_emb,
            n,
            self.num_indices_per_lookup,
            self.num_indices_per_lookup_fixed,
            self.trace_file,
            self.enable_padding,
        )
    else:
        sys.exit(
            "ERROR: --data-generation=" + self.data_generation + " is not supported"
        )
    # generate a batch of targets (probability of a click)
    if cache_key is not None:
        T = generate_random_output_batch(
            n, self.num_targets, self.round_targets, cache_key
        )
    else:
        T = generate_random_output_batch.__wrapped__(
            n, self.num_targets, self.round_targets
        )
    return (X, lS_o, lS_i, T)
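    # Usage sketch (illustrative only, not from this file): because each
    # __getitem__ call already yields a complete mini-batch, automatic batching
    # should be disabled when wrapping the dataset in a DataLoader, either with
    # batch_size=None or with a collate function that unpacks the batch, e.g.:
    #
    #     loader = torch.utils.data.DataLoader(dataset, batch_size=None)
    #     for X, lS_o, lS_i, T in loader:
    #         ...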