in lmms_eval/api/task.py [0:0]
def build_all_requests(self, limit=None, rank=None, world_size=None) -> None:
    """Build a set of Instances for a task, and store them in task.instances"""
    if self.has_test_docs():
        docs = self.test_docs()
        split = self.config.test_split
    elif self.has_validation_docs():
        docs = self.validation_docs()
        split = self.config.validation_split
    else:
        assert False, f"Task dataset (path={self.DATASET_PATH}, name={self.DATASET_NAME}) must have valid or test docs!"
eval_logger.info(f"Building contexts for task {self.CONFIG.task} on rank {rank}...")

    instances = []
    # Shard doc ids across ranks, then tee the iterator so the total can be
    # counted for the progress bar without consuming the iterator itself.
    doc_id_iterator = utils.create_iterator([i for i in range(len(docs))], rank, world_size, limit)
    doc_id_iterator, doc_id_iterator_counting = itertools.tee(doc_id_iterator)
    total_docs = sum(1 for _ in doc_id_iterator_counting)
    pbar = tqdm(total=total_docs, desc="Building context", disable=(rank != 0))
    for doc_id in doc_id_iterator:
        # sample fewshot context #TODO: need to offset doc_id by rank now!
        fewshot_ctx = self.fewshot_context(
            doc_id,
            0 if self.config.num_fewshot is None else self.config.num_fewshot,
            self.config.training_split if self.has_training_docs() else split,
        )

        # TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute
        inst = self.construct_requests(
            doc_id=doc_id,
            ctx=fewshot_ctx,
            metadata=(self.config["task"], doc_id, self.config.repeats),
            split=split,
        )

        # construct_requests may return a single Instance or a list of Instances
        if not isinstance(inst, list):
            inst = [inst]

        instances.extend(inst)
        pbar.update(1)

    pbar.close()
    self._instances = instances
    assert len(self._instances) != 0, "task.build_requests() did not find any docs!"
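

# For context, a minimal sketch of how a caller might drive this method and
# consume the stored instances. This is illustrative, not the actual evaluator
# code: the `collect_requests` helper and the grouping by `instance.request_type`
# are assumptions about how an lm-eval-style evaluator batches requests per
# model call; only `build_all_requests` and `task.instances` come from the
# listing above.
from collections import defaultdict


def collect_requests(task, limit=None, rank=0, world_size=1):
    # Populate task._instances for this rank's shard of the dataset.
    task.build_all_requests(limit=limit, rank=rank, world_size=world_size)

    # Group the built Instances by request type (e.g. "generate_until" or
    # "loglikelihood") so each group can be dispatched to the model together.
    requests = defaultdict(list)
    for instance in task.instances:
        requests[instance.request_type].append(instance)
    return requests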