in distilvit/curate.py [0:0]
def process_batch(self, batch):
    """Run ``self.transform`` over every row of ``batch`` and store the results.

    The model is loaded on first use (when ``self.model`` is ``None``).
    Shallow copies of the incoming ``"caption"`` and ``"sentids"`` columns are
    saved under ``"original_caption"`` and ``"original_sentids"`` before the
    columns are overwritten.

    Args:
        batch: Mapping with ``"caption"`` and ``"sentids"`` entries that are
            iterated in lockstep, one item per row.
            NOTE(review): presumably a column-oriented batch where each item
            is the list of captions / sentence ids for one image -- confirm
            against the caller.

    Returns:
        The same ``batch`` object, mutated in place, with ``"caption"`` and
        ``"sentids"`` replaced by the transformed values and a new ``"grade"``
        entry holding the second element returned by ``self.transform`` for
        each row.
    """
    if self.model is None:
        self.load_model_and_tokenizer()

    # Keep the untouched inputs around: the transformed captions come back
    # with their own sentence-id ordering, so the originals need to be
    # re-triaged against it later.
    batch["original_caption"] = list(batch["caption"])
    batch["original_sentids"] = list(batch["sentids"])

    # One (captions, grade, sentids) triple per row, in input order.
    outcomes = []
    for caption_group, id_group in zip(batch["caption"], batch["sentids"]):
        converted, grade, reordered_ids = self.transform(caption_group, id_group)
        outcomes.append((converted, grade, reordered_ids))

    batch["caption"] = [row[0] for row in outcomes]
    batch["grade"] = [row[1] for row in outcomes]
    batch["sentids"] = [row[2] for row in outcomes]
    return batch