in data/datasets.py [0:0]
def __getitem__(self, idx):
    """Build one tokenized question/answer sample for the record at ``idx``.

    The record's ``question`` (with an "answer only with the letter"
    instruction appended) and ``answer`` are arranged as a single
    user/assistant turn, turned into messages via ``_get_messages`` and
    tokenized via ``_prepare_inputs_and_loss_mask``. Labels are derived from
    the untouched token ids; afterwards every position flagged by the
    returned mask is replaced with the pad token in ``input_ids`` and set to
    0 in ``attention_mask``.

    Args:
        idx: Index into ``self.dataset``.

    Returns:
        A dict with the keys ``"images"``, ``"input_ids"``,
        ``"attention_mask"`` and ``"labels"``.
    """
    record = self.dataset[idx]
    processed_images = self._process_images([record['image']])

    # Work on a shallow copy so the synthesized 'texts' entry is not written
    # back into the object handed out by the underlying dataset (which may be
    # a stored/cached record rather than a fresh dict).
    item = dict(record)
    item['texts'] = [{
        "user": record['question'] + "\nAnswer only with the letter!",
        "assistant": record['answer'],
    }]

    messages = self._get_messages(item, image_count=len(processed_images))
    input_ids, mask, attention_mask = self._prepare_inputs_and_loss_mask(messages)

    # Labels must be computed before input_ids is overwritten below.
    labels = self._get_labels(input_ids, mask)

    # NOTE(review): the mask presumably flags the assistant (answer) tokens,
    # so this step hides the answer from the model input -- confirm against
    # _prepare_inputs_and_loss_mask.
    input_ids = input_ids.masked_fill(mask, self.tokenizer.pad_token_id)
    attention_mask = attention_mask.masked_fill(mask, 0)

    return {
        "images": processed_images,
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }