def get_indexed_data()

in model/disambiguate/dataloader.py [0:0]


    def get_indexed_data(self, indices):
        """Assemble a batch for the dialogs stored at ``indices``.

        Every utterance of a selected dialog is prefixed with a speaker
        token: ``<USER>`` at even positions and ``<SYS>`` at odd positions.
        The last ``self.num_utterances`` tagged utterances are joined with
        single spaces and the resulting strings are passed to
        ``self._tokenizer``.

        The tagged utterances are built in a fresh list. Writing them back
        into ``self._raw_data`` would stack the speaker tokens
        (``<USER> <USER> ...``) each time the same index is requested again,
        e.g. on a later epoch or when an index repeats within ``indices``.

        Args:
            indices: Iterable of positions into ``self._raw_data``.

        Returns:
            A dict with the keys:
                ``text_in``: tokenizer output for the joined utterances; its
                    values are moved with ``.cuda()`` when
                    ``self._args["use_gpu"]`` is truthy.
                ``gt_label``: ``self.device.LongTensor`` built from the
                    ``disambiguation_label_gt`` values, or from all zeros
                    when ``self._hidden_labels`` is truthy.
                ``dialog_id``: list of the selected ``dialog_id`` values.
                ``turn_id``: list of the selected ``turn_id`` values.
        """
        # Indexed by turn parity: even turns -> <USER>, odd turns -> <SYS>.
        speaker_tags = ("<USER> ", "<SYS> ")
        text_labels = []
        text_inputs = []
        dialog_ids = []
        turn_ids = []
        for index in indices:
            dialog_datum = self._raw_data[index]
            # Add <USER> and <SYS> tokens on a copy so the stored dialog
            # stays untouched and can be tagged again on the next request.
            tagged_dialog = [
                speaker_tags[turn_id % 2] + turn
                for turn_id, turn in enumerate(dialog_datum["input_text"])
            ]
            text_inputs.append(" ".join(tagged_dialog[-self.num_utterances:]))
            text_labels.append(dialog_datum["disambiguation_label_gt"])
            dialog_ids.append(dialog_datum["dialog_id"])
            turn_ids.append(dialog_datum["turn_id"])
        encoded_inputs = self._tokenizer(
            text_inputs, return_tensors="pt", padding=True, truncation=True,
        )
        if self._args["use_gpu"]:
            encoded_inputs = {key: val.cuda() for key, val in encoded_inputs.items()}
        if self._hidden_labels:
            # Reset all the text_labels to 0.
            text_labels = [0] * len(text_labels)
        # Pack the batch.
        return {
            "text_in": encoded_inputs,
            "gt_label": self.device.LongTensor(text_labels),
            "dialog_id": dialog_ids,
            "turn_id": turn_ids,
        }