relogic/pretrainkit/multitask_trainer.py [803:870]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(self.args.device)

            with torch.no_grad():
                outputs = model(**inputs)
                if has_labels:
                    step_eval_loss, logits = outputs[:2]
                    eval_losses += [step_eval_loss.mean().item()]
                else:
                    logits = outputs[0]

            if not prediction_loss_only:
                # Concatenation strategy changed: each batch's logits/labels come in
                # with shape (batch_size, sequence_length), but sequence_length varies
                # across batches, so we flatten every batch to 1-D and record the
                # per-example sequence lengths in a parallel size tensor.
                if preds is None:
                    preds = logits.detach()
                    preds_size = preds.new_full(size=preds.size()[:1], fill_value=preds.size(1)).detach()
                    preds = preds.view(-1)
                else:
                    preds_size = torch.cat((preds_size, logits.new_full(size=logits.size()[:1], fill_value=logits.size(1)).detach()), dim=0)
                    preds = torch.cat((preds, logits.detach().view(-1)), dim=0)

                if inputs.get("labels") is not None:
                    if label_ids is None:
                        label_ids = inputs["labels"].detach()
                        label_size = label_ids.new_full(size=label_ids.size()[:1], fill_value=label_ids.size(1)).detach()
                        label_ids = label_ids.view(-1)
                    else:
                        label_size = torch.cat((label_size, inputs["labels"].new_full(size=inputs["labels"].size()[:1], fill_value=inputs["labels"].size(1)).detach()), dim=0)
                        label_ids = torch.cat((label_ids, inputs["labels"].detach().view(-1)), dim=0)
                # if inputs.get("label_prediction_index") is not None and inputs.get(
                #     "typing_prediction_index") is not None:
                #     pass


        if self.args.local_rank != -1:
            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds, preds_size = self.distributed_concat_with_size(preds, preds_size, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids, label_size = self.distributed_concat_with_size(label_ids, label_size, num_total_examples=self.num_examples(dataloader))
        elif is_torch_tpu_available():
            # tpu-comment: get all predictions and labels from all worker shards of the eval dataset.
            # NOTE: this path is left unchanged for now; preds_size/label_size are not reduced on TPU.
            if preds is not None:
                preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
            if label_ids is not None:
                label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)

        # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = preds.cpu().numpy()
            preds_size = preds_size.cpu().numpy()
        if label_ids is not None:
            label_ids = label_ids.cpu().numpy()
            label_size = label_size.cpu().numpy()

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
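
Both snippets call `self.distributed_concat_with_size`, which is defined elsewhere in the trainer. Below is a minimal sketch of what such a helper could look like, written as a free function rather than a method, assuming the stock `torch.distributed.all_gather` API and SequentialDistributedSampler-style sharding (the same number of possibly-dummy examples per rank, truncated by `num_total_examples`). Because sequence lengths vary per batch, the flattened value tensors can have different lengths on different ranks and must be padded before gathering. Only the name, signature, and return shape are taken from the call sites above; the body is an assumption.

    import torch
    import torch.distributed as dist

    def distributed_concat_with_size(tensor, size, num_total_examples):
        # Gather a 1-D flattened value tensor plus its per-example size
        # tensor from every rank. Sketch only; the body is an assumption.
        world_size = dist.get_world_size()

        # Flattened lengths differ across ranks (sequence lengths vary per
        # batch), so first gather each rank's length.
        local_len = torch.tensor([tensor.numel()], device=tensor.device)
        all_lens = [torch.zeros_like(local_len) for _ in range(world_size)]
        dist.all_gather(all_lens, local_len)
        max_len = int(torch.stack(all_lens).max())

        # Pad values to a common length, gather, then strip the padding.
        padded = tensor.new_zeros(max_len)
        padded[: tensor.numel()] = tensor
        gathered = [torch.zeros_like(padded) for _ in range(world_size)]
        dist.all_gather(gathered, padded)
        values = torch.cat([g[: int(n)] for g, n in zip(gathered, all_lens)], dim=0)

        # Size tensors hold one entry per example, so with an equal number
        # of examples per rank they can be gathered directly, then truncated
        # to the true number of examples.
        size_list = [torch.zeros_like(size) for _ in range(world_size)]
        dist.all_gather(size_list, size)
        sizes = torch.cat(size_list, dim=0)[:num_total_examples]

        # Drop the values belonging to the truncated dummy examples.
        values = values[: int(sizes.sum())]
        return values, sizes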



relogic/pretrainkit/trainer.py [777:832]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(self.args.device)

            with torch.no_grad():
                outputs = model(**inputs)
                if has_labels:
                    step_eval_loss, logits = outputs[:2]
                    eval_losses += [step_eval_loss.mean().item()]
                else:
                    logits = outputs[0]

            if not prediction_loss_only:
                # Concatenation strategy changed: each batch's logits/labels come in
                # with shape (batch_size, sequence_length), but sequence_length varies
                # across batches, so we flatten every batch to 1-D and record the
                # per-example sequence lengths in a parallel size tensor.
                if preds is None:
                    preds = logits.detach()
                    preds_size = preds.new_full(size=preds.size()[:1], fill_value=preds.size(1)).detach()
                    preds = preds.view(-1)
                else:
                    preds_size = torch.cat((preds_size, logits.new_full(size=logits.size()[:1], fill_value=logits.size(1)).detach()), dim=0)
                    preds = torch.cat((preds, logits.detach().view(-1)), dim=0)

                if inputs.get("labels") is not None:
                    if label_ids is None:
                        label_ids = inputs["labels"].detach()
                        label_size = label_ids.new_full(size=label_ids.size()[:1], fill_value=label_ids.size(1)).detach()
                        label_ids = label_ids.view(-1)
                    else:
                        label_size = torch.cat((label_size, inputs["labels"].new_full(size=inputs["labels"].size()[:1], fill_value=inputs["labels"].size(1)).detach()), dim=0)
                        label_ids = torch.cat((label_ids, inputs["labels"].detach().view(-1)), dim=0)

        if self.args.local_rank != -1:
            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds, preds_size = self.distributed_concat_with_size(preds, preds_size, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids, label_size = self.distributed_concat_with_size(label_ids, label_size, num_total_examples=self.num_examples(dataloader))
        elif is_torch_tpu_available():
            # tpu-comment: get all predictions and labels from all worker shards of the eval dataset.
            # NOTE: this path is left unchanged for now; preds_size/label_size are not reduced on TPU.
            if preds is not None:
                preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
            if label_ids is not None:
                label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)

        # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = preds.cpu().numpy()
            preds_size = preds_size.cpu().numpy()
        if label_ids is not None:
            label_ids = label_ids.cpu().numpy()
            label_size = label_size.cpu().numpy()
        if self.compute_metrics is not None and preds is not None and label_ids is not None:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
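
Downstream, `compute_metrics` receives the flattened 1-D arrays together with the parallel size arrays. Assuming 2-D logits of shape (batch_size, sequence_length), as the in-code comment states, per-example sequences can be recovered with numpy alone; `split_by_size` is a hypothetical helper shown purely for illustration.

    import numpy as np

    def split_by_size(flat, sizes):
        # `flat` is the 1-D concatenation built by the evaluation loop;
        # `sizes` holds one sequence length per example, so the loop
        # guarantees len(flat) == sizes.sum().
        boundaries = np.cumsum(sizes)[:-1]
        return np.split(flat, boundaries)

    # Illustrative use inside a compute_metrics implementation:
    # pred_seqs  = split_by_size(preds, preds_size)
    # label_seqs = split_by_size(label_ids, label_size)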



