def _process_req()

in docker_images/peft/app/pipelines/text_generation.py [0:0]


    def _process_req(self, inputs: str, **kwargs) -> list:
        """
        Tokenize ``inputs``, run ``self.model.generate`` on it and decode
        the generated token ids back to text.

        Args:
            inputs (:obj:`str`):
                a string for text to be completed
            **kwargs:
                accepted but currently unused; the generation settings
                below are fixed and are not overridden by these values.
        Returns:
            A :obj:`list` of :obj:`str`: the result of
            ``self.tokenizer.batch_decode`` on the generated sequences,
            with special tokens stripped. Note this is a list, not a bare
            string.
        """
        encoded = self.tokenizer(inputs, return_tensors="pt")
        # Presumably places the model on the GPU when one is present;
        # the helper is defined outside this block -- verify there.
        self._model_to_gpu()

        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]
        if torch.cuda.is_available():
            # Move the prompt tensors to CUDA before calling generate.
            input_ids = input_ids.to("cuda")
            attention_mask = attention_mask.to("cuda")

        # Inference only: no gradients are needed for generation.
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10,
                # NOTE(review): hard-coded EOS id; confirm it matches the
                # tokenizer's actual end-of-sequence token.
                eos_token_id=3,
            )

        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)