in docker_images/peft/app/pipelines/text_generation.py [0:0]
def _process_req(self, inputs: str, **kwargs) -> str:
    """
    Tokenize ``inputs``, run ``self.model.generate`` on the result and
    decode the generated ids with ``self.tokenizer.batch_decode``.

    Args:
        inputs (:obj:`str`):
            a string for text to be completed
        **kwargs:
            Optional generation overrides. Only ``max_new_tokens``
            (default ``10``) and ``eos_token_id`` (default ``3``) are read;
            a missing or ``None`` value falls back to the default. Every
            other key is ignored.
    Returns:
        Whatever ``self.tokenizer.batch_decode(outputs,
        skip_special_tokens=True)`` returns.
        NOTE(review): this is presumably a list of decoded strings rather
        than the single ``str`` the annotation promises -- confirm against
        the callers before tightening the annotation.
    """
    # The defaults are the values that used to be hard-coded in the
    # ``generate`` call, so calls without overrides behave as before.
    max_new_tokens = kwargs.get("max_new_tokens")
    if max_new_tokens is None:
        max_new_tokens = 10
    eos_token_id = kwargs.get("eos_token_id")
    if eos_token_id is None:
        eos_token_id = 3

    encoded = self.tokenizer(inputs, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    self._model_to_gpu()
    if torch.cuda.is_available():
        # ``_model_to_gpu`` presumably placed the model on CUDA (its body is
        # not visible here); move the input tensors there as well.
        input_ids = input_ids.to("cuda")
        attention_mask = attention_mask.to("cuda")

    # Inference only: skip gradient tracking while generating.
    with torch.no_grad():
        outputs = self.model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            eos_token_id=eos_token_id,
        )
    return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)