in torchserve/inf2/llama2/workspace/inf2_handler.py [0:0]
def preprocess(self, requests):
    input_text = []
    for req in requests:
        # TorchServe delivers the payload under "data" or "body" depending on the client
        data = req.get("data") or req.get("body")
        if isinstance(data, (bytes, bytearray)):
            data = data.decode("utf-8")
        logger.info(f"received req={data}")
        input_text.append(data.strip())

    # Ensure the compiled model can handle the input received
    if len(input_text) > self.handle.micro_batch_size:
        raise ValueError(
            f"Model is compiled for batch size {self.handle.micro_batch_size} "
            f"but received input of size {len(input_text)}"
        )

    # Pad the batch with empty strings to match the compiled model's batch size
    input_text.extend([""] * (self.handle.micro_batch_size - len(input_text)))
    return self.tokenizer(input_text, return_tensors="pt", padding=True)
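For context, a minimal sketch of exercising this method in isolation. It assumes the method lives on a handler class importable as LlamaHandler (the class name is an assumption), that the module defines a module-level logger, and that initialize() would normally attach a Hugging Face tokenizer; the SimpleNamespace stub below is a hypothetical stand-in, not part of the example:

from types import SimpleNamespace

from transformers import AutoTokenizer

from inf2_handler import LlamaHandler  # class name is an assumption

# Any Llama 2 tokenizer checkpoint works; this one is gated on the Hub.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf")
tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token

# Hypothetical stub exposing only what preprocess() reads:
# self.tokenizer and self.handle.micro_batch_size.
stub = SimpleNamespace(
    tokenizer=tokenizer,
    handle=SimpleNamespace(micro_batch_size=4),
)

# Two TorchServe-style requests; preprocess() pads the batch to four with "".
requests = [
    {"data": b"What is AWS Inferentia2?"},
    {"body": "Explain micro-batching in one sentence."},
]
batch = LlamaHandler.preprocess(stub, requests)  # unbound call, stub passed as self
print(batch["input_ids"].shape[0])  # 4 == micro_batch_size

Padding the batch up to micro_batch_size (rather than sending a short batch) matters because the Neuron-compiled model only accepts the exact batch dimension it was traced with.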