in text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
def _cached_batch(self, batch_id: int, active_slots: List):
    """Create a CachedBatch from the active slots.

    Returns None when no slot is in the READY state.
    """
    # Only slots in the READY state still hold a pending request.
    request_ids = [slot.request_id for slot in active_slots if slot.state == Slot.State.READY]
    if len(request_ids) == 0:
        logger.debug("No more pending requests")
        return None
    size = len(request_ids)
    # Worst case: every request in the batch grows to the model's full sequence length.
    max_tokens = size * self.model.config.sequence_length
    return CachedBatch(id=batch_id, request_ids=request_ids, size=size, max_tokens=max_tokens)
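
For illustration, here is a minimal, self-contained sketch of what this helper does. The `FakeSlot`, `SlotState`, and `FakeCachedBatch` types below are hypothetical stand-ins for the real `Slot` and `CachedBatch` classes (not the actual library types), used only to show the READY-slot filtering and the `max_tokens` computation in isolation.

```python
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional


class SlotState(Enum):
    # Hypothetical stand-in for Slot.State
    EMPTY = 0
    READY = 1


@dataclass
class FakeSlot:
    # Hypothetical stand-in for a generation slot
    request_id: int
    state: SlotState


@dataclass
class FakeCachedBatch:
    # Hypothetical stand-in for the CachedBatch message returned to the router
    id: int
    request_ids: List[int]
    size: int
    max_tokens: int


def cached_batch(batch_id: int, active_slots: List[FakeSlot], sequence_length: int) -> Optional[FakeCachedBatch]:
    """Standalone version of the _cached_batch logic using the stand-in types above."""
    # Keep only slots that still hold a pending request.
    request_ids = [slot.request_id for slot in active_slots if slot.state == SlotState.READY]
    if not request_ids:
        return None
    size = len(request_ids)
    # Upper bound: every request may grow to the model's full sequence length.
    max_tokens = size * sequence_length
    return FakeCachedBatch(id=batch_id, request_ids=request_ids, size=size, max_tokens=max_tokens)


if __name__ == "__main__":
    slots = [
        FakeSlot(request_id=0, state=SlotState.READY),
        FakeSlot(request_id=1, state=SlotState.EMPTY),
        FakeSlot(request_id=2, state=SlotState.READY),
    ]
    # Expected: request_ids=[0, 2], size=2, max_tokens=4096
    print(cached_batch(batch_id=7, active_slots=slots, sequence_length=2048))
```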