def _cached_batch()

in text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py


    def _cached_batch(self, batch_id: int, active_slots: List):
        """Create a CachedBatch from the active slots.

        Returns None when no slot is in the READY state, signalling that
        there are no pending requests left for this batch.
        """
        # Only READY slots still hold a request whose generation is in progress.
        request_ids = [slot.request_id for slot in active_slots if slot.state == Slot.State.READY]
        if len(request_ids) == 0:
            logger.debug("No more pending requests")
            return None
        size = len(request_ids)
        # Worst-case token budget: each request may grow up to the model's sequence length.
        max_tokens = size * self.model.config.sequence_length
        return CachedBatch(id=batch_id, request_ids=request_ids, size=size, max_tokens=max_tokens)
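
For illustration, below is a minimal, self-contained sketch of the same selection and accounting logic. The `Slot`, `State`, and `CachedBatch` definitions here are simplified stand-ins invented for the example, not the actual text-generation-inference classes, and the free function `cached_batch` is a hypothetical rewrite of the method without the generator's `self`:

```python
# Sketch only: stub types standing in for the real Slot / CachedBatch classes.
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional


class State(Enum):
    EMPTY = 0
    READY = 1


@dataclass
class Slot:
    request_id: int
    state: State


@dataclass
class CachedBatch:
    id: int
    request_ids: List[int]
    size: int
    max_tokens: int


def cached_batch(batch_id: int, active_slots: List[Slot], sequence_length: int) -> Optional[CachedBatch]:
    # Keep only slots whose request is still being generated.
    request_ids = [slot.request_id for slot in active_slots if slot.state == State.READY]
    if not request_ids:
        return None
    size = len(request_ids)
    # Worst case: every request may use up to the full sequence length.
    max_tokens = size * sequence_length
    return CachedBatch(id=batch_id, request_ids=request_ids, size=size, max_tokens=max_tokens)


# Example: two READY slots and one EMPTY slot with a 1024-token sequence length.
slots = [Slot(0, State.READY), Slot(1, State.EMPTY), Slot(2, State.READY)]
batch = cached_batch(batch_id=7, active_slots=slots, sequence_length=1024)
assert batch is not None and batch.size == 2 and batch.max_tokens == 2048
```

The key design point the sketch preserves is that `max_tokens` is a pessimistic upper bound (`size * sequence_length`) rather than a measured count, which lets the caller reserve cache capacity without inspecting individual requests.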