in text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py [0:0]
def _post_generate(self, slot: Slot, next_token: int, generations: List[Generation]) -> None:
"""Post-generate a slot after the generation has been completed.
This will check if the slot is finished and append the generated text to the response.
Args:
slot (`Slot`):
The slot to post-generate.
next_token (`int`):
The next token generated by the model.
generations (`List[Generation]`):
The list of generations to append the slot to.
"""
    # prepare the generation response
    next_token_text = slot.append(next_token)
    generated_text = None
    finish_reason = None
    if next_token == self.tokenizer.eos_token_id:
        finish_reason = FinishReason.FINISH_REASON_EOS_TOKEN
    elif slot.stopped:
        if slot.generated_tokens == slot.max_new_tokens:
            finish_reason = FinishReason.FINISH_REASON_LENGTH
        else:
            finish_reason = FinishReason.FINISH_REASON_STOP_SEQUENCE
    request_id = slot.request_id
    if finish_reason is not None:
        # We must include the generated text for each finished sequence in the response
        generated_text = GeneratedText(
            text=slot.generated_text, generated_tokens=slot.generated_tokens, finish_reason=finish_reason
        )
        logger.debug(f"Decode complete for request {request_id} with {slot.generated_tokens} tokens")
        # This slot is now empty; it will be removed from the list of active slots.
        slot.clear()
    generations.append(
        Generation(
            request_id=request_id,
            prefill_tokens=None,
            tokens=Tokens(
                ids=[next_token],
                logprobs=[0],
                texts=[next_token_text],
                is_special=[next_token in self.special_tokens],
            ),
            generated_text=generated_text,
        )
    )
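
# A minimal usage sketch (an assumption, not part of the upstream file): in the decode
# loop, _post_generate would typically be called once per active slot with that slot's
# sampled token, accumulating one Generation per slot for the current step, e.g.:
#
#     generations: List[Generation] = []
#     for slot in active_slots:                          # `active_slots` is a hypothetical name
#         next_token = int(next_token_ids[slot.index])   # `next_token_ids`, `slot.index`: assumptions
#         self._post_generate(slot, next_token, generations)
#
# Finished slots are cleared inside _post_generate; every slot still contributes a
# Generation for this step, and `generated_text` is set only for the finished ones.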