def _create_dummy_request()

in text-generation-inference/server/text_generation_server/generator.py


    def _create_dummy_request(self, max_tokens: int) -> Request:
        """Create a dummy request for warmup."""
        # Generate a random input with slightly more tokens than requested, since special tokens will be
        # skipped when the ids are decoded back to text.
        MARGIN = 10
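        # torch.randint's first positional argument is the exclusive upper bound, so ids are drawn from [0, vocab_size).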
        input_tokens = torch.randint(self.model.config.vocab_size, (1, max_tokens + MARGIN), dtype=torch.int64)
        text = self.tokenizer.decode(input_tokens[0], skip_special_tokens=True)
        # Dummy sampling parameters, only needed so that a Request can be constructed
        parameters = NextTokenChooserParameters(
            temperature=1.0,
            top_k=None,
            top_p=None,
            do_sample=False,
            seed=None,
            repetition_penalty=1.0,
            typical_p=1.0,
        )
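        # Ignoring the EOS token guarantees the warmup request generates the full max_new_tokens.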
        stopping_parameters = StoppingCriteriaParameters(max_new_tokens=20, ignore_eos_token=True)
        dummy_request = Request(
            id=0,
            inputs=text,
            truncate=max_tokens,
            parameters=parameters,
            stopping_parameters=stopping_parameters,
        )
        return dummy_request
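
A minimal standalone sketch of the margin trick, outside the generator. Decoding random ids with skip_special_tokens=True and then re-encoding rarely round-trips to the exact same length, so the extra MARGIN tokens keep the warmup prompt from falling short of max_tokens once the server applies truncate. The "gpt2" tokenizer is an arbitrary assumption for illustration, not necessarily what the server loads.

    import torch
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # hypothetical model choice for the demo
    max_tokens = 64
    MARGIN = 10

    # Same sampling as _create_dummy_request: uniform random ids over the vocabulary.
    input_tokens = torch.randint(tokenizer.vocab_size, (1, max_tokens + MARGIN), dtype=torch.int64)
    text = tokenizer.decode(input_tokens[0], skip_special_tokens=True)

    # Re-encoding with truncation mimics the effect of Request(truncate=max_tokens).
    reencoded = tokenizer(text, truncation=True, max_length=max_tokens)["input_ids"]
    print(len(reencoded))  # at most max_tokens; the margin makes reaching that budget likely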