in text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
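# Context imports (assumed from the surrounding module, which is not shown in
# this excerpt): numpy plus the server's protobuf request types.
import numpy as np

from text_generation_server.pb.generate_pb2 import (
    Batch,
    NextTokenChooserParameters,
    Request,
    StoppingCriteriaParameters,
)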
def _create_dummy_request(self, max_tokens: int) -> Batch:
    """Create a dummy request for warmup."""
    # Generate a random input slightly longer than requested: special tokens are
    # skipped when decoding, so the effective prompt can come out shorter than
    # the raw token count.
    MARGIN = 10
    input_tokens = np.random.randint(self.model.config.vocab_size, size=(1, max_tokens + MARGIN), dtype=np.int64)
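    # Requests carry raw text, so decode the random ids back into a prompt string.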
    text = self.tokenizer.decode(input_tokens[0], skip_special_tokens=True)
    # These are just dummy params to allow Request creation
    parameters = NextTokenChooserParameters(
        temperature=1.0,
        top_k=None,
        top_p=None,
        do_sample=False,
        seed=None,
        repetition_penalty=1.0,
        typical_p=1.0,
    )
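    # Decode a short, fixed number of tokens and ignore EOS so warmup always
    # runs the full generation loop.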
    stopping_parameters = StoppingCriteriaParameters(max_new_tokens=20, ignore_eos_token=True)
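    # truncate=max_tokens clips the decoded prompt back down to the requested
    # warmup length.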
    dummy_request = Request(
        id=0,
        inputs=text,
        truncate=max_tokens,
        parameters=parameters,
        stopping_parameters=stopping_parameters,
    )
    return dummy_request
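
# Illustrative only: a minimal sketch of how a warmup pass might exercise
# _create_dummy_request. The Batch fields (id, requests, size, max_tokens)
# follow the server's protobuf schema, but this `_warmup_one_size` helper and
# the `prefill` call are assumptions for illustration, not the repository's
# actual warmup code.
def _warmup_one_size(self, max_tokens: int):
    dummy_request = self._create_dummy_request(max_tokens)
    batch = Batch(id=0, requests=[dummy_request], size=1, max_tokens=max_tokens)
    # A single prefill forces compilation/allocation for this sequence length,
    # so later real requests of the same size do not pay that cost.
    self.prefill(batch)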