text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
def assign(self, batch_id: int, request: Request, generation_config: GenerationConfig):
"""Assign a request to a slot.
Args:
batch_id (`int`): The id of the batch containing the request.
request (`Request`):
The request to be assigned. Contains the inputs and tokens selection parameters.
generation_config (`transformers.GenerationConfig`):
The base generation config (might be modified by the request generation parameters).
"""
self._state = Slot.State.READY
self._batch_id = batch_id
self._request_id = request.id
self._inputs = request.inputs
self._generation_config = copy.deepcopy(generation_config)
# Update generation config with token chooser parameters
self._generation_config.temperature = request.parameters.temperature
self._generation_config.top_k = request.parameters.top_k
self._generation_config.top_p = request.parameters.top_p
self._generation_config.typical_p = request.parameters.typical_p
self._generation_config.do_sample = request.parameters.do_sample
self._generation_config.repetition_penalty = request.parameters.repetition_penalty
self._truncate = request.truncate
self._seed = request.parameters.seed
# TODO: watermark
self._generation_config.max_new_tokens = request.stopping_parameters.max_new_tokens
self._max_new_tokens = self._generation_config.max_new_tokens
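

# --- Hedged usage sketch (not part of the original file) ---
# Illustrates how `assign` might be driven when a new request is mapped to a
# slot. The protobuf `Request` is mimicked with SimpleNamespace so the sketch
# is self-contained; in TGI the request and its parameters come from the
# generated protobuf layer, and `slot` is assumed to be an already-constructed
# Slot instance (constructor not shown in this excerpt).
from types import SimpleNamespace

from transformers import GenerationConfig

request = SimpleNamespace(
    id=0,
    inputs="Hello, world",
    truncate=1024,
    parameters=SimpleNamespace(
        temperature=0.8,
        top_k=50,
        top_p=0.95,
        typical_p=1.0,
        do_sample=True,
        repetition_penalty=1.1,
        seed=42,
    ),
    stopping_parameters=SimpleNamespace(max_new_tokens=64),
)
slot.assign(batch_id=0, request=request, generation_config=GenerationConfig())
# After assignment the slot holds a per-request copy of the generation config,
# so mutating it later does not affect other slots sharing the base config.
assert slot._state == Slot.State.READY  # peeks at the attribute set by assign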