in arctic_inference/suffix_decoding/cache.py [0:0]
def _generate_seq_id(self, req_id: Hashable) -> int:
    """Allocate a sequence id for ``req_id`` and register it in both maps.

    Candidates are taken from ``self._next_seq_id``, which is advanced on
    every probe and wrapped so it stays a non-negative 31-bit value. A
    candidate is skipped while it is mapped to a request that is present in
    ``self._local_trees`` (i.e. still active). If the chosen candidate is
    mapped to a request that is *not* in ``self._local_trees`` (inactive but
    still cached), that old request is dropped from both id maps and the
    candidate is removed from ``self._global_tree`` before being reused.

    Args:
        req_id: Identifier of the request that needs a sequence id.

    Returns:
        The sequence id now associated with ``req_id``.
    """
    wrap_mask = 0x7FFFFFFF  # Largest non-negative int32_t value.
    unmapped = object()  # Sentinel: candidate has no owner at all.
    owners = self._seq_to_req_id

    while True:
        candidate = self._next_seq_id
        # Always advance the cursor, even when the candidate is accepted.
        self._next_seq_id = (candidate + 1) & wrap_mask

        previous_owner = owners.get(candidate, unmapped)
        if previous_owner is unmapped:
            # Never handed out (or already released): use it as-is.
            break
        if previous_owner in self._local_trees:
            # Held by an active request; probe the next id.
            continue

        # Rare wrap-around collision with an inactive, cached request:
        # evict that request so its id can be reused.
        del self._req_to_seq_id[previous_owner]
        del owners[candidate]
        self._global_tree.remove(candidate)
        break

    # Bind the id to the new request in both directions.
    self._req_to_seq_id[req_id] = candidate
    owners[candidate] = req_id
    self._maybe_evict_requests(candidate)
    return candidate