in optimum/executorch/attentions/custom_kv_cache.py [0:0]
def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    """Report the sequence length currently cached for one layer.

    Args:
        layer_idx: Index into ``self.kv_cache`` / ``self.is_sliding``.
            ``None`` is treated the same as ``0``.

    Returns:
        For a layer flagged in ``self.is_sliding``: the largest value held in
        the layer's ``cache_positions_manager.cache_positions`` plus one,
        obtained through ``.item()``.
        For any other layer: the number of entries along the second axis of
        ``k_cache`` (taken at index 0 of the first and third axes) that
        contain at least one truthy value. This is the raw ``.sum()`` result,
        not converted with ``.item()``.

    NOTE(review): the two branches do not return the same kind of object
    (``.item()`` result vs. ``.sum()`` result) even though the annotation
    says ``int`` -- confirm callers are fine with that.
    """
    idx = 0 if layer_idx is None else layer_idx
    cache_for_layer = self.kv_cache[idx]

    if not self.is_sliding[idx]:
        # Regular layer: a slot counts as filled when any element of its
        # key vector is non-zero; the total of filled slots is the length.
        # Presumably the indexed axes are batch and head -- TODO confirm.
        filled_slots = cache_for_layer.k_cache[0, :, 0].any(dim=-1)
        return filled_slots.sum()

    # Sliding (ring) layer: the positions manager records the cache position
    # stored in each slot, so "highest position + 1" is used as the length.
    # The original author was unsure this is the right interpretation of
    # sequence length; that caveat still applies.
    slot_positions = cache_for_layer.cache_positions_manager.cache_positions
    return slot_positions.max().item() + 1