in optimum/executorch/attentions/custom_kv_cache.py [0:0]
def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    """Count the occupied sequence positions in one layer's key cache.

    A position along the 2nd dim (sequence length) of ``k_cache`` counts as
    occupied when the slice inspected for it holds at least one non-zero
    value. This differs from StaticCache, which checks the 3rd dim.

    Args:
        layer_idx: Index into ``self.kv_cache`` selecting the layer to
            inspect. ``None`` is treated the same as ``0``.

    Returns:
        The number of occupied positions, exactly as produced by ``.sum()``.
        NOTE(review): the result is not converted to a Python ``int`` here;
        presumably a 0-dim tensor is returned -- confirm against callers.
    """
    layer = 0 if layer_idx is None else layer_idx
    key_cache = self.kv_cache[layer].k_cache
    # Only index 0 of the 1st and 3rd dims is inspected.
    # NOTE(review): presumably a (batch, seq, heads, head_dim) layout, so this
    # looks at the first batch entry and first head only -- TODO confirm.
    occupied = key_cache[0, :, 0].any(dim=-1)
    return occupied.sum()