def get_seq_length()

in optimum/executorch/attentions/custom_kv_cache.py [0:0]


    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
        """Report how many positions of one layer's cache have been used.

        Args:
            layer_idx: Index of the layer whose cache is inspected. ``None``
                is treated the same as layer 0.

        Returns:
            For a layer flagged in ``self.is_sliding``: the largest value held
            in the layer's ``cache_positions_manager.cache_positions`` plus
            one, converted to a Python scalar via ``.item()``.
            For any other layer: the number of entries along the second axis
            of ``k_cache`` that contain at least one non-zero element, as
            produced by ``.sum()``.

        NOTE(review): the non-sliding branch returns the ``.sum()`` result
        unconverted (presumably a 0-dim tensor), which does not match the
        ``int`` annotation -- confirm whether callers rely on that.
        """
        idx = 0 if layer_idx is None else layer_idx
        cache = self.kv_cache[idx]

        if not self.is_sliding[idx]:
            # Look only at the first batch entry and index 0 of the third axis
            # (presumably the first head, assuming a [batch, seq, heads, dim]
            # layout -- TODO confirm). A slot counts as occupied when any of
            # its elements is non-zero.
            slots = cache.k_cache[0, :, 0]
            occupied = slots.any(dim=-1)
            return occupied.sum()

        # Sliding (ring) layers track a cache position for every slot, so the
        # highest position seen so far plus one is used as the length. It is
        # an open question whether that is the right definition of sequence
        # length for a ring cache.
        positions = cache.cache_positions_manager.cache_positions
        return positions.max().item() + 1