optimum/executorch/attentions/custom_kv_cache.py [99:116]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        layer_cache = self.kv_cache[layer_idx]

        # Use the CustomKVCache's update method
        # CustomKVCache expects input_pos, k_val, v_val and handles the transpose internally
        k_out, v_out = layer_cache.update(
            input_pos=cache_position,
            k_val=key_states,
            v_val=value_states,
        )

        return k_out, v_out

    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
        """Returns the sequence length of the cached states. A layer index can be optionally passed."""
        # Occupied cache == any slot in the 2nd dim (sequence length) holds a non-zero value
        # This is different from StaticCache which checks the 3rd dim
        if layer_idx is None:
            layer_idx = 0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


optimum/executorch/attentions/custom_kv_cache.py [268:283]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        layer_cache = self.kv_cache[layer_idx]

        # Use the cache's update method
        # Both CustomKVCache and CustomRingKVCache have the same update interface
        k_out, v_out = layer_cache.update(
            input_pos=cache_position,
            k_val=key_states,
            v_val=value_states,
        )

        return k_out, v_out

    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
        """Returns the sequence length of the cached states. A layer index can be optionally passed."""
        if layer_idx is None:
            layer_idx = 0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -