in deepseek_vl2/models/modeling_deepseek.py [0:0]
def forward(self, x, seq_len=None):
    """Return the cached cos/sin tables truncated to ``seq_len`` rows.

    Args:
        x: Tensor used only for its ``device``, ``dtype`` and (when
            ``seq_len`` is omitted) its shape. Per the layout note below it
            is expected to be ``[bs, num_attention_heads, seq_len, head_size]``.
        seq_len: Number of leading cache rows to return. When ``None`` it
            is taken from ``x.shape[-2]``.

    Returns:
        A ``(cos, sin)`` tuple: the first ``seq_len`` entries of
        ``self.cos_cached`` and ``self.sin_cached``, converted to
        ``x.dtype``.
    """
    # x: [bs, num_attention_heads, seq_len, head_size]
    if seq_len is None:
        # The signature allows omitting seq_len, but comparing None with the
        # cached length below raises TypeError; fall back to the sequence
        # axis of x instead.
        seq_len = x.shape[-2]

    # Rebuild the cache when nothing is cached yet or the request exceeds
    # the cached length.
    # NOTE(review): _set_cos_sin_cache is defined outside this view; it is
    # presumably what refreshes cos_cached / sin_cached / max_seq_len_cached.
    if self.max_seq_len_cached is None or seq_len > self.max_seq_len_cached:
        self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)

    return (
        self.cos_cached[:seq_len].to(dtype=x.dtype),
        self.sin_cached[:seq_len].to(dtype=x.dtype),
    )