src/hyperpod_nemo_adapter/collections/model/nlp/custom_models/modeling_deepseek.py [163:175]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
        self,
        dim,
        max_position_embeddings=2048,
        base=10000,
        device=None,
        scaling_factor=1.0,
    ):
        """Record ``scaling_factor`` and delegate the rest to the parent initializer.

        Args:
            dim: Forwarded unchanged to the parent ``__init__``.
            max_position_embeddings: Forwarded unchanged to the parent
                ``__init__``. Defaults to 2048.
            base: Forwarded unchanged to the parent ``__init__``. Defaults to
                10000.
            device: Forwarded unchanged to the parent ``__init__``. Defaults to
                ``None``.
            scaling_factor: Stored as ``self.scaling_factor``; not passed to the
                parent. Defaults to 1.0.
        """
        # Assigned before the parent initializer runs.
        # NOTE(review): presumably the parent constructor invokes
        # ``_set_cos_sin_cache``, which would need this attribute already set --
        # confirm against the base class before reordering.
        self.scaling_factor = scaling_factor
        super().__init__(dim, max_position_embeddings, base, device)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        self.max_seq_len_cached = seq_len
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/hyperpod_nemo_adapter/collections/model/nlp/custom_models/modeling_deepseek.py [190:202]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
        self,
        dim,
        max_position_embeddings=2048,
        base=10000,
        device=None,
        scaling_factor=1.0,
    ):
        """Record ``scaling_factor`` and delegate the rest to the parent initializer.

        Args:
            dim: Forwarded unchanged to the parent ``__init__``.
            max_position_embeddings: Forwarded unchanged to the parent
                ``__init__``. Defaults to 2048.
            base: Forwarded unchanged to the parent ``__init__``. Defaults to
                10000.
            device: Forwarded unchanged to the parent ``__init__``. Defaults to
                ``None``.
            scaling_factor: Stored as ``self.scaling_factor``; not passed to the
                parent. Defaults to 1.0.
        """
        # Assigned before the parent initializer runs.
        # NOTE(review): presumably the parent constructor invokes
        # ``_set_cos_sin_cache``, which would need this attribute already set --
        # confirm against the base class before reordering.
        self.scaling_factor = scaling_factor
        super().__init__(dim, max_position_embeddings, base, device)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        self.max_seq_len_cached = seq_len
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



