src/hyperpod_nemo_adapter/collections/model/nlp/custom_models/modeling_deepseek.py [1258:1265]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


src/hyperpod_nemo_adapter/patches/patch_llama_flash_attn_cp.py [312:318]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
) -> Union[Tuple, BaseModelOutputWithPast]:
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    use_cache = use_cache if use_cache is not None else self.config.use_cache
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -