optimum/habana/transformers/models/deepseek_v3/modeling_deepseek_v3.py [992:1040]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            if use_flash_attention and FusedSDPA is not None:
                if q_len == 1:
                    # next token
                    attn_output = self.fused_scaled_dot_product_attention(
                        query_states,
                        key_states,
                        value_states,
                        attention_mask,
                        0.0,  # dropout_p
                        False,  # is_causal
                        None,  # scale
                        "None",  # softmax_mode
                        False,  # recompute_mode
                        None,  # valid_sequence_lengths
                        "None",  # padding_side
                    )
                else:
                    # first token
                    softmax_mode = "fast" if flash_attention_fast_softmax else "None"
                    if flash_attention_causal_mask:
                        attn_output = self.fused_scaled_dot_product_attention(
                            query_states,
                            key_states,
                            value_states,
                            None,  # attn_mask: causal masking is applied by the kernel
                            0.0,  # dropout_p
                            True,  # is_causal
                            None,  # scale
                            softmax_mode,
                            flash_attention_recompute,  # recompute_mode
                            valid_sequence_lengths,
                            "left",  # padding_side
                        )
                    else:
                        attn_output = self.fused_scaled_dot_product_attention(
                            query_states,
                            key_states,
                            value_states,
                            attention_mask,
                            0.0,  # dropout_p
                            False,  # is_causal
                            None,  # scale
                            softmax_mode,
                            flash_attention_recompute,  # recompute_mode
                            None,  # valid_sequence_lengths
                            "None",  # padding_side
                        )

            else:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
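
Note: the positional arguments in the calls above follow the Habana FusedSDPA kernel interface. The sketch below shows the assumed parameter order behind self.fused_scaled_dot_product_attention (a ModuleFusedSDPA-style wrapper); parameter names are taken from the Gaudi FusedSDPA convention and are an illustration of the mapping, not the verbatim wrapper code.

import torch

try:
    from habana_frameworks.torch.hpex.kernels import FusedSDPA
except ImportError:
    FusedSDPA = None


class ModuleFusedSDPA(torch.nn.Module):
    # Sketch of the wrapper assumed by the calls above; parameter names follow the
    # HPU FusedSDPA kernel convention and are an assumption, not the verbatim source.
    def __init__(self, fused_sdpa_kernel=FusedSDPA):
        super().__init__()
        self._hpu_kernel_fsdpa = fused_sdpa_kernel

    def forward(
        self,
        query,
        key,
        value,
        attn_mask,  # attention_mask, or None when causal masking is done in the kernel
        dropout_p,  # 0.0 in all calls above
        is_causal,  # True only on the causal-mask prefill path
        scale,  # None -> kernel default scaling
        softmax_mode,  # "None" or "fast" (flash_attention_fast_softmax)
        recompute_mode,  # flash_attention_recompute on prefill, False on decode
        valid_sequence_lengths,  # only passed on the causal prefill path
        padding_side,  # "left" with valid_sequence_lengths, otherwise "None"
    ):
        return self._hpu_kernel_fsdpa.apply(
            query, key, value, attn_mask, dropout_p, is_causal, scale,
            softmax_mode, recompute_mode, valid_sequence_lengths, padding_side,
        )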



optimum/habana/transformers/models/qwen2_moe/modeling_qwen2_moe.py [422:470]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if use_flash_attention and FusedSDPA is not None:
            if q_len == 1:
                # next token
                attn_output = self.fused_scaled_dot_product_attention(
                    query_states,
                    key_states,
                    value_states,
                    attention_mask,
                    0.0,  # dropout_p
                    False,  # is_causal
                    None,  # scale
                    "None",  # softmax_mode
                    False,  # recompute_mode
                    None,  # valid_sequence_lengths
                    "None",  # padding_side
                )
            else:
                # first token
                softmax_mode = "fast" if flash_attention_fast_softmax else "None"
                if flash_attention_causal_mask:
                    attn_output = self.fused_scaled_dot_product_attention(
                        query_states,
                        key_states,
                        value_states,
                        None,  # attn_mask: causal masking is applied by the kernel
                        0.0,  # dropout_p
                        True,  # is_causal
                        None,  # scale
                        softmax_mode,
                        flash_attention_recompute,  # recompute_mode
                        valid_sequence_lengths,
                        "left",  # padding_side
                    )
                else:
                    attn_output = self.fused_scaled_dot_product_attention(
                        query_states,
                        key_states,
                        value_states,
                        attention_mask,
                        0.0,  # dropout_p
                        False,  # is_causal
                        None,  # scale
                        softmax_mode,
                        flash_attention_recompute,  # recompute_mode
                        None,  # valid_sequence_lengths
                        "None",  # padding_side
                    )

        else:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
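
The two ranges above are verbatim duplicates of the same decode/prefill dispatch. As a sketch only (the helper name dispatch_fused_sdpa is hypothetical and not part of optimum-habana), the shared branching both files implement is:

def dispatch_fused_sdpa(
    fused_sdpa,
    query_states,
    key_states,
    value_states,
    attention_mask,
    q_len,
    flash_attention_fast_softmax=False,
    flash_attention_causal_mask=False,
    flash_attention_recompute=False,
    valid_sequence_lengths=None,
):
    # Hypothetical helper restating the branching duplicated in both excerpts above.
    if q_len == 1:
        # next token (decode): explicit mask, default softmax, no recompute
        return fused_sdpa(
            query_states, key_states, value_states, attention_mask,
            0.0, False, None, "None", False, None, "None",
        )
    softmax_mode = "fast" if flash_attention_fast_softmax else "None"
    if flash_attention_causal_mask:
        # first token (prefill) with kernel-side causal masking
        return fused_sdpa(
            query_states, key_states, value_states, None,
            0.0, True, None, softmax_mode, flash_attention_recompute,
            valid_sequence_lengths, "left",
        )
    # first token (prefill) with an explicit attention mask
    return fused_sdpa(
        query_states, key_states, value_states, attention_mask,
        0.0, False, None, softmax_mode, flash_attention_recompute, None, "None",
    )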



