in backends/candle/src/models/modernbert.rs [369:394]
/// Runs the full stack of encoder layers over `hidden_states`.
///
/// ModernBERT alternates attention flavors: every layer whose index is a
/// multiple of `self.global_attn_every_n_layers` (index 0, n, 2n, ...) uses
/// global attention; all other layers use local (sliding-window) attention.
/// Each flavor carries its own attention mask and rotary-embedding cache,
/// so the matching pair is selected per layer before calling into it.
///
/// # Arguments
/// * `hidden_states` - input activations fed to the first layer.
/// * `global_attention_mask` / `local_attention_mask` - masks for the
///   global- and local-attention layers respectively.
/// * `global_rotary_cache` / `local_rotary_cache` - precomputed (cos, sin)
///   rotary-embedding tables for each attention flavor.
///
/// # Errors
/// Propagates any error returned by an individual layer's `forward`.
fn forward(
    &self,
    hidden_states: &Tensor,
    global_attention_mask: &Tensor,
    local_attention_mask: &Tensor,
    global_rotary_cache: &(Tensor, Tensor),
    local_rotary_cache: &(Tensor, Tensor),
) -> Result<Tensor> {
    let _enter = self.span.enter();
    // candle tensors are reference-counted, so this clone is cheap; it lets
    // us rebind the loop variable without mutating the caller's tensor.
    let mut hidden_states = hidden_states.clone();
    for (index, layer) in self.layers.iter().enumerate() {
        // Layers at indices 0, n, 2n, ... attend globally; the rest use the
        // local sliding-window mask and its matching rotary cache.
        let use_local_attention = index % self.global_attn_every_n_layers != 0;
        let (attention_mask, rotary_cache) = if use_local_attention {
            (local_attention_mask, local_rotary_cache)
        } else {
            (global_attention_mask, global_rotary_cache)
        };
        hidden_states = layer.forward(&hidden_states, attention_mask, rotary_cache)?;
    }
    Ok(hidden_states)
}