in backends/candle/src/models/modernbert.rs [577:603]
/// Builds the sliding-window ("local") attention mask used by ModernBERT's local
/// attention layers: position `i` may attend to position `j` only when
/// `|i - j| <= local_attention / 2` and `j` is allowed by the incoming mask.
fn get_local_attention_mask(&self, attention_mask: &Tensor) -> Result<Tensor> {
    // Remember the caller's device; the mask is assembled on CPU and moved back at the end.
    let dev = attention_mask.device();
    let attention_mask = attention_mask
        .to_device(&Device::Cpu)?
        .to_dtype(DType::U8)?;

    // The incoming mask is 4-D with the last two dims (seq_len, seq_len); only seq_len is needed below.
    let mask_shape = attention_mask.shape();
    let (_, _, seq_len, _) = mask_shape.dims4()?;

    // A (seq_len, seq_len) grid of column indices: rows[i][j] = j.
    let rows = Tensor::arange(0, seq_len as i64, attention_mask.device())?.unsqueeze(0)?;
    let rows = rows.broadcast_as((seq_len, seq_len))?;

    // distance[i][j] = |i - j|: the offset between query position i and key position j.
    let distance = (&rows - &rows.t()?)?.abs()?;

    // Each position may attend to at most `local_attention / 2` tokens on either side.
    let window_size = (self.local_attention / 2) as i64;
    let window_mask = distance
        .le(window_size)?
        .unsqueeze(0)?
        .unsqueeze(0)?
        .broadcast_as(mask_shape)?;

    // Keep a window entry only where the incoming mask also permits attention
    // (nonzero); everywhere else, e.g. padding, the entry is forced to zero.
    let zero_tensor = Tensor::zeros_like(&attention_mask)?;
    let local_attention_mask = attention_mask.where_cond(&window_mask, &zero_tensor)?;
    let local_attention_mask = local_attention_mask.to_device(dev)?;

    Ok(local_attention_mask)
}
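
// --- Illustration (not from the repository) ---
// A minimal, self-contained sketch of the same windowing arithmetic, shown
// without the batch/head dimensions or the padding mask. It assumes only the
// `candle_core` crate; `seq_len = 6` and `local_attention = 4` are arbitrary
// values picked for display, not values from the model config.
use candle_core::{Device, Result, Tensor};

fn main() -> Result<()> {
    let seq_len = 6usize;
    let local_attention = 4usize; // window spans local_attention / 2 = 2 positions each side

    // Same index arithmetic as get_local_attention_mask, on CPU.
    let rows = Tensor::arange(0, seq_len as i64, &Device::Cpu)?.unsqueeze(0)?;
    let rows = rows.broadcast_as((seq_len, seq_len))?;
    let distance = (&rows - &rows.t()?)?.abs()?;
    let window_mask = distance.le((local_attention / 2) as i64)?;

    // Row i holds 1s exactly at the columns j with |i - j| <= 2, e.g. row 0 is
    // [1, 1, 1, 0, 0, 0] and row 3 is [0, 1, 1, 1, 1, 1].
    println!("{:?}", window_mask.to_vec2::<u8>()?);
    Ok(())
}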