fn get_local_attention_mask()

in backends/candle/src/models/modernbert.rs [577:603]


    /// Builds the sliding-window mask used by ModernBERT's local attention
    /// layers: position i may attend to position j only when |i - j| is at
    /// most half of `self.local_attention` and the incoming mask allows it.
    fn get_local_attention_mask(&self, attention_mask: &Tensor) -> Result<Tensor> {
        // Remember the original device; the mask is assembled on the CPU
        // and moved back at the end.
        let dev = attention_mask.device();
        let attention_mask = attention_mask
            .to_device(&Device::Cpu)?
            .to_dtype(DType::U8)?;

        // The incoming mask is 4-D; its third dimension is the sequence length.
        let mask_shape = attention_mask.shape();
        let (_, _, seq_len, _) = mask_shape.dims4()?;

        // Row indices 0..seq_len, broadcast to a (seq_len, seq_len) grid.
        let rows = Tensor::arange(0, seq_len as i64, attention_mask.device())?.unsqueeze(0)?;
        let rows = rows.broadcast_as((seq_len, seq_len))?;

        // distance[i, j] = |i - j|: the offset between query i and key j.
        let distance = (&rows - &rows.t()?)?.abs()?;

        // Keep pairs within half the window on either side of the diagonal,
        // then expand the (seq_len, seq_len) band to the full 4-D mask shape.
        let window_size = (self.local_attention / 2) as i64;
        let window_mask = distance
            .le(window_size)?
            .unsqueeze(0)?
            .unsqueeze(0)?
            .broadcast_as(mask_shape)?;

        // Elementwise AND of the two masks: wherever the incoming mask is
        // nonzero, keep the window-mask value; everywhere else, zero.
        let zero_tensor = Tensor::zeros_like(&attention_mask)?;
        let local_attention_mask = attention_mask.where_cond(&window_mask, &zero_tensor)?;
        let local_attention_mask = local_attention_mask.to_device(dev)?;

        Ok(local_attention_mask)
    }
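
Taken in isolation, the banding logic is easy to check. The sketch below is an illustration, not repository code: it assumes `candle-core` as a direct dependency and uses a hypothetical window width of 3. It reproduces the distance/window computation for a five-token sequence and prints the resulting band matrix:

    use candle_core::{Device, Result, Tensor};

    fn main() -> Result<()> {
        let device = Device::Cpu;
        let seq_len = 5usize;
        let local_attention = 3usize; // hypothetical window width

        // Same construction as in get_local_attention_mask: a grid of row
        // indices, transposed and subtracted to get |i - j| for each pair.
        let rows = Tensor::arange(0, seq_len as i64, &device)?.unsqueeze(0)?;
        let rows = rows.broadcast_as((seq_len, seq_len))?;
        let distance = (&rows - &rows.t()?)?.abs()?;

        // Positions within half the window of the diagonal are kept.
        let window_size = (local_attention / 2) as i64;
        let window_mask = distance.le(window_size)?;

        // Prints a 0/1 band matrix (U8), roughly:
        // [[1, 1, 0, 0, 0],
        //  [1, 1, 1, 0, 0],
        //  [0, 1, 1, 1, 0],
        //  [0, 0, 1, 1, 1],
        //  [0, 0, 0, 1, 1]]
        println!("{window_mask}");
        Ok(())
    }

In the repository function, the same band is then broadcast to the 4-D mask shape and intersected with the incoming mask via `where_cond`, so masked-out key positions stay masked even when they fall inside the window.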