backends/candle/src/models/mistral.rs (18 lines of code) (raw):
use crate::layers::HiddenAct;
use serde::Deserialize;
/// Configuration values for a Mistral model, loaded through `serde`.
///
/// No `#[serde(...)]` attributes are applied, so each field is read from the
/// key that matches its Rust name. Non-`Option` fields must be present in the
/// input; the `Option` fields may be omitted.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names (they look like the Hugging Face `config.json` keys for Mistral);
/// confirm against the code that consumes this struct.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct MistralConfig {
    /// Size of the token vocabulary.
    pub vocab_size: usize,
    /// Hidden size of the model.
    pub hidden_size: usize,
    /// Intermediate size (presumably the feed-forward width; confirm).
    pub intermediate_size: usize,
    /// Number of hidden layers.
    pub num_hidden_layers: usize,
    /// Number of attention heads.
    pub num_attention_heads: usize,
    /// Number of key/value heads.
    pub num_key_value_heads: usize,
    /// Activation function, expressed as the crate's [`HiddenAct`] type.
    pub hidden_act: HiddenAct,
    /// Maximum number of position embeddings.
    pub max_position_embeddings: usize,
    /// Initializer range (how it is used is not visible in this file).
    pub initializer_range: f64,
    /// Epsilon value for RMS normalization.
    pub rms_norm_eps: f32,
    /// Optional model type string.
    pub model_type: Option<String>,
    /// RoPE theta (presumably the rotary embedding base; confirm).
    pub rope_theta: f32,
    /// Optional sliding window size.
    pub sliding_window: Option<usize>,
}