in grok/transformer.py [0:0]
def __init__(
self,
n_layers: int = 4,
n_heads: int = 4,
d_model: int = 256,
dropout: float = 0.1,
max_context_len: int = 1024,
vocab_len: int = 2000,
non_linearity: str = "relu",
weight_noise: float = 0.0,