in grok/transformer.py [0:0]
def _position_encoding(cls, context_len: int, d_model: int) -> Tensor:
    """Build a table of sinusoidal position encodings.

    Entry ``[pos, i]`` is ``sin(pos / 10000 ** (i / d_model))`` for even
    ``i`` and ``cos(pos / 10000 ** ((i - 1) / d_model))`` for odd ``i``, so
    each (even, odd) column pair shares a single frequency.

    Args:
        cls: Unused; present because this is written as a class-level helper.
        context_len: Number of positions (rows) to encode.
        d_model: Width of each encoding (columns).

    Returns:
        A contiguous tensor of shape ``(context_len, d_model)``. When
        ``context_len`` is not positive the result is an empty
        ``(0, d_model)`` tensor.
    """
    # torch.stack rejects an empty list, so answer the degenerate case
    # directly instead of crashing.
    if context_len <= 0:
        return torch.empty((0, d_model))

    # The divisor depends only on the column, never on the position, so it is
    # computed once.  ``i - i % 2`` maps every odd column onto the even column
    # before it, which is exactly the ``i`` / ``i - 1`` split of the formula.
    divisors = [10000 ** ((i - i % 2) / d_model) for i in range(d_model)]

    rows = [
        tensor(
            [
                sin(pos / divisor) if i % 2 == 0 else cos(pos / divisor)
                for i, divisor in enumerate(divisors)
            ]
        )
        for pos in range(context_len)
    ]

    # Stacking along dim 0 yields (context_len, d_model) directly; stacking on
    # dim 1 and transposing gives the same values but as a non-contiguous view.
    return torch.stack(rows)