in FourQ_64bit_and_portable/AMD64/fp_x64.h [194:228]
__inline void fpexp1251(felm_t a, felm_t af)
{ // Exponentiation over GF(p), af = a^(2^125-1)
  // Fixed square-and-multiply addition chain; the sequence of operations does not depend on the value of a.
  // Notation in the comments below: e(n) = 2^n - 1, so rN holds a^e(N).
  // Inputs:  a
  // Output:  af, written only by the final multiplication (a is read up to that point).
    int k;
    felm_t r2, r4, r8, r16, r32, acc;

    // r2 = a^e(2) = a^3
    fpsqr1271(a, r2);
    fpmul1271(a, r2, r2);

    // r4 = r2^(2^2) * r2 = a^e(4)
    fpsqr1271(r2, r4);
    fpsqr1271(r4, r4);
    fpmul1271(r2, r4, r4);

    // r8 = r4^(2^4) * r4 = a^e(8)
    fpsqr1271(r4, r8);
    for (k = 1; k < 4; k++) {
        fpsqr1271(r8, r8);
    }
    fpmul1271(r4, r8, r8);

    // r16 = r8^(2^8) * r8 = a^e(16)
    fpsqr1271(r8, r16);
    for (k = 1; k < 8; k++) {
        fpsqr1271(r16, r16);
    }
    fpmul1271(r8, r16, r16);

    // r32 = r16^(2^16) * r16 = a^e(32)
    fpsqr1271(r16, r32);
    for (k = 1; k < 16; k++) {
        fpsqr1271(r32, r32);
    }
    fpmul1271(r16, r32, r32);

    // acc = r32^(2^32) * r32 = a^e(64)
    fpsqr1271(r32, acc);
    for (k = 1; k < 32; k++) {
        fpsqr1271(acc, acc);
    }
    fpmul1271(r32, acc, acc);

    // acc = acc^(2^32) * r32 = a^e(96)
    for (k = 0; k < 32; k++) {
        fpsqr1271(acc, acc);
    }
    fpmul1271(acc, r32, acc);

    // acc = acc^(2^16) * r16 = a^e(112)
    for (k = 0; k < 16; k++) {
        fpsqr1271(acc, acc);
    }
    fpmul1271(r16, acc, acc);

    // acc = acc^(2^8) * r8 = a^e(120)
    for (k = 0; k < 8; k++) {
        fpsqr1271(acc, acc);
    }
    fpmul1271(r8, acc, acc);

    // acc = acc^(2^4) * r4 = a^e(124)
    for (k = 0; k < 4; k++) {
        fpsqr1271(acc, acc);
    }
    fpmul1271(r4, acc, acc);

    // af = acc^2 * a = a^(2*e(124) + 1) = a^(2^125-1)
    fpsqr1271(acc, acc);
    fpmul1271(a, acc, af);
}