fn square_inner()

in src/backend/serial/u32/field.rs [523:562]


    /// Computes the ten unreduced 64-bit product limbs of `self * self`.
    ///
    /// This is the multiplication routine specialised for squaring; its
    /// pre- and post-conditions are the same as those of the multiplication
    /// function.
    fn square_inner(&self) -> [u64; 10] {
        /// Widening multiply: both 32-bit operands are extended to 64 bits
        /// first, so the full product is kept.
        #[inline(always)]
        fn mul32(lhs: u32, rhs: u32) -> u64 {
            u64::from(lhs) * u64::from(rhs)
        }

        // Pull the ten input limbs into locals.
        let limbs = &self.0;
        let a0 = limbs[0];
        let a1 = limbs[1];
        let a2 = limbs[2];
        let a3 = limbs[3];
        let a4 = limbs[4];
        let a5 = limbs[5];
        let a6 = limbs[6];
        let a7 = limbs[7];
        let a8 = limbs[8];
        let a9 = limbs[9];

        // d<i> = 2 * a<i>, computed in 32 bits.
        let d0 = 2 * a0;
        let d1 = 2 * a1;
        let d2 = 2 * a2;
        let d3 = 2 * a3;
        let d4 = 2 * a4;
        let d5 = 2 * a5;
        let d6 = 2 * a6;
        let d7 = 2 * a7;

        // k<i> = 19 * a<i>, computed in 32 bits.
        let k5 = 19 * a5;
        let k6 = 19 * a6;
        let k7 = 19 * a7;
        let k8 = 19 * a8;
        let k9 = 19 * a9;

        // Terms that need an overall factor of 38 are never formed as a
        // 32-bit `38 * limb`: lg(38) is too large and would leave less than
        // one bit of headroom. They are built as `19 * limb` in 32 bits and
        // the remaining factor of 2 is applied to the 64-bit product (the
        // parenthesised `(...) * 2` groups below).
        let z0 = mul32(a0, a0)
            + mul32(d2, k8)
            + mul32(d4, k6)
            + (mul32(d1, k9) + mul32(d3, k7) + mul32(a5, k5)) * 2;
        let z1 = mul32(d0, a1)
            + mul32(d3, k8)
            + mul32(d5, k6)
            + (mul32(a2, k9) + mul32(a4, k7)) * 2;
        let z2 = mul32(d0, a2)
            + mul32(d1, a1)
            + mul32(d4, k8)
            + mul32(a6, k6)
            + (mul32(d3, k9) + mul32(d5, k7)) * 2;
        let z3 = mul32(d0, a3)
            + mul32(d1, a2)
            + mul32(d5, k8)
            + (mul32(a4, k9) + mul32(a6, k7)) * 2;
        let z4 = mul32(d0, a4)
            + mul32(d1, d3)
            + mul32(a2, a2)
            + mul32(d6, k8)
            + (mul32(d5, k9) + mul32(a7, k7)) * 2;
        let z5 = mul32(d0, a5)
            + mul32(d1, a4)
            + mul32(d2, a3)
            + mul32(d7, k8)
            + mul32(a6, k9) * 2;
        let z6 = mul32(d0, a6)
            + mul32(d1, d5)
            + mul32(d2, a4)
            + mul32(d3, a3)
            + mul32(a8, k8)
            + mul32(d7, k9) * 2;
        let z7 = mul32(d0, a7)
            + mul32(d1, a6)
            + mul32(d2, a5)
            + mul32(d3, a4)
            + mul32(a8, k9) * 2;
        let z8 = mul32(d0, a8)
            + mul32(d1, d7)
            + mul32(d2, a6)
            + mul32(d3, d5)
            + mul32(a4, a4)
            + mul32(a9, k9) * 2;
        let z9 = mul32(d0, a9)
            + mul32(d1, a8)
            + mul32(d2, a7)
            + mul32(d3, a6)
            + mul32(d4, a5);

        [z0, z1, z2, z3, z4, z5, z6, z7, z8, z9]
    }