void resolve()

in include/Algo-Direct2.h [241:284]


        void resolve(const FVec<AVX, double>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
    {
        union {
            __m256i vec;
            uint64 ui64[4];
        } u;

        const uint32* buckets = reinterpret_cast<const uint32 *>(base_t::data.buckets);
        const double *xi = base_t::data.xi;

        // read indices t
        const double *p3 = &xi[(u.ui64[3] = buckets[bidx.get3()])];
        const double *p2 = &xi[(u.ui64[2] = buckets[bidx.get2()])];
        const double *p1 = &xi[(u.ui64[1] = buckets[bidx.get1()])];
        const double *p0 = &xi[(u.ui64[0] = buckets[bidx.get0()])];

        // read pairs ( X(t-1), X(t) )
        __m128d xp3 = _mm_loadu_pd(p3);
        __m128d xp2 = _mm_loadu_pd(p2);
        __m128d xp1 = _mm_loadu_pd(p1);
        __m128d xp0 = _mm_loadu_pd(p0);

        // build:
        // { X(t(0)-1), X(t(1)-1), X(t(2)-1), X(t(3)-1) }
        // { X(t(0)),   X(t(1)),   X(t(2)),   X(t(3)) }
        __m256d x02 = _mm256_insertf128_pd(_mm256_castpd128_pd256(xp0), xp2, 1);
        __m256d x13 = _mm256_insertf128_pd(_mm256_castpd128_pd256(xp1), xp3, 1);
        FVec<AVX, double> vxm = _mm256_unpacklo_pd(x02,x13);
        FVec<AVX, double> vxp = _mm256_unpackhi_pd(x02,x13);


//        __m128d h01m = _mm_shuffle_pd(xp0, xp1, 0);
//        __m128d h23m = _mm_shuffle_pd(xp2, xp3, 0);
//        __m128d h01p = _mm_shuffle_pd(xp0, xp1, 3);
//        __m128d h23p = _mm_shuffle_pd(xp2, xp3, 3);
//        FVec<AVX, double> vxm = _mm256_insertf128_pd(_mm256_castpd128_pd256(h01m), h23m, 1);
//        FVec<AVX, double> vxp = _mm256_insertf128_pd(_mm256_castpd128_pd256(h01p), h23p, 1);

        IVec<AVX, double> i(u.vec);
        IVec<AVX, double> vlem = vz < vxm;
        IVec<AVX, double> vlep = vz < vxp;
        i = i + vlem + vlep;
        i.extractLo32s().store(pr);
    }