void resolve()

in include/Algo-Direct2.h [57:100]


        void resolve(const FVec<SSE, float>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
    {
        union U {
            __m128i vec;
            uint32 ui32[4];
        } u;

        const uint32* buckets = reinterpret_cast<const uint32 *>(base_t::data.buckets);
        const float *xi = base_t::data.xi;

        // read indices t
        const double *p3 = reinterpret_cast<const double *>(&xi[(u.ui32[3] = buckets[bidx.get3()])]);
        const double *p2 = reinterpret_cast<const double *>(&xi[(u.ui32[2] = buckets[bidx.get2()])]);
        const double *p1 = reinterpret_cast<const double *>(&xi[(u.ui32[1] = buckets[bidx.get1()])]);
        const double *p0 = reinterpret_cast<const double *>(&xi[(u.ui32[0] = buckets[bidx.get0()])]);

#if 0
        // read pairs ( X(t-1), X(t) )
        __m128 xp3 = _mm_castpd_ps(_mm_load_sd(p3));
        __m128 xp2 = _mm_castpd_ps(_mm_load_sd(p2));
        __m128 xp1 = _mm_castpd_ps(_mm_load_sd(p1));
        __m128 xp0 = _mm_castpd_ps(_mm_load_sd(p0));

        // build:
        // { X(t(0)-1), X(t(1)-1), X(t(2)-1), X(t(3)-1) }
        // { X(t(0)),   X(t(1)),   X(t(2)),   X(t(3)) }
        __m128 h13 = _mm_shuffle_ps(xp1, xp3, (1 << 2) + (1 << 6));
        __m128 h02 = _mm_shuffle_ps(xp0, xp2, (1 << 2) + (1 << 6));
        __m128 u01 = _mm_unpacklo_ps(h02, h13);
        __m128 u23 = _mm_unpackhi_ps(h02, h13);
        __m128 vxm = _mm_shuffle_ps(u01, u23, (0) + (1 << 2) + (0 << 4) + (1 << 6));
        __m128 vxp = _mm_shuffle_ps(u01, u23, (2) + (3 << 2) + (2 << 4) + (3 << 6));
#else
        __m128 xp23 = _mm_castpd_ps(_mm_set_pd(*p3, *p2));
        __m128 xp01 = _mm_castpd_ps(_mm_set_pd(*p1, *p0));
        __m128 vxm = _mm_shuffle_ps(xp01, xp23, (0) + (2 << 2) + (0 << 4) + (2 << 6));
        __m128 vxp = _mm_shuffle_ps(xp01, xp23, (1) + (3 << 2) + (1 << 4) + (3 << 6));
#endif
        IVec<SSE, float> i(u.vec);
        IVec<SSE, float> vlem = vz < vxm;
        IVec<SSE, float> vlep = vz < vxp;
        i = i + vlem + vlep;
        i.store(pr);
    }