in include/Algo-Direct2.h [105:138]
        void resolve(const FVec<SSE, double>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
    {
        const uint32* buckets = reinterpret_cast<const uint32 *>(base_t::data.buckets);
        const double *xi = base_t::data.xi;
        uint32 b1 = buckets[bidx.get1()];
        uint32 b0 = buckets[bidx.get0()];
        const double *p1 = &xi[b1];
        const double *p0 = &xi[b0];
        // read pairs ( X(t-1), X(t) )
        __m128d vx1 = _mm_loadu_pd(p1);
        __m128d vx0 = _mm_loadu_pd(p0);
        // build:
        // { X(t(0)-1), X(t(1)-1) }
        // { X(t(0)),   X(t(1)) }
        __m128d vxm = _mm_shuffle_pd(vx0, vx1, 0);
        __m128d vxp = _mm_shuffle_pd(vx0, vx1, 3);
        IVec<SSE, double> i(b1, b0);
        IVec<SSE, double> vlem = (vz < vxm);
        IVec<SSE, double> vlep = (vz < vxp);
        i = i + vlem + vlep;
        union {
            __m128i vec;
            uint32 ui32[4];
        } u;
        u.vec = i;
        pr[0] = u.ui32[0];
        pr[1] = u.ui32[2];
    }