static dictItem ZDICT_analyzePos()

in lib/dictBuilder/zdict.c [170:337]


static dictItem ZDICT_analyzePos(
                       BYTE* doneMarks,
                       const int* suffix, U32 start,
                       const void* buffer, U32 minRatio, U32 notificationLevel)
{
    U32 lengthList[LLIMIT] = {0};
    U32 cumulLength[LLIMIT] = {0};
    U32 savings[LLIMIT] = {0};
    const BYTE* b = (const BYTE*)buffer;
    size_t maxLength = LLIMIT;
    size_t pos = (size_t)suffix[start];
    U32 end = start;
    dictItem solution;

    /* init */
    memset(&solution, 0, sizeof(solution));
    doneMarks[pos] = 1;

    /* trivial repetition cases */
    if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2))
       ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
       ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
        /* skip and mark segment */
        U16 const pattern16 = MEM_read16(b+pos+4);
        U32 u, patternEnd = 6;
        while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
        if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
        for (u=1; u<patternEnd; u++)
            doneMarks[pos+u] = 1;
        return solution;
    }

    /* look forward */
    {   size_t length;
        do {
            end++;
            length = ZDICT_count(b + pos, b + suffix[end]);
        } while (length >= MINMATCHLENGTH);
    }

    /* look backward */
    {   size_t length;
        do {
            length = ZDICT_count(b + pos, b + *(suffix+start-1));
            if (length >=MINMATCHLENGTH) start--;
        } while(length >= MINMATCHLENGTH);
    }

    /* exit if not found a minimum nb of repetitions */
    if (end-start < minRatio) {
        U32 idx;
        for(idx=start; idx<end; idx++)
            doneMarks[suffix[idx]] = 1;
        return solution;
    }

    {   int i;
        U32 mml;
        U32 refinedStart = start;
        U32 refinedEnd = end;

        DISPLAYLEVEL(4, "\n");
        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u  ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
        DISPLAYLEVEL(4, "\n");

        for (mml = MINMATCHLENGTH ; ; mml++) {
            BYTE currentChar = 0;
            U32 currentCount = 0;
            U32 currentID = refinedStart;
            U32 id;
            U32 selectedCount = 0;
            U32 selectedID = currentID;
            for (id =refinedStart; id < refinedEnd; id++) {
                if (b[suffix[id] + mml] != currentChar) {
                    if (currentCount > selectedCount) {
                        selectedCount = currentCount;
                        selectedID = currentID;
                    }
                    currentID = id;
                    currentChar = b[ suffix[id] + mml];
                    currentCount = 0;
                }
                currentCount ++;
            }
            if (currentCount > selectedCount) {  /* for last */
                selectedCount = currentCount;
                selectedID = currentID;
            }

            if (selectedCount < minRatio)
                break;
            refinedStart = selectedID;
            refinedEnd = refinedStart + selectedCount;
        }

        /* evaluate gain based on new dict */
        start = refinedStart;
        pos = suffix[refinedStart];
        end = start;
        memset(lengthList, 0, sizeof(lengthList));

        /* look forward */
        {   size_t length;
            do {
                end++;
                length = ZDICT_count(b + pos, b + suffix[end]);
                if (length >= LLIMIT) length = LLIMIT-1;
                lengthList[length]++;
            } while (length >=MINMATCHLENGTH);
        }

        /* look backward */
        {   size_t length = MINMATCHLENGTH;
            while ((length >= MINMATCHLENGTH) & (start > 0)) {
                length = ZDICT_count(b + pos, b + suffix[start - 1]);
                if (length >= LLIMIT) length = LLIMIT - 1;
                lengthList[length]++;
                if (length >= MINMATCHLENGTH) start--;
            }
        }

        /* largest useful length */
        memset(cumulLength, 0, sizeof(cumulLength));
        cumulLength[maxLength-1] = lengthList[maxLength-1];
        for (i=(int)(maxLength-2); i>=0; i--)
            cumulLength[i] = cumulLength[i+1] + lengthList[i];

        for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
        maxLength = i;

        /* reduce maxLength in case of final into repetitive data */
        {   U32 l = (U32)maxLength;
            BYTE const c = b[pos + maxLength-1];
            while (b[pos+l-2]==c) l--;
            maxLength = l;
        }
        if (maxLength < MINMATCHLENGTH) return solution;   /* skip : no long-enough solution */

        /* calculate savings */
        savings[5] = 0;
        for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
            savings[i] = savings[i-1] + (lengthList[i] * (i-3));

        DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f)  \n",
                     (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);

        solution.pos = (U32)pos;
        solution.length = (U32)maxLength;
        solution.savings = savings[maxLength];

        /* mark positions done */
        {   U32 id;
            for (id=start; id<end; id++) {
                U32 p, pEnd, length;
                U32 const testedPos = (U32)suffix[id];
                if (testedPos == pos)
                    length = solution.length;
                else {
                    length = (U32)ZDICT_count(b+pos, b+testedPos);
                    if (length > solution.length) length = solution.length;
                }
                pEnd = (U32)(testedPos + length);
                for (p=testedPos; p<pEnd; p++)
                    doneMarks[p] = 1;
    }   }   }

    return solution;
}