in lib/dictBuilder/zdict.c [170:337]
static dictItem ZDICT_analyzePos(
BYTE* doneMarks,
const int* suffix, U32 start,
const void* buffer, U32 minRatio, U32 notificationLevel)
{
U32 lengthList[LLIMIT] = {0};
U32 cumulLength[LLIMIT] = {0};
U32 savings[LLIMIT] = {0};
const BYTE* b = (const BYTE*)buffer;
size_t maxLength = LLIMIT;
size_t pos = (size_t)suffix[start];
U32 end = start;
dictItem solution;
/* init */
memset(&solution, 0, sizeof(solution));
doneMarks[pos] = 1;
/* trivial repetition cases */
if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2))
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
/* skip and mark segment */
U16 const pattern16 = MEM_read16(b+pos+4);
U32 u, patternEnd = 6;
while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
for (u=1; u<patternEnd; u++)
doneMarks[pos+u] = 1;
return solution;
}
/* look forward */
{ size_t length;
do {
end++;
length = ZDICT_count(b + pos, b + suffix[end]);
} while (length >= MINMATCHLENGTH);
}
/* look backward */
{ size_t length;
do {
length = ZDICT_count(b + pos, b + *(suffix+start-1));
if (length >=MINMATCHLENGTH) start--;
} while(length >= MINMATCHLENGTH);
}
/* exit if not found a minimum nb of repetitions */
if (end-start < minRatio) {
U32 idx;
for(idx=start; idx<end; idx++)
doneMarks[suffix[idx]] = 1;
return solution;
}
{ int i;
U32 mml;
U32 refinedStart = start;
U32 refinedEnd = end;
DISPLAYLEVEL(4, "\n");
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
DISPLAYLEVEL(4, "\n");
for (mml = MINMATCHLENGTH ; ; mml++) {
BYTE currentChar = 0;
U32 currentCount = 0;
U32 currentID = refinedStart;
U32 id;
U32 selectedCount = 0;
U32 selectedID = currentID;
for (id =refinedStart; id < refinedEnd; id++) {
if (b[suffix[id] + mml] != currentChar) {
if (currentCount > selectedCount) {
selectedCount = currentCount;
selectedID = currentID;
}
currentID = id;
currentChar = b[ suffix[id] + mml];
currentCount = 0;
}
currentCount ++;
}
if (currentCount > selectedCount) { /* for last */
selectedCount = currentCount;
selectedID = currentID;
}
if (selectedCount < minRatio)
break;
refinedStart = selectedID;
refinedEnd = refinedStart + selectedCount;
}
/* evaluate gain based on new dict */
start = refinedStart;
pos = suffix[refinedStart];
end = start;
memset(lengthList, 0, sizeof(lengthList));
/* look forward */
{ size_t length;
do {
end++;
length = ZDICT_count(b + pos, b + suffix[end]);
if (length >= LLIMIT) length = LLIMIT-1;
lengthList[length]++;
} while (length >=MINMATCHLENGTH);
}
/* look backward */
{ size_t length = MINMATCHLENGTH;
while ((length >= MINMATCHLENGTH) & (start > 0)) {
length = ZDICT_count(b + pos, b + suffix[start - 1]);
if (length >= LLIMIT) length = LLIMIT - 1;
lengthList[length]++;
if (length >= MINMATCHLENGTH) start--;
}
}
/* largest useful length */
memset(cumulLength, 0, sizeof(cumulLength));
cumulLength[maxLength-1] = lengthList[maxLength-1];
for (i=(int)(maxLength-2); i>=0; i--)
cumulLength[i] = cumulLength[i+1] + lengthList[i];
for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
maxLength = i;
/* reduce maxLength in case of final into repetitive data */
{ U32 l = (U32)maxLength;
BYTE const c = b[pos + maxLength-1];
while (b[pos+l-2]==c) l--;
maxLength = l;
}
if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */
/* calculate savings */
savings[5] = 0;
for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
solution.pos = (U32)pos;
solution.length = (U32)maxLength;
solution.savings = savings[maxLength];
/* mark positions done */
{ U32 id;
for (id=start; id<end; id++) {
U32 p, pEnd, length;
U32 const testedPos = (U32)suffix[id];
if (testedPos == pos)
length = solution.length;
else {
length = (U32)ZDICT_count(b+pos, b+testedPos);
if (length > solution.length) length = solution.length;
}
pEnd = (U32)(testedPos + length);
for (p=testedPos; p<pEnd; p++)
doneMarks[p] = 1;
} } }
return solution;
}