in lib/compress/zstd_lazy.c [1100:1275]
size_t ZSTD_RowFindBestMatch(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls, const ZSTD_dictMode_e dictMode,
const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
U16* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache;
const U32 hashLog = ms->rowHashLog;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 curr = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
const U32 isDictionary = (ms->loadedDictEnd != 0);
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
const U32 rowEntries = (1U << rowLog);
const U32 rowMask = rowEntries - 1;
const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
U32 nbAttempts = 1U << cappedSearchLog;
size_t ml=4-1;
/* DMS/DDS variables that may be referenced laster */
const ZSTD_matchState_t* const dms = ms->dictMatchState;
/* Initialize the following variables to satisfy static analyzer */
size_t ddsIdx = 0;
U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
U32 dmsTag = 0;
U32* dmsRow = NULL;
BYTE* dmsTagRow = NULL;
if (dictMode == ZSTD_dedicatedDictSearch) {
const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
{ /* Prefetch DDS hashtable entry */
ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
PREFETCH_L1(&dms->hashTable[ddsIdx]);
}
ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* Prefetch DMS rows */
U32* const dmsHashTable = dms->hashTable;
U16* const dmsTagTable = dms->tagTable;
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
dmsRow = dmsHashTable + dmsRelRow;
ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
}
/* Update the hashTable and tagTable up to (but not including) ip */
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
{ /* Get the hash for ip, compute the appropriate row */
U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
U32* const row = hashTable + relRow;
BYTE* tagRow = (BYTE*)(tagTable + relRow);
U32 const head = *tagRow & rowMask;
U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
size_t numMatches = 0;
size_t currMatch = 0;
ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
/* Cycle through the matches and prefetch */
for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
U32 const matchIndex = row[matchPos];
assert(numMatches < rowEntries);
if (matchIndex < lowLimit)
break;
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
PREFETCH_L1(base + matchIndex);
} else {
PREFETCH_L1(dictBase + matchIndex);
}
matchBuffer[numMatches++] = matchIndex;
}
/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}
/* Return the longest match */
for (; currMatch < numMatches; ++currMatch) {
U32 const matchIndex = matchBuffer[currMatch];
size_t currentMl=0;
assert(matchIndex < curr);
assert(matchIndex >= lowLimit);
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
const BYTE* const match = base + matchIndex;
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
if (match[ml] == ip[ml]) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else {
const BYTE* const match = dictBase + matchIndex;
assert(match+4 <= dictEnd);
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
/* Save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
}
}
assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
if (dictMode == ZSTD_dedicatedDictSearch) {
ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
} else if (dictMode == ZSTD_dictMatchState) {
/* TODO: Measure and potentially add prefetching to DMS */
const U32 dmsLowestIndex = dms->window.dictLimit;
const BYTE* const dmsBase = dms->window.base;
const BYTE* const dmsEnd = dms->window.nextSrc;
const U32 dmsSize = (U32)(dmsEnd - dmsBase);
const U32 dmsIndexDelta = dictLimit - dmsSize;
{ U32 const head = *dmsTagRow & rowMask;
U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
size_t numMatches = 0;
size_t currMatch = 0;
ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
U32 const matchIndex = dmsRow[matchPos];
if (matchIndex < dmsLowestIndex)
break;
PREFETCH_L1(dmsBase + matchIndex);
matchBuffer[numMatches++] = matchIndex;
}
/* Return the longest match */
for (; currMatch < numMatches; ++currMatch) {
U32 const matchIndex = matchBuffer[currMatch];
size_t currentMl=0;
assert(matchIndex >= dmsLowestIndex);
assert(matchIndex < curr);
{ const BYTE* const match = dmsBase + matchIndex;
assert(match+4 <= dmsEnd);
if (MEM_read32(match) == MEM_read32(ip))
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
}
if (currentMl > ml) {
ml = currentMl;
assert(curr > matchIndex + dmsIndexDelta);
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
if (ip+currentMl == iLimit) break;
}
}
}
}
return ml;
}