in mcrouter/lib/Lz4Immutable.cpp [282:475]
size_t Lz4Immutable::compressCommon(
IovecCursor source,
uint8_t* output,
size_t maxOutputSize) const {
assert(source.totalLength() <= kMaxInputSize && "check max size first");
assert(
maxOutputSize >= compressBound(source.totalLength()) &&
"check available space first");
// Create a cursor over the dictionary. Matches are only found in the
// dictionary, and their positions are tracked with cursors over this iovec.
struct iovec dicIov = getDictionaryIovec(state_);
const IovecCursor dicCursor(&dicIov, 1);
// The difference between the dictionary size and the max we can look back
// to find a match (64KB).
// It is used to check whether a match is usable: a match has to be,
// at most, 64KB "behind" the data we are compressing right now.
const size_t dictionaryDiff = kMaxDictionarySize - dicCursor.totalLength();
// Upper limit of where we can look for a match.
const size_t matchFindLimit = source.totalLength() - kMatchFindLimit;
// Upper limit of where a match can go.
const size_t matchLimit = source.totalLength() - kLastLiterals;
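// (The LZ4 format requires the final bytes of the input to be emitted as
// plain literals, which is why both limits stop short of the end of the
// source buffer.)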
// Lower and upper limit to where the output buffer can go.
const uint8_t* const outputStart = output;
const uint8_t* const outputLimit = output + maxOutputSize;
(void)outputLimit;
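// outputLimit is only referenced from asserts; the cast to void silences
// unused-variable warnings in builds where asserts are compiled out.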
// Controls the compression main loop.
bool running = true;
// Next position in source (from 0 up to sourceSize) that has not yet been
// written to the destination buffer.
IovecCursor anchorCursor = source;
// Cursor that points to current match.
IovecCursor match = dicCursor;
if (source.totalLength() < kMinInputSize) {
// Not enough data to compress. Don't even enter the compress loop,
// skip directly to the part that encodes the last literals.
running = false;
} else {
// Skip first byte.
source.advance(1);
}
// Main loop
while (running) {
// LZ4 token
uint8_t* token;
// Find a match
{
size_t step = 0;
size_t searchMatchNumBytes = 1 << kSkipTrigger;
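// Skip-trigger heuristic: the step between attempts starts at 1 and grows
// as more consecutive attempts fail to find a match, so incompressible
// regions are scanned progressively faster.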
do {
// Advance cursor and calculate next step.
source.advance(step);
step = (searchMatchNumBytes++ >> kSkipTrigger);
// Hash current position
uint32_t hash = hashPosition(source);
// Stop if the current position is too close to the end of the input, or
// so far into it that no dictionary match could fit inside the 64KB
// look-back window.
if (UNLIKELY(source.tell() + step > matchFindLimit) ||
UNLIKELY(source.tell() > kMaxDictionarySize)) {
running = false;
break;
}
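// Look up a candidate match: the hash indexes a precomputed table of
// positions in the dictionary.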
uint32_t matchPos = getPositionOnHash(state_.table, hash);
match.seek(matchPos);
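// Keep searching while the candidate is too far behind to be referenced
// (the offset would not fit in the 64KB look-back window) or the 4 bytes
// at the candidate differ from the 4 bytes at the current position.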
} while (((match.tell() + dictionaryDiff) <= source.tell()) ||
(match.peek<uint32_t>() != source.peek<uint32_t>()));
if (!running) {
break;
}
}
// Catch up - try to expand the match backwards.
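// Every equal byte found immediately before the match joins the match
// instead of being emitted as a literal, shortening the literal run.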
while (source.tell() > anchorCursor.tell() && match.tell() > 0) {
source.retreat(1);
match.retreat(1);
if (LIKELY(source.peek<uint8_t>() != match.peek<uint8_t>())) {
source.advance(1);
match.advance(1);
break;
}
}
// Write literal
{
size_t literalLen = source.tell() - anchorCursor.tell();
token = output++;
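// The token byte holds the literal length in its high 4 bits and the
// match length in its low 4 bits; lengths too large for either field
// spill into additional extension bytes.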
// Check output limit
assert(
output + literalLen + (2 + 1 + kLastLiterals) + (literalLen / 255) <=
outputLimit);
// Encode literal length
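// Literal lengths that do not fit in the token's field are written as
// kRunMask in the token, followed by as many 255 bytes as needed and a
// final remainder byte.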
if (literalLen >= kRunMask) {
int len = static_cast<int>(literalLen - kRunMask);
*token = (kRunMask << kMlBits);
for (; len >= 255; len -= 255) {
*output++ = 255;
}
*output++ = static_cast<uint8_t>(len);
} else {
*token = static_cast<uint8_t>(literalLen << kMlBits);
}
// Copy literals to output buffer.
wildCopy(output, anchorCursor, literalLen);
output += literalLen;
}
// Encode offset
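// The offset is the distance from the current position back to the match,
// measured as if the dictionary sat immediately before the source buffer.
// It is stored as 2 little-endian bytes.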
uint16_t offset = dicCursor.totalLength() - match.tell() + source.tell();
writeLE(output, static_cast<uint16_t>(offset));
output += 2;
// Encode matchLength
{
// The match cannot run past the end of the dictionary...
size_t posLimit =
source.tell() + (dicCursor.totalLength() - match.tell());
// ...nor past the end of the source buffer.
posLimit = std::min(posLimit, matchLimit);
source.advance(kMinMatch);
match.advance(kMinMatch);
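// The first kMinMatch bytes of the match are implicit in the format, so
// only the additional matching bytes beyond them are counted and encoded.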
size_t matchLen = calculateMatchLength(source, match, posLimit);
assert(output + (1 + kLastLiterals) + (matchLen >> 8) <= outputLimit);
// Write match length.
if (matchLen >= kMlMask) {
*token += kMlMask;
matchLen -= kMlMask;
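// Emit the length extension bytes two at a time (2 * 255 = 510); this is
// simply an unrolled version of the one-byte-at-a-time loop used for the
// literal length.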
for (; matchLen >= 510; matchLen -= 510) {
*output++ = 255;
*output++ = 255;
}
if (matchLen >= 255) {
matchLen -= 255;
*output++ = 255;
}
*output++ = static_cast<uint8_t>(matchLen);
} else {
*token += static_cast<uint8_t>(matchLen);
}
}
// Update anchor
anchorCursor.seek(source.tell());
// Test end of chunk
if (source.tell() > matchFindLimit) {
break;
}
}
// Encode last literals
{
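// Whatever remains after the last match is emitted as a final
// literal-only sequence: a token with an empty match field, optional
// length bytes, and then the raw literal bytes themselves.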
size_t lastRun = source.totalLength() - anchorCursor.tell();
assert(
(output - outputStart) + lastRun + 1 +
((lastRun + 255 - kRunMask) / 255) <=
maxOutputSize);
if (lastRun >= kRunMask) {
size_t accumulator = lastRun - kRunMask;
*output++ = kRunMask << kMlBits;
for (; accumulator >= 255; accumulator -= 255) {
*output++ = 255;
}
*output++ = static_cast<uint8_t>(accumulator);
} else {
*output++ = static_cast<uint8_t>(lastRun << kMlBits);
}
safeCopy(output, anchorCursor, lastRun);
output += lastRun;
}
return output - outputStart;
}