thrift/lib/cpp/util/VarintUtils-inl.h

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifdef __BMI2__ #include <immintrin.h> #endif #include <array> #include <folly/Utility.h> #include <folly/io/Cursor.h> #include <folly/lang/Bits.h> namespace apache { namespace thrift { namespace util { namespace detail { template <class T, class CursorT> void readVarintSlow(CursorT& c, T& value) { // ceil(sizeof(T) * 8) / 7 static const size_t maxSize = (8 * sizeof(T) + 6) / 7; T retVal = 0; uint8_t shift = 0; uint8_t rsize = 0; while (true) { uint8_t byte = c.template read<uint8_t>(); rsize++; retVal |= (uint64_t)(byte & 0x7f) << shift; shift += 7; if (!(byte & 0x80)) { value = retVal; return; } if (rsize >= maxSize) { // Too big for return type throw std::out_of_range("invalid varint read"); } } } // This is a simple function that just throws an exception. It is defined out // line to make the caller (readVarint) smaller and simpler (assembly-wise), // which gives us 5% perf win (even when the exception is not actually thrown). [[noreturn]] void throwInvalidVarint(); template <class T, class CursorT> void readVarintMediumSlow(CursorT& c, T& value, const uint8_t* p, size_t len) { enum { maxSize = (8 * sizeof(T) + 6) / 7 }; // check that the available data is more than the longest possible varint or // that the last available byte ends a varint if (FOLLY_LIKELY(len >= maxSize || (len > 0 && !(p[len - 1] & 0x80)))) { uint64_t result; const uint8_t* start = p; do { uint64_t byte; // byte is uint64_t so that all shifts are 64-bit // clang-format off byte = *p++; result = (byte & 0x7f); if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 7; if (!(byte & 0x80)) break; if (sizeof(T) <= 1) throwInvalidVarint(); byte = *p++; result |= (byte & 0x7f) << 14; if (!(byte & 0x80)) break; if (sizeof(T) <= 2) throwInvalidVarint(); byte = *p++; result |= (byte & 0x7f) << 21; if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 28; if (!(byte & 0x80)) break; if (sizeof(T) <= 4) throwInvalidVarint(); byte = *p++; result |= (byte & 0x7f) << 35; if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 42; if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 49; if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 56; if (!(byte & 0x80)) break; byte = *p++; result |= (byte & 0x7f) << 63; if (!(byte & 0x80)) break; // clang-format on throwInvalidVarint(); } while (false); value = static_cast<T>(result); c.skipNoAdvance(p - start); } else { readVarintSlow(c, value); } } } // namespace detail template <class T, class CursorT> void readVarint(CursorT& c, T& value) { const uint8_t* p = c.data(); size_t len = c.length(); if (len > 0 && !(*p & 0x80)) { value = static_cast<T>(*p); c.skipNoAdvance(1); } else { detail::readVarintMediumSlow(c, value, p, len); } } template <class T, class CursorT> T readVarint(CursorT& c) { T value; readVarint(c, value); return value; } namespace detail { // Cursor class must have ensure() and append() (e.g. QueueAppender) template <class Cursor, class T> uint8_t writeVarintSlow(Cursor& c, T value) { enum { maxSize = (8 * sizeof(T) + 6) / 7 }; auto unval = folly::to_unsigned(value); c.ensure(maxSize); uint8_t* p = c.writableData(); uint8_t* orig_p = p; // precondition: (value & ~0x7f) != 0 do { // clang-format off *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; if ((unval & ~0x7f) == 0) break; *p++ = ((unval & 0x7f) | 0x80); unval = unval >> 7; // clang-format on } while (false); *p++ = static_cast<uint8_t>(unval); c.append(p - orig_p); return static_cast<uint8_t>(p - orig_p); } } // namespace detail template <class Cursor, class T> uint8_t writeVarintUnrolled(Cursor& c, T value) { if (FOLLY_LIKELY((value & ~0x7f) == 0)) { c.template write<uint8_t>(static_cast<uint8_t>(value)); return 1; } return detail::writeVarintSlow(c, value); } #ifdef __BMI2__ template <class Cursor, class T> uint8_t writeVarintBMI2(Cursor& c, T valueS) { auto value = folly::to_unsigned(valueS); if (FOLLY_LIKELY((value & ~0x7f) == 0)) { c.template write<uint8_t>(static_cast<uint8_t>(value)); return 1; } if /* constexpr */ (sizeof(T) == 1) { c.template write<uint16_t>(static_cast<uint16_t>(value | 0x100)); return 2; } constexpr uint64_t kMask = 0x8080808080808080ULL; static const std::array<uint8_t, 64> kShift = []() { std::array<uint8_t, 64> v = {}; for (size_t i = 0; i < 64; i++) { uint8_t byteShift = folly::to_narrow(i == 0 ? 0 : (8 - ((63 - i) / 7))); v[i] = byteShift * 8; } return v; }(); static const std::array<uint8_t, 64> kSize = []() { std::array<uint8_t, 64> v = {}; for (size_t i = 0; i < 64; i++) { v[i] = folly::to_narrow(((63 - i) / 7) + 1); } return v; }(); auto clzll = __builtin_clzll(static_cast<uint64_t>(value)); // Only the first 56 bits of @value will be deposited in @v. uint64_t v = _pdep_u64(value, ~kMask) | (kMask >> kShift[clzll]); uint8_t size = kSize[clzll]; if /* constexpr */ (sizeof(T) < sizeof(uint64_t)) { c.template write<uint64_t>(v, size); } else { // Ensure max encoding space for u64 varint (10 bytes). // Write 56 bits using pdep and the other 8 bits manually. // Write 10B to @c, but only update the size using @size. // (Writing exta bytes is faster than branching based on @size.) c.ensure(10); uint8_t* p = c.writableData(); folly::storeUnaligned<uint64_t>(p, v); p[sizeof(uint64_t) + 0] = static_cast<uint64_t>(value) >> 56; p[sizeof(uint64_t) + 1] = 1; c.append(size); } return size; } template <class Cursor, class T> uint8_t writeVarint(Cursor& c, T value) { return writeVarintBMI2(c, value); } #else // __BMI2__ template <class Cursor, class T> uint8_t writeVarint(Cursor& c, T value) { return writeVarintUnrolled(c, value); } #endif // __BMI2__ inline int32_t zigzagToI32(uint32_t n) { return (n & 1) ? ~(n >> 1) : (n >> 1); } inline int64_t zigzagToI64(uint64_t n) { return (n & 1) ? ~(n >> 1) : (n >> 1); } constexpr inline uint32_t i32ToZigzag(const int32_t n) { return (static_cast<uint32_t>(n) << 1) ^ static_cast<uint32_t>(n >> 31); } constexpr inline uint64_t i64ToZigzag(const int64_t l) { return (static_cast<uint64_t>(l) << 1) ^ static_cast<uint64_t>(l >> 63); } } // namespace util } // namespace thrift } // namespace apache

thrift/lib/cpp/util/VarintUtils-inl.h (179 lines of code) (raw):