// sql_utils/base/bits.h
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_BITS_H_
#define THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_BITS_H_
#include "absl/base/casts.h"
#include "absl/numeric/int128.h"
#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif
#include <type_traits>
#include "gtest/gtest_prod.h"
#include "sql_utils/base/endian.h"
#include "sql_utils/base/logging.h"
namespace bigquery_ml_utils_base {
// Bits: a namespace-like class of static bit-twiddling utilities —
// population count, leading-zero count, bit reversal, floor/ceiling log2,
// LSB/MSB search, and bit-field get/set/copy — with target-specific fast
// paths (x86 POPCNT/LZCNT/BMI, aarch64, POWER) and portable fallbacks.
class Bits {
public:
// A traits class template for unsigned integer type sizes. Primary
// information contained herein is corresponding (unsigned) integer type.
// E.g. UnsignedTypeBySize<4>::Type is uint32_t. Used by UnsignedType.
template<int size /* in bytes */>
struct UnsignedTypeBySize;
// Auxiliary struct for figuring out an unsigned type for a given type.
template<typename T> struct UnsignedType {
typedef typename UnsignedTypeBySize<sizeof(T)>::Type Type;
};
// Return the number of one bits in the given integer.
static int CountOnesInByte(unsigned char n);
// Return the number of one bits in the 32-bit integer n.
static int CountOnes(uint32_t n) {
#if defined(__powerpc64__) && defined(__GNUC__)
// Use popcount builtin if we know it is inlined and fast.
return PopcountWithBuiltin(n);
#elif (defined(__i386__) || defined(__x86_64__)) && defined(__POPCNT__) && \
defined(__GNUC__)
return PopcountWithBuiltin(n);
#else
// Portable sideways addition: sum adjacent bits, then 2-bit fields, then
// nibbles; the final multiply accumulates all byte sums into the top byte.
n -= ((n >> 1) & 0x55555555);
n = ((n >> 2) & 0x33333333) + (n & 0x33333333);
return static_cast<int>((((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24);
#endif
}
// Count bits using sideways addition [WWG'57]. See Knuth TAOCP v4 7.1.3(59)
static inline int CountOnes64(uint64_t n) {
#if defined(__powerpc64__) && defined(__GNUC__)
return PopcountWithBuiltin(n);
#elif defined(__x86_64__) && defined(__POPCNT__) && defined(__GNUC__)
return PopcountWithBuiltin(n);
#elif defined(_LP64)
n -= (n >> 1) & 0x5555555555555555ULL;
n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
return static_cast<int>(
(((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
#else
// Non-LP64 targets: count each 32-bit half separately.
return CountOnes(n >> 32) + CountOnes(n & 0xffffffff);
#endif
}
// Count bits in uint128
static inline int CountOnes128(absl::uint128 n) {
return Bits::CountOnes64(absl::Uint128High64(n)) +
Bits::CountOnes64(absl::Uint128Low64(n));
}
// Count leading zeroes. This is similar to wordsize - 1 - floor(log2(n)).
// Returns number of bits if n is 0.
static inline int CountLeadingZeros32(uint32_t n) {
// Instead of using __builtin_clz(), we explicitly write target specific
// assembly because we want to handle n == 0. If we used __builtin_clz(),
// we would need to use something like "n ? __builtin_clz(n) : 32". The
// check is not necessary on POWER and aarch64 but we cannot depend on
// that because __builtin_clz(0) is documented to be undefined.
#if defined(__aarch64__) && defined(__GNUC__)
int32_t count;
asm("clz %w0,%w1" : "=r"(count) : "r"(n));
return count;
#elif (defined(__i386__) || defined(__x86_64__)) && defined(__LZCNT__) && \
defined(__GNUC__)
return __lzcnt32(n);
#elif (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
if (n == 0) return 32;
int32_t idx;
asm("bsr %1, %0"
: "=r"(idx)
: "ro"(n)
: "cc"); // bsr writes Z flag
// bsr yields the index of the highest set bit; for 0 <= idx <= 31,
// 31 ^ idx == 31 - idx, the leading-zero count.
return 31 ^ idx;
#elif defined(__powerpc64__) && defined(__GNUC__)
int32_t count;
asm("cntlzw %0,%1" : "=r"(count) : "r"(n));
return count;
#elif defined(__GNUC__)
return CountLeadingZerosWithBuiltin(n);
#else
return CountLeadingZeros32_Portable(n);
#endif
}
// 64-bit variant of CountLeadingZeros32; returns 64 if n is 0.
static inline int CountLeadingZeros64(uint64_t n) {
#if defined(__aarch64__) && defined(__GNUC__)
int64_t count;
asm("clz %0,%1" : "=r"(count) : "r"(n));
return static_cast<int>(count);
#elif defined(__powerpc64__) && defined(__GNUC__)
int64_t count;
asm("cntlzd %0,%1" : "=r"(count) : "r"(n));
return static_cast<int>(count);
#elif (defined(__i386__) || defined(__x86_64__)) && defined(__LZCNT__) && \
defined(__GNUC__)
return __lzcnt64(n);
#elif defined(__x86_64__) && defined(__GNUC__)
if (n == 0) return 64;
int64_t idx;
asm ("bsr %1, %0"
: "=r"(idx)
: "ro"(n)
: "cc"); // bsr writes Z flag
return static_cast<int>(63 ^ idx);
#elif defined(__GNUC__)
return CountLeadingZerosWithBuiltin(n);
#else
return CountLeadingZeros64_Portable(n);
#endif
}
// 128-bit variant; returns 128 if n is 0.
static inline int CountLeadingZeros128(absl::uint128 n) {
if (uint64_t hi = absl::Uint128High64(n))
return Bits::CountLeadingZeros64(hi);
return Bits::CountLeadingZeros64(absl::Uint128Low64(n)) + 64;
}
// Reverse the bits in the given integer.
static uint8_t ReverseBits8(uint8_t n);
static uint32_t ReverseBits32(uint32_t n);
static uint64_t ReverseBits64(uint64_t n);
static absl::uint128 ReverseBits128(absl::uint128 n);
// Return the number of one bits in the byte sequence.
static int Count(const void *m, int num_bytes);
// Return the number of different bits in the given byte sequences.
// (i.e., the Hamming distance)
static int Difference(const void *m1, const void *m2, int num_bytes);
// Return the number of different bits in the given byte sequences,
// up to a maximum. Values larger than the maximum may be returned
// (because multiple bits are checked at a time), but the function
// may exit early if the cap is exceeded.
static int CappedDifference(const void *m1, const void *m2,
int num_bytes, int cap);
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
static int Log2Floor(uint32_t n);
static int Log2Floor64(uint64_t n);
static int Log2Floor128(absl::uint128 n);
// Potentially faster version of Log2Floor() that returns an
// undefined value if n == 0
static int Log2FloorNonZero(uint32_t n);
static int Log2FloorNonZero64(uint64_t n);
static int Log2FloorNonZero128(absl::uint128 n);
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
static int Log2Ceiling(uint32_t n);
static int Log2Ceiling64(uint64_t n);
static int Log2Ceiling128(absl::uint128 n);
// Return the first set least / most significant bit, 0-indexed. Returns an
// undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
// that it's 0-indexed, while FindMSBSetNonZero() is the same as
// Log2FloorNonZero().
static int FindLSBSetNonZero(uint32_t n);
static int FindLSBSetNonZero64(uint64_t n);
static int FindLSBSetNonZero128(absl::uint128 n);
static int FindMSBSetNonZero(uint32_t n) { return Log2FloorNonZero(n); }
static int FindMSBSetNonZero64(uint64_t n) { return Log2FloorNonZero64(n); }
static int FindMSBSetNonZero128(absl::uint128 n) {
return Log2FloorNonZero128(n);
}
// Viewing bytes as a stream of unsigned bytes, does that stream
// contain any byte equal to c?
template <class T> static bool BytesContainByte(T bytes, uint8_t c);
// Viewing bytes as a stream of unsigned bytes, does that stream
// contain any byte b < c?
template <class T> static bool BytesContainByteLessThan(T bytes, uint8_t c);
// Viewing bytes as a stream of unsigned bytes, are all elements of that
// stream in [lo, hi]?
template <class T> static bool BytesAllInRange(T bytes, uint8_t lo, uint8_t hi);
// Extract 'nbits' consecutive bits from 'src'. Position of bits are
// specified by 'offset' from the LSB. 'T' is a scalar type (integral,
// float or pointer) whose size is the same as one of the unsigned types.
// The return type is an unsigned type having the same size as T.
template<typename T>
static typename UnsignedType<T>::Type GetBits(const T src,
const int offset,
const int nbits) {
typedef typename UnsignedType<T>::Type UnsignedT;
const UnsignedT unsigned_src = absl::bit_cast<UnsignedT>(src);
// The requested field must lie entirely within the word.
SQL_DCHECK_GT(sizeof(UnsignedT) * 8, offset);
SQL_DCHECK_GE(sizeof(UnsignedT) * 8, offset + nbits);
return GetBitsImpl(unsigned_src, offset, nbits);
}
// Overwrite 'nbits' consecutive bits of 'dest.'. Position of bits are
// specified by an offset from the LSB. 'T' is a scalar type (integral,
// float or pointer) whose size is the same as one of the unsigned types.
template<typename T>
static void SetBits(const typename UnsignedType<T>::Type value,
const int offset,
const int nbits,
T* const dest) {
typedef typename UnsignedType<T>::Type UnsignedT;
const UnsignedT unsigned_dest = absl::bit_cast<UnsignedT>(*dest);
// The targeted field must lie entirely within the word.
SQL_DCHECK_GT(sizeof(UnsignedT) * 8, offset);
SQL_DCHECK_GE(sizeof(UnsignedT) * 8, offset + nbits);
const UnsignedT mask = NBitsFromLSB<UnsignedT>(nbits);
const UnsignedT unsigned_result =
(unsigned_dest & ~(mask << offset)) | ((value & mask) << offset);
*dest = absl::bit_cast<T>(unsigned_result);
}
// Combine SetBits and GetBits for convenience. This is meant to be a
// replacement for BitCopy() for some use cases. Unlike BitCopy(),
// Bits::CopyBits() operating on multibyte types has the same behavior on
// big-endian and little-endian machines. Sample usage:
//
// uint32_t a, b;
// Bits::CopyBits(&a, 0, b, 12, 3);
template<typename DestType, typename SrcType>
static void CopyBits(DestType* const dest,
const int dest_offset,
const SrcType src,
const int src_offset,
const int nbits) {
const typename UnsignedType<SrcType>::Type value =
GetBits(src, src_offset, nbits);
SetBits(value, dest_offset, nbits, dest);
}
// Extract the lowest 'nbits' consecutive bits from 'src'.
// Bits::GetLowBits(13, 3); /* = 5 (0b1101 => 0b101) */
template<typename T>
static typename UnsignedType<T>::Type GetLowBits(const T src,
const int nbits) {
typedef typename UnsignedType<T>::Type UnsignedT;
const UnsignedT unsigned_src = absl::bit_cast<UnsignedT>(src);
SQL_DCHECK_GE(sizeof(UnsignedT) * 8, nbits);
return GetLowBitsImpl(unsigned_src, nbits);
}
private:
// We only use this for unsigned types and for 0 <= n <= sizeof(UnsignedT).
// Returns a mask with the low 'nbits' bits set. nbits == 0 is special-cased
// because shifting by the full bit width would be undefined behavior.
template<typename UnsignedT>
static UnsignedT NBitsFromLSB(const int nbits) {
const UnsignedT all_ones = ~static_cast<UnsignedT>(0);
return nbits == 0 ? static_cast<UnsignedT>(0)
: all_ones >> (sizeof(UnsignedT) * 8 - nbits);
}
template<typename UnsignedT>
static inline UnsignedT GetBitsImpl(const UnsignedT src,
const int offset,
const int nbits);
template <typename UnsignedT>
static inline UnsignedT GetLowBitsImpl(const UnsignedT src, const int nbits);
#ifdef __GNUC__
// Overload set wrapping __builtin_clz{,l,ll}; defined (with zero handling)
// near the bottom of this header.
static int CountLeadingZerosWithBuiltin(unsigned n);
// NOLINTNEXTLINE(runtime/int)
static int CountLeadingZerosWithBuiltin(unsigned long n);
// NOLINTNEXTLINE(runtime/int)
static int CountLeadingZerosWithBuiltin(unsigned long long n);
// Overload set wrapping __builtin_popcount{,l,ll}.
static int PopcountWithBuiltin(unsigned n);
static int PopcountWithBuiltin(unsigned long n); // NOLINT(runtime/int)
static int PopcountWithBuiltin(unsigned long long n); // NOLINT(runtime/int)
// BMI/BMI2 fast-path overloads; these shadow the generic templates above
// for uint32_t/uint64_t when the instruction sets are available.
#if defined(__BMI__) && (defined(__i386__) || defined(__x86_64__))
static inline uint32_t GetBitsImpl(const uint32_t src,
const int offset,
const int nbits);
#endif
#if defined(__BMI__) && defined(__x86_64__)
static inline uint64_t GetBitsImpl(const uint64_t src,
const int offset,
const int nbits);
#endif
#if defined(__BMI2__) && (defined(__i386__) || defined(__x86_64__))
static inline uint32_t GetLowBitsImpl(const uint32_t src, const int nbits);
#endif
#if defined(__BMI2__) && defined(__x86_64__)
static inline uint64_t GetLowBitsImpl(const uint64_t src, const int nbits);
#endif
#endif // __GNUC__
// Portable implementations.
static int Log2Floor_Portable(uint32_t n);
static int Log2Floor64_Portable(uint64_t n);
static int Log2FloorNonZero_Portable(uint32_t n);
static int Log2FloorNonZero64_Portable(uint64_t n);
static int CountLeadingZeros32_Portable(uint32_t n);
static int CountLeadingZeros64_Portable(uint64_t n);
static int FindLSBSetNonZero_Portable(uint32_t n);
static int FindLSBSetNonZero64_Portable(uint64_t n);
// Lookup table used by CountOnesInByte: num_bits[i] is the number of one
// bits in the byte value i.
static const char num_bits[];
// Static-only utility class: not constructible, copyable, or assignable.
Bits(Bits const&) = delete;
void operator=(Bits const&) = delete;
FRIEND_TEST(Bits, Port64);
FRIEND_TEST(Bits, Port32);
};
// A utility class for some handy bit patterns. The names l and h
// were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...;
// half_ones is ones in the lower half only. We assume sizeof(T) is 1 or even.
// A utility class for some handy bit patterns. The names l and h
// were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...;
// half_ones is ones in the lower half only. We assume sizeof(T) is 1 or even.
template <class T> struct BitPattern {
typedef typename std::make_unsigned<T>::type U;
// Ones in the low half of U (e.g. 0x00000000FFFFFFFF for a 64-bit U).
static const U half_ones = (static_cast<U>(1) << (sizeof(U) * 4)) - 1;
// Low bit of every byte: 0x0101...01. Computed via half_ones to avoid
// overflowing intermediate shifts for the widest U.
static const U l =
(sizeof(U) == 1) ? 1 : (half_ones / 0xff * (half_ones + 2));
// High bit of every byte: 0x8080...80.
static const U h = ~(l * 0x7f);
};
// ------------------------------------------------------------------------
// Implementation details follow
// ------------------------------------------------------------------------
#if defined(__GNUC__)
// GCC/Clang: clz/ctz builtins. For nonzero n with 0 <= clz(n) <= 31,
// 31 ^ clz(n) == 31 - clz(n) == floor(log2(n)).
inline int Bits::Log2Floor(uint32_t n) {
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
}
inline int Bits::Log2FloorNonZero(uint32_t n) {
// Caller guarantees n != 0; __builtin_clz(0) is undefined.
return 31 ^ __builtin_clz(n);
}
inline int Bits::FindLSBSetNonZero(uint32_t n) {
// Caller guarantees n != 0; __builtin_ctz(0) is undefined.
return __builtin_ctz(n);
}
inline int Bits::Log2Floor64(uint64_t n) {
return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
}
inline int Bits::Log2FloorNonZero64(uint64_t n) {
return 63 ^ __builtin_clzll(n);
}
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
return __builtin_ctzll(n);
}
#elif defined(COMPILER_MSVC)
inline int Bits::FindLSBSetNonZero(uint32_t n) {
return Bits::FindLSBSetNonZero_Portable(n);
}
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
return Bits::FindLSBSetNonZero64_Portable(n);
}
inline int Bits::Log2FloorNonZero(uint32_t n) {
#ifdef _M_IX86
// 32-bit x86 MSVC: bsr returns the index of the highest set bit, which
// equals floor(log2(n)) for nonzero n.
_asm {
bsr ebx, n
mov n, ebx
}
return n;
#else
return Bits::Log2FloorNonZero_Portable(n);
#endif
}
inline int Bits::Log2Floor(uint32_t n) {
#ifdef _M_IX86
// NOTE(review): this asm path returns 0 when n == 0 (ebx is zeroed and bsr
// is skipped), not the -1 documented for Log2Floor — verify before relying
// on it under 32-bit MSVC.
_asm {
xor ebx, ebx
mov eax, n
and eax, eax
jz return_ebx
bsr ebx, eax
return_ebx:
mov n, ebx
}
return n;
#else
return Bits::Log2Floor_Portable(n);
#endif
}
inline int Bits::Log2Floor64(uint64_t n) {
return Bits::Log2Floor64_Portable(n);
}
inline int Bits::Log2FloorNonZero64(uint64_t n) {
return Bits::Log2FloorNonZero64_Portable(n);
}
#else // !__GNUC__ && !COMPILER_MSVC
// Unknown toolchain: fall back to the portable implementations.
inline int Bits::Log2Floor(uint32_t n) {
return Bits::Log2Floor_Portable(n);
}
inline int Bits::Log2FloorNonZero(uint32_t n) {
return Bits::Log2FloorNonZero_Portable(n);
}
inline int Bits::FindLSBSetNonZero(uint32_t n) {
return Bits::FindLSBSetNonZero_Portable(n);
}
inline int Bits::Log2Floor64(uint64_t n) {
return Bits::Log2Floor64_Portable(n);
}
inline int Bits::Log2FloorNonZero64(uint64_t n) {
return Bits::Log2FloorNonZero64_Portable(n);
}
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
return Bits::FindLSBSetNonZero64_Portable(n);
}
#endif
// floor(log2(n)) for a 128-bit value; -1 when n == 0.
inline int Bits::Log2Floor128(absl::uint128 n) {
  const uint64_t high = absl::Uint128High64(n);
  if (high != 0) {
    // Any set bit in the upper word dominates; its position is offset by 64.
    return 64 + Log2FloorNonZero64(high);
  }
  return Log2Floor64(absl::Uint128Low64(n));
}
// floor(log2(n)) for a nonzero 128-bit value; undefined when n == 0.
inline int Bits::Log2FloorNonZero128(absl::uint128 n) {
  const uint64_t high = absl::Uint128High64(n);
  if (high != 0) {
    return 64 + Log2FloorNonZero64(high);
  }
  return Log2FloorNonZero64(absl::Uint128Low64(n));
}
// 0-indexed position of the lowest set bit; undefined when n == 0.
inline int Bits::FindLSBSetNonZero128(absl::uint128 n) {
  const uint64_t low = absl::Uint128Low64(n);
  if (low != 0) {
    // The lowest set bit lives in the low word unless that word is zero.
    return Bits::FindLSBSetNonZero64(low);
  }
  return 64 + Bits::FindLSBSetNonZero64(absl::Uint128High64(n));
}
// Population count of a single byte via the num_bits lookup table.
inline int Bits::CountOnesInByte(unsigned char n) {
return num_bits[n];
}
// Reverses the bit order within a single byte.
inline uint8_t Bits::ReverseBits8(unsigned char n) {
#if defined(__aarch64__) && defined(__GNUC__)
// aarch64 has a reverse bits instruction but there is no gcc builtin.
// rbit reverses all 32 bits, so place the byte in the top byte first.
uint32_t result;
const uint32_t n_shifted = static_cast<uint32_t>(n) << 24;
asm("rbit %w0, %w1" : "=r"(result) : "r"(n_shifted));
return static_cast<uint8_t>(result);
#elif defined (__powerpc64__)
uint64_t temp = n;
// bpermd selects a byte's worth of bits from its second input. Grab one byte
// at a time, in reversed order. 0x3f is the lowest order bit of a 64-bit int.
// Bits 0x0 through 0x37 will all be zero, and bits 0x38 through 0x3f will
// hold the 8 bits from `n`.
uint64_t result = __builtin_bpermd(0x3f3e3d3c3b3a3938, temp);
return static_cast<unsigned char>(result);
#else
// Portable: swap adjacent bits, then adjacent 2-bit pairs, then nibbles.
n = static_cast<unsigned char>(((n >> 1) & 0x55) | ((n & 0x55) << 1));
n = static_cast<unsigned char>(((n >> 2) & 0x33) | ((n & 0x33) << 2));
return static_cast<unsigned char>(((n >> 4) & 0x0f) | ((n & 0x0f) << 4));
#endif
}
// Reverses the bit order of a 32-bit word.
inline uint32_t Bits::ReverseBits32(uint32_t n) {
#if defined(__aarch64__) && defined(__GNUC__)
uint32_t result;
asm("rbit %w0, %w1" : "=r"(result) : "r"(n));
return result;
#elif defined(__powerpc64__)
// Extract each byte of n bit-reversed (see ReverseBits8 for the bpermd
// selector encoding) and reassemble the bytes in reversed order.
uint64_t temp = n;
uint64_t result_0 = __builtin_bpermd(0x3f3e3d3c3b3a3938, temp) << 24;
uint64_t result_1 = __builtin_bpermd(0x3736353433323130, temp) << 16;
uint64_t result_2 = __builtin_bpermd(0x2f2e2d2c2b2a2928, temp) << 8;
uint64_t result_3 = __builtin_bpermd(0x2726252423222120, temp);
return static_cast<uint32_t>(result_0 | result_1 | result_2 | result_3);
#else
// Portable: reverse the bits within every byte, then reverse byte order
// with a byte swap.
n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
return bigquery_ml_utils_base::gbswap_32(n);
#endif
}
// Reverses the bit order of a 64-bit word.
inline uint64_t Bits::ReverseBits64(uint64_t n) {
#if defined(__aarch64__) && defined(__GNUC__)
uint64_t result;
asm("rbit %0, %1" : "=r"(result) : "r"(n));
return result;
#elif defined(__powerpc64__)
// Extract each of the eight bytes bit-reversed (see ReverseBits8 for the
// bpermd selector encoding) and reassemble them in reversed order.
uint64_t result_lo0 = __builtin_bpermd(0x3f3e3d3c3b3a3938, n) << 56;
uint64_t result_lo1 = __builtin_bpermd(0x3736353433323130, n) << 48;
uint64_t result_lo2 = __builtin_bpermd(0x2f2e2d2c2b2a2928, n) << 40;
uint64_t result_lo3 = __builtin_bpermd(0x2726252423222120, n) << 32;
uint64_t result_hi0 = __builtin_bpermd(0x1f1e1d1c1b1a1918, n) << 24;
uint64_t result_hi1 = __builtin_bpermd(0x1716151413121110, n) << 16;
uint64_t result_hi2 = __builtin_bpermd(0x0f0e0d0c0b0a0908, n) << 8;
uint64_t result_hi3 = __builtin_bpermd(0x0706050403020100, n);
return (result_lo0 | result_lo1 | result_lo2 | result_lo3 |
result_hi0 | result_hi1 | result_hi2 | result_hi3);
#elif defined(_LP64)
// Portable 64-bit: reverse bits within bytes, then byte-swap.
n = ((n >> 1) & 0x5555555555555555ULL) | ((n & 0x5555555555555555ULL) << 1);
n = ((n >> 2) & 0x3333333333333333ULL) | ((n & 0x3333333333333333ULL) << 2);
n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((n & 0x0F0F0F0F0F0F0F0FULL) << 4);
return bigquery_ml_utils_base::gbswap_64(n);
#else
// 32-bit targets: reverse each half and swap the halves.
return ReverseBits32( n >> 32 ) |
(static_cast<uint64_t>(ReverseBits32(n & 0xffffffff)) << 32);
#endif
}
// Reverses the bit order of a 128-bit value: each 64-bit half is
// bit-reversed and the halves trade places.
inline absl::uint128 Bits::ReverseBits128(absl::uint128 n) {
  const uint64_t reversed_low = ReverseBits64(absl::Uint128Low64(n));
  const uint64_t reversed_high = ReverseBits64(absl::Uint128High64(n));
  return absl::MakeUint128(reversed_low, reversed_high);
}
// Portable nonzero log2: delegate to the general routine — its n == 0
// branch is simply never taken here.
inline int Bits::Log2FloorNonZero_Portable(uint32_t n) {
  return Log2Floor(n);
}
// Portable 64-bit log2 floor, built from the 32-bit routines.
inline int Bits::Log2Floor64_Portable(uint64_t n) {
  const uint32_t high_word = static_cast<uint32_t>(n >> 32);
  if (high_word != 0) {
    // A set bit in the top half dominates; its position is offset by 32.
    return 32 + Log2FloorNonZero(high_word);
  }
  // Top half empty: the answer (-1 for n == 0) comes from the low half.
  return Log2Floor(static_cast<uint32_t>(n));
}
// Portable 64-bit nonzero log2 floor, built from the 32-bit routine.
inline int Bits::Log2FloorNonZero64_Portable(uint64_t n) {
  const uint32_t high_word = static_cast<uint32_t>(n >> 32);
  if (high_word != 0) {
    return 32 + Log2FloorNonZero(high_word);
  }
  return Log2FloorNonZero(static_cast<uint32_t>(n));
}
// Portable 64-bit lowest-set-bit search, built from the 32-bit routine.
inline int Bits::FindLSBSetNonZero64_Portable(uint64_t n) {
  const uint32_t low_word = static_cast<uint32_t>(n);
  if (low_word != 0) {
    return FindLSBSetNonZero(low_word);
  }
  // Low half is empty, so the first set bit must be in the top half.
  return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32));
}
// True iff any byte of `bytes` (viewed as unsigned bytes) is < c.
// The c <= 0x80 formula is straight out of Knuth Volume 4; usually c is a
// manifest constant.
template <class T>
inline bool Bits::BytesContainByteLessThan(T bytes, uint8_t c) {
  const auto low_bits = BitPattern<T>::l;   // 0x0101...01
  const auto high_bits = BitPattern<T>::h;  // 0x8080...80
  if (c <= 0x80) {
    return (high_bits & (bytes - low_bits * c) & ~bytes) != 0;
  }
  return (((bytes - low_bits * c) | (bytes ^ high_bits)) & high_bits) != 0;
}
// True iff any byte of `bytes` (viewed as unsigned bytes) equals c.
// XOR-ing with c repeated in every byte zeroes exactly the matching bytes,
// reducing the question to "is any byte < 1". Usually c is a constant.
template <class T> inline bool Bits::BytesContainByte(T bytes, uint8_t c) {
  const auto repeated_c = c * BitPattern<T>::l;
  return Bits::BytesContainByteLessThan<T>(bytes ^ repeated_c, 1);
}
// True iff every byte of `bytes` (viewed as unsigned bytes) lies in
// [lo, hi]. Returns false for an empty range (lo > hi).
template <class T>
inline bool Bits::BytesAllInRange(T bytes, uint8_t lo, uint8_t hi) {
auto l = BitPattern<T>::l;
auto h = BitPattern<T>::h;
// In the common case, lo and hi are manifest constants.
if (lo > hi) {
return false;
}
if (hi - lo < 128) {
// Knuth-style range test: a byte of x (resp. y) acquires its high bit
// when the corresponding byte of `bytes` is below lo (resp. above hi),
// so all bytes are in range iff no high bit is set in x | y.
auto x = bytes - l * lo;
auto y = bytes + l * (127 - hi);
return ((x | y) & h) == 0;
}
// Wide range: shift every byte up so hi maps to 255, then the bytes out of
// range are exactly those below the shifted lo.
return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l,
lo + (255 - hi));
}
// Specializations for Bits::UnsignedTypeBySize, mapping a byte count to the
// unsigned integer type of that width. Any other size fails to compile
// because the primary template is only declared, never defined.
template <>
struct Bits::UnsignedTypeBySize<1> { typedef uint8_t Type; };
template <>
struct Bits::UnsignedTypeBySize<2> { typedef uint16_t Type; };
template <>
struct Bits::UnsignedTypeBySize<4> { typedef uint32_t Type; };
template <>
struct Bits::UnsignedTypeBySize<8> { typedef uint64_t Type; };
template <>
struct Bits::UnsignedTypeBySize<16> { typedef absl::uint128 Type; };
#ifdef __GNUC__
// clz for `unsigned`; returns the full bit width for zero, since
// __builtin_clz(0) is undefined.
inline int Bits::CountLeadingZerosWithBuiltin(unsigned n) {
  return n == 0 ? static_cast<int>(sizeof(n) * 8) : __builtin_clz(n);
}
// clz for `unsigned long`; returns the full bit width for zero, since
// __builtin_clzl(0) is undefined.
// NOLINTNEXTLINE(runtime/int)
inline int Bits::CountLeadingZerosWithBuiltin(unsigned long n) {
  return n == 0 ? static_cast<int>(sizeof(n) * 8) : __builtin_clzl(n);
}
// clz for `unsigned long long`; returns the full bit width for zero, since
// __builtin_clzll(0) is undefined.
// NOLINTNEXTLINE(runtime/int)
inline int Bits::CountLeadingZerosWithBuiltin(unsigned long long n) {
  return n == 0 ? static_cast<int>(sizeof(n) * 8) : __builtin_clzll(n);
}
// Population count for `unsigned` via the compiler builtin (typically a
// single instruction when the target supports one).
inline int Bits::PopcountWithBuiltin(unsigned n) {
return __builtin_popcount(n);
}
// Population count for `unsigned long` via the compiler builtin.
// NOLINTNEXTLINE(runtime/int)
inline int Bits::PopcountWithBuiltin(unsigned long n) {
return __builtin_popcountl(n);
}
// Population count for `unsigned long long` via the compiler builtin.
// NOLINTNEXTLINE(runtime/int)
inline int Bits::PopcountWithBuiltin(unsigned long long n) {
return __builtin_popcountll(n);
}
#if defined(__BMI__) && (defined(__i386__) || defined(__x86_64__))
// BMI fast path: BEXTR extracts `nbits` bits of `src` starting at `offset`.
inline uint32_t Bits::GetBitsImpl(const uint32_t src,
const int offset,
const int nbits) {
return _bextr_u32(src, offset, nbits);
}
#endif
#if defined(__BMI__) && defined(__x86_64__)
// BMI fast path (64-bit BEXTR): extract `nbits` bits starting at `offset`.
inline uint64_t Bits::GetBitsImpl(const uint64_t src,
const int offset,
const int nbits) {
return _bextr_u64(src, offset, nbits);
}
#endif
#if defined(__BMI2__) && (defined(__i386__) || defined(__x86_64__))
// BMI2 fast path: BZHI zeroes every bit at index >= nbits.
inline uint32_t Bits::GetLowBitsImpl(const uint32_t src, const int nbits) {
return _bzhi_u32(src, nbits);
}
#endif
#if defined(__BMI2__) && defined(__x86_64__)
// BMI2 fast path (64-bit BZHI): zeroes every bit at index >= nbits.
inline uint64_t Bits::GetLowBitsImpl(const uint64_t src, const int nbits) {
return _bzhi_u64(src, nbits);
}
#endif
#endif // __GNUC__
// Generic bit-field extraction: shift the field down to the LSB, then mask
// off everything above the low `nbits` bits.
template<typename UnsignedT>
inline UnsignedT Bits::GetBitsImpl(const UnsignedT src,
                                   const int offset,
                                   const int nbits) {
  return (src >> offset) & NBitsFromLSB<UnsignedT>(nbits);
}
template<typename UnsignedT>
inline UnsignedT Bits::GetLowBitsImpl(const UnsignedT src, const int nbits) {
return GetBitsImpl(src, 0, nbits);
}
} // namespace bigquery_ml_utils_base
#endif // THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_BITS_H_