sql_utils/base/bits.cc (122 lines of code) (raw):

/* * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "sql_utils/base/bits.h" #include <assert.h> #include "absl/numeric/int128.h" namespace bigquery_ml_utils_base { // this array gives the number of bits for any number from 0 to 255 // (We could make these ints. The tradeoff is size (eg does it overwhelm // the cache?) vs efficiency in referencing sub-word-sized array elements) const char Bits::num_bits[] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; int Bits::Count(const void *m, int num_bytes) { int nbits = 0; const uint8_t *s = (const uint8_t *) m; for (int i = 0; i < num_bytes; i++) nbits += num_bits[*s++]; return nbits; } int Bits::Difference(const void *m1, const void *m2, int num_bytes) { int nbits = 0; const uint8_t *s1 = (const uint8_t *) m1; const uint8_t *s2 = (const uint8_t *) m2; for (int i = 0; i < num_bytes; i++) nbits += num_bits[(*s1++) ^ (*s2++)]; return nbits; } int Bits::CappedDifference(const void *m1, const void *m2, int num_bytes, int cap) { int nbits = 0; const uint8_t *s1 = (const uint8_t *) m1; const uint8_t *s2 = (const uint8_t *) m2; for (int i = 0; i < num_bytes && nbits <= cap; i++) nbits += num_bits[(*s1++) ^ (*s2++)]; return nbits; } int Bits::Log2Floor_Portable(uint32_t n) { if (n == 0) return -1; int log = 0; uint32_t value = n; for (int i = 4; i >= 0; --i) { int shift = (1 << i); uint32_t x = value >> shift; if (x != 0) { value = x; log += shift; } } assert(value == 1); return log; } int Bits::Log2Ceiling(uint32_t n) { int floor = Log2Floor(n); if ((n & (n - 1)) == 0) // zero or a power of two return floor; else return floor + 1; } int Bits::Log2Ceiling64(uint64_t n) { int floor = Log2Floor64(n); if ((n & (n - 1)) == 0) // zero or a power of two return floor; else return floor + 1; } int Bits::Log2Ceiling128(absl::uint128 n) { int floor = Log2Floor128(n); if ((n & (n - 1)) == 0) // zero or a power of two return floor; else return floor + 1; } int Bits::FindLSBSetNonZero_Portable(uint32_t n) { int rc = 31; for (int i = 4, shift = 1 << 4; i >= 0; --i) { const uint32_t x = n << shift; if (x != 0) { n = x; rc -= shift; } shift >>= 1; } return rc; } int Bits::CountLeadingZeros32_Portable(uint32_t n) { int bits = 1; if (n == 0) return 32; if ((n >> 16) == 0) { bits += 16; n <<= 16; } if ((n >> 24) == 0) { bits += 8; n <<= 8; } if ((n >> 28) == 0) { bits += 4; n <<= 4; } if ((n >> 30) == 0) { bits += 2; n <<= 2; } return bits - (n >> 31); } int Bits::CountLeadingZeros64_Portable(uint64_t n) { return ((n >> 32) ? Bits::CountLeadingZeros32_Portable(n >> 32) : 32 + Bits::CountLeadingZeros32_Portable(n)); } } // namespace bigquery_ml_utils_base