lib/BitSet.h

/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #include <assert.h> #include <stdint.h> #include <string.h> #include <vector> // This class migrates the BitSet class from Java to have essential methods. // Here `boost::dynamic_bitset` is not used because we need to convert the dynamic bit set // to a long array because the Pulsar API uses a long array to represent the bit set. namespace pulsar { class BitSet { public: // We must store the unsigned integer to make operator >> equivalent to Java's >>> using Data = std::vector<uint64_t>; using const_iterator = typename Data::const_iterator; BitSet() {} BitSet(int32_t numBits) : words_((numBits / 64) + ((numBits % 64 == 0) ? 0 : 1)) { assert(numBits > 0); } BitSet(Data&& words) : words_(std::move(words)), wordsInUse_(words_.size()) {} // Support range loop like: // ```c++ // BitSet bitSet(129); // for (auto x : bitSet) { /* ... */ } // ``` const_iterator begin() const noexcept { return words_.begin(); } const_iterator end() const noexcept { return words_.begin() + wordsInUse_; } /** * Returns true if this {@code BitSet} contains no bits that are set * to {@code true}. * * @return boolean indicating whether this {@code BitSet} is empty */ bool isEmpty() const noexcept { return wordsInUse_ == 0; } /** * Returns the value of the bit with the specific index. The value is {@code true} if the bit with the * index {@code bitIndex} is currently set in this {@code BitSet}; otherwise, the result is {@code false}. * * @param bitIndex the bit index * @return the value of the bit with the specified index */ bool get(int32_t bitIndex) const; /** * Sets the bits from the specified {@code fromIndex} (inclusive) to the * specified {@code toIndex} (exclusive) to {@code true}. * * @param fromIndex index of the first bit to be set * @param toIndex index after the last bit to be set */ void set(int32_t fromIndex, int32_t toIndex); /** * Sets the bits from the specified {@code fromIndex} (inclusive) to the * specified {@code toIndex} (exclusive) to {@code false}. * * @param fromIndex index of the first bit to be cleared * @param toIndex index after the last bit to be cleared */ void clear(int32_t fromIndex, int32_t toIndex); /** * Sets the bit specified by the index to {@code false}. * * @param bitIndex the index of the bit to be cleared */ void clear(int32_t bitIndex); private: Data words_; int32_t wordsInUse_ = 0; static constexpr int32_t ADDRESS_BITS_PER_WORD = 6; static constexpr int32_t BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD; static constexpr uint64_t WORD_MASK = 0xffffffffffffffffL; static constexpr int32_t wordIndex(int32_t bitIndex) { return bitIndex >> ADDRESS_BITS_PER_WORD; } void expandTo(int32_t wordIndex) { auto wordsRequired = wordIndex + 1; if (wordsInUse_ < wordsRequired) { words_.resize(wordsRequired); wordsInUse_ = wordsRequired; } } int32_t length() { if (wordsInUse_ == 0) { return 0; } return BITS_PER_WORD * (wordsInUse_ - 1) + (BITS_PER_WORD - numberOfLeadingZeros(words_[wordsInUse_ - 1])); } void recalculateWordsInUse() { // Traverse the bitset until a used word is found int32_t i; for (i = wordsInUse_ - 1; i >= 0; i--) { if (words_[i] != 0) { break; } } wordsInUse_ = i + 1; } static int32_t numberOfLeadingZeros(uint64_t i) { auto x = static_cast<uint32_t>(i >> 32); return x == 0 ? 32 + numberOfLeadingZeros(static_cast<uint32_t>(i)) : numberOfLeadingZeros(x); } static int32_t numberOfLeadingZeros(uint32_t i) { if (i <= 0) { return i == 0 ? 32 : 0; } int32_t n = 31; if (i >= (1 << 16)) { n -= 16; i >>= 16; } if (i >= (1 << 8)) { n -= 8; i >>= 8; } if (i >= (1 << 4)) { n -= 4; i >>= 4; } if (i >= (1 << 2)) { n -= 2; i >>= 2; } return n - (i >> 1); } // In C++, when shift count is negative or >= width of type, the behavior is undefined. We should // convert n to the valid range [0, sizeof(T)) first. static constexpr int32_t safeShiftCount(int32_t width, int32_t n) { return (n < 0) ? safeShiftCount(width, n + width) : ((n >= width) ? safeShiftCount(width, n - width) : n); } template <typename T> static T safeLeftShift(T x, int32_t n) { return (x << safeShiftCount(sizeof(x) * 8, n)); } template <typename T> static T safeRightShift(T x, int32_t n) { return (x >> safeShiftCount(sizeof(x) * 8, n)); } }; inline bool BitSet::get(int32_t bitIndex) const { assert(bitIndex >= 0); auto wordIndex_ = wordIndex(bitIndex); return (wordIndex_ < wordsInUse_) && ((words_[wordIndex_] & (1L << bitIndex)) != 0); } inline void BitSet::set(int32_t fromIndex, int32_t toIndex) { assert(fromIndex < toIndex && fromIndex >= 0 && toIndex >= 0); if (fromIndex == toIndex) { return; } auto startWordIndex = wordIndex(fromIndex); auto endWordIndex = wordIndex(toIndex - 1); expandTo(endWordIndex); auto firstWordMask = safeLeftShift(WORD_MASK, fromIndex); auto lastWordMask = safeRightShift(WORD_MASK, -toIndex); if (startWordIndex == endWordIndex) { // Case 1: One word words_[startWordIndex] |= (firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words_[startWordIndex] |= firstWordMask; // Handle intermediate words, if any for (int32_t i = startWordIndex + 1; i < endWordIndex; i++) { words_[i] = WORD_MASK; } // Handle last word (restores invariants) words_[endWordIndex] |= lastWordMask; } } inline void BitSet::clear(int32_t fromIndex, int32_t toIndex) { assert(fromIndex < toIndex && fromIndex >= 0 && toIndex >= 0); if (fromIndex == toIndex) { return; } auto startWordIndex = wordIndex(fromIndex); if (startWordIndex >= wordsInUse_) { return; } auto endWordIndex = wordIndex(toIndex - 1); if (endWordIndex >= wordsInUse_) { toIndex = length(); endWordIndex = wordsInUse_ - 1; } auto firstWordMask = safeLeftShift(WORD_MASK, fromIndex); auto lastWordMask = safeRightShift(WORD_MASK, -toIndex); if (startWordIndex == endWordIndex) { // Case 1: One word words_[startWordIndex] &= ~(firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words_[startWordIndex] &= ~firstWordMask; // Handle intermediate words, if any for (int32_t i = startWordIndex + 1; i < endWordIndex; i++) { words_[i] = 0; } // Handle last word words_[endWordIndex] &= ~lastWordMask; } recalculateWordsInUse(); } inline void BitSet::clear(int32_t bitIndex) { assert(bitIndex >= 0); auto i = wordIndex(bitIndex); if (i >= wordsInUse_) { return; } words_[i] &= ~(safeLeftShift(1L, bitIndex)); recalculateWordsInUse(); } } // namespace pulsar

lib/BitSet.h (152 lines of code) (raw):