cpc/bit_matrix.go (70 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cpc import ( "encoding/binary" "math/bits" "github.com/twmb/murmur3" ) // BitMatrix is a test structure that tracks bit patterns for hashed 64-bit data. // It is not part of the core CPC algorithm; it's just for certain tests. type BitMatrix struct { lgK int seed uint64 numCoupons uint64 bitMatrix []uint64 numCouponsInvalid bool // only used if merges are allowed (not typical in this usage) } // NewBitMatrixWithSeed creates a BitMatrix with the given lgK and custom seed. func NewBitMatrixWithSeed(lgK int, seed uint64) *BitMatrix { size := 1 << lgK return &BitMatrix{ lgK: lgK, seed: seed, numCoupons: 0, bitMatrix: make([]uint64, size), } } // Reset clears the entire bitMatrix, setting all bits to zero, // and resets the coupon count to zero. func (bm *BitMatrix) Reset() { for i := range bm.bitMatrix { bm.bitMatrix[i] = 0 } bm.numCoupons = 0 bm.numCouponsInvalid = false } // GetNumCoupons returns the number of set bits (coupons) in the matrix. // If numCouponsInvalid were ever set to true, it would recalculate by scanning // the bitMatrix. By default, it’s always up to date. func (bm *BitMatrix) GetNumCoupons() uint64 { if bm.numCouponsInvalid { bm.numCoupons = CountCoupons(bm.bitMatrix) bm.numCouponsInvalid = false } return bm.numCoupons } // GetMatrix returns the underlying array of 64-bit words storing the bits. func (bm *BitMatrix) GetMatrix() []uint64 { return bm.bitMatrix } // Update hashes the given 64-bit datum and sets the corresponding bit // in the matrix. If that bit was previously unset, the coupon count increments. func (bm *BitMatrix) Update(datum int64) { var scratch [8]byte binary.LittleEndian.PutUint64(scratch[:], uint64(datum)) hashLo, hashHi := murmur3.SeedSum128(bm.seed, bm.seed, scratch[:]) bm.hashUpdate(hashLo, hashHi) } // hashUpdate extracts row and column from the 128-bit hash, then sets // the appropriate bit in bm.bitMatrix[row]. func (bm *BitMatrix) hashUpdate(hash0, hash1 uint64) { col := bits.LeadingZeros64(hash1) if col > 63 { col = 63 } kMask := (uint64(1) << bm.lgK) - 1 row := int(hash0 & kMask) rowCol := (row << 6) | col if rowCol == -1 { row ^= 1 } oldPattern := bm.bitMatrix[row] newPattern := oldPattern | (uint64(1) << col) if newPattern != oldPattern { bm.numCoupons++ bm.bitMatrix[row] = newPattern } } // CountCoupons sums the number of set bits across all 64-bit words in bitMatrix. func CountCoupons(bitMatrix []uint64) uint64 { var count uint64 for _, word := range bitMatrix { count += uint64(bits.OnesCount64(word)) } return count }