hll/coupon_hash_set.go (191 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hll
import (
"encoding/binary"
"fmt"
)
type couponHashSetImpl struct {
hllSketchConfig
hllCouponState
}
func (c *couponHashSetImpl) GetCompositeEstimate() (float64, error) {
return getEstimate(c)
}
func (c *couponHashSetImpl) GetEstimate() (float64, error) {
return getEstimate(c)
}
func (c *couponHashSetImpl) GetHipEstimate() (float64, error) {
return getEstimate(c)
}
func (c *couponHashSetImpl) GetLowerBound(numStdDev int) (float64, error) {
return getLowerBound(c, numStdDev)
}
func (c *couponHashSetImpl) GetUpperBound(numStdDev int) (float64, error) {
return getUpperBound(c, numStdDev)
}
func (c *couponHashSetImpl) GetUpdatableSerializationBytes() int {
return c.getMemDataStart() + (4 << c.getLgCouponArrInts())
}
func (c *couponHashSetImpl) ToCompactSlice() ([]byte, error) {
return toCouponSlice(c, true)
}
func (c *couponHashSetImpl) ToUpdatableSlice() ([]byte, error) {
return toCouponSlice(c, false)
}
// couponUpdate updates the couponHashSetImpl with the given coupon.
func (c *couponHashSetImpl) couponUpdate(coupon int) (hllSketchStateI, error) {
index, err := findCoupon(c.couponIntArr, c.lgCouponArrInts, coupon)
if err != nil {
return nil, err
}
if index >= 0 {
return c, nil //found duplicate, ignore
}
c.couponIntArr[^index] = coupon
c.couponCount++ //found empty
t, err := c.checkGrowOrPromote()
if err != nil {
return nil, err
}
if t {
return promoteSetToHll(c)
}
return c, nil
}
func (c *couponHashSetImpl) iterator() pairIterator {
return newIntArrayPairIterator(c.couponIntArr, c.lgConfigK)
}
func (c *couponHashSetImpl) getMemDataStart() int {
return hashSetIntArrStart
}
func (c *couponHashSetImpl) getPreInts() int {
return hashSetPreInts
}
func (c *couponHashSetImpl) copyAs(tgtHllType TgtHllType) (hllSketchStateI, error) {
newC := &couponHashSetImpl{
hllSketchConfig: newHllSketchConfig(c.lgConfigK, tgtHllType, curModeSet),
hllCouponState: newHllCouponState(c.lgCouponArrInts, c.couponCount, make([]int, len(c.couponIntArr))),
}
copy(newC.couponIntArr, c.couponIntArr)
return newC, nil
}
func (c *couponHashSetImpl) copy() (hllSketchStateI, error) {
return c.copyAs(c.tgtHllType)
}
func (c *couponHashSetImpl) mergeTo(dest HllSketch) error {
return mergeCouponTo(c, dest)
}
// checkGrowOrPromote checks if the couponHashSetImpl should grow or promote to HLL.
func (c *couponHashSetImpl) checkGrowOrPromote() (bool, error) {
if (resizeDenom * c.couponCount) <= (resizeNumber * (1 << c.lgCouponArrInts)) {
return false, nil
}
if c.lgCouponArrInts == (c.lgConfigK - 3) {
return true, nil // promote to HLL
}
c.lgCouponArrInts++
arr, err := growHashSet(c.couponIntArr, c.lgCouponArrInts)
c.couponIntArr = arr
return false, err
}
// growHashSet doubles the size of the given couponHashSetImpl and reinsert the existing entries.
func growHashSet(couponIntArr []int, tgtLgCoupArrSize int) ([]int, error) {
tgtCouponIntArr := make([]int, 1<<tgtLgCoupArrSize)
for _, fetched := range couponIntArr {
if fetched != empty {
idx, err := findCoupon(tgtCouponIntArr, tgtLgCoupArrSize, fetched)
if err != nil {
return nil, err
}
if idx < 0 {
tgtCouponIntArr[^idx] = fetched
continue
}
return nil, fmt.Errorf("growHashSet, found duplicate")
}
}
return tgtCouponIntArr, nil
}
// promoteSetToHll move coupons to an hllArray from couponHashSetImpl
func promoteSetToHll(src *couponHashSetImpl) (hllArray, error) {
tgtHllArr, _ := newHllArray(src.lgConfigK, src.tgtHllType)
srcIter := src.iterator()
tgtHllArr.putKxQ0(float64(uint64(1) << src.lgConfigK))
for srcIter.nextValid() {
p, err := srcIter.getPair()
if err != nil {
return nil, err
}
_, err = tgtHllArr.couponUpdate(p)
if err != nil {
return nil, err
}
}
est, err := src.GetEstimate()
if err != nil {
return nil, err
}
tgtHllArr.putHipAccum(est)
tgtHllArr.putOutOfOrder(false)
return tgtHllArr, nil
}
// findCoupon searches the Coupon hash table for an empty slot or a duplicate depending on the context.
// If entire entry is empty, returns one's complement of index = found empty.
// If entry equals given coupon, returns its index = found duplicate coupon.
// Continues searching.
// If the probe comes back to original index, return an error.
func findCoupon(array []int, lgArrInts int, coupon int) (int, error) {
arrMask := len(array) - 1
probe := coupon & arrMask
loopIndex := probe
for ok := true; ok; ok = probe != loopIndex {
couponAtIdx := array[probe]
if couponAtIdx == empty {
return ^probe, nil //empty
} else if coupon == couponAtIdx {
return probe, nil //duplicate
}
stride := ((coupon & keyMask26) >> lgArrInts) | 1
probe = (probe + stride) & arrMask
}
return 0, fmt.Errorf("key not found and no empty slots")
}
// newCouponHashSet returns a new couponHashSetImpl.
// lgConfigK the configured Lg K
// tgtHllType the target HLL type
func newCouponHashSet(lgConfigK int, tgtHllType TgtHllType) (couponHashSetImpl, error) {
if lgConfigK <= 7 {
return couponHashSetImpl{}, fmt.Errorf("lgConfigK must be > 7 for SET mode")
}
cl, err := newCouponList(lgConfigK, tgtHllType, curModeSet)
if err != nil {
return couponHashSetImpl{}, err
}
return couponHashSetImpl(cl), nil
}
// deserializeCouponHashSet returns a new couponHashSetImpl from the given byte array.
func deserializeCouponHashSet(byteArray []byte) (hllCoupon, error) {
lgConfigK := extractLgK(byteArray)
tgtHllType := extractTgtHllType(byteArray)
curMode := extractCurMode(byteArray)
memArrStart := listIntArrStart
if curMode == curModeSet {
memArrStart = hashSetIntArrStart
}
set, err := newCouponHashSet(lgConfigK, tgtHllType)
if err != nil {
return nil, err
}
memIsCompact := extractCompactFlag(byteArray)
couponCount := extractHashSetCount(byteArray)
lgCouponArrInts := extractLgArr(byteArray)
if lgCouponArrInts < lgInitSetSize {
lgCouponArrInts, err = computeLgArr(byteArray, couponCount, lgConfigK)
if err != nil {
return nil, err
}
}
if memIsCompact {
for it := 0; it < couponCount && err == nil; it++ {
_, err = set.couponUpdate(int(binary.LittleEndian.Uint32(byteArray[memArrStart+(it<<2) : memArrStart+(it<<2)+4])))
}
if err != nil {
return nil, err
}
} else {
set.couponCount = couponCount
set.lgCouponArrInts = lgCouponArrInts
couponArrInts := 1 << lgCouponArrInts
set.couponIntArr = make([]int, couponArrInts)
for it := 0; it < couponArrInts; it++ {
set.couponIntArr[it] = int(binary.LittleEndian.Uint32(byteArray[hashSetIntArrStart+(it<<2) : hashSetIntArrStart+(it<<2)+4]))
}
}
return &set, nil
}