hll/aux_hash_map.go (165 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hll
import (
"encoding/binary"
"fmt"
)
// auxHashMap is a hash table for the Aux array.
type auxHashMap struct {
lgConfigK int //required for #slot bits
lgAuxArrInts int
auxCount int
auxIntArr []int
}
func (a *auxHashMap) copy() *auxHashMap {
newA := *a
newA.auxIntArr = make([]int, len(a.auxIntArr))
copy(newA.auxIntArr, a.auxIntArr)
return &newA
}
// newAuxHashMap returns a new auxHashMap.
func newAuxHashMap(lgAuxArrInts int, lgConfigK int) *auxHashMap {
return &auxHashMap{
lgConfigK: lgConfigK,
lgAuxArrInts: lgAuxArrInts,
auxCount: 0,
auxIntArr: make([]int, 1<<lgAuxArrInts),
}
}
// deserializeAuxHashMap returns a new auxHashMap from the given byte array.
func deserializeAuxHashMap(byteArray []byte, offset int, lgConfigL int, auxCount int, srcCompact bool) (*auxHashMap, error) {
var (
lgAuxArrInts int
)
if srcCompact {
v, err := computeLgArr(byteArray, auxCount, lgConfigL)
if err != nil {
return nil, err
}
lgAuxArrInts = v
} else {
lgAuxArrInts = extractLgArr(byteArray)
}
auxMap := newAuxHashMap(lgAuxArrInts, lgConfigL)
configKMask := (1 << lgConfigL) - 1
if srcCompact {
for i := 0; i < auxCount; i++ {
pair := int(binary.LittleEndian.Uint32(byteArray[offset+(i<<2) : offset+(i<<2)+4]))
slotNo := getPairLow26(pair) & configKMask
value := getPairValue(pair)
err := auxMap.mustAdd(slotNo, value) //increments count
if err != nil {
return nil, err
}
}
} else { //updatable
auxArrInts := 1 << lgAuxArrInts
for i := 0; i < auxArrInts; i++ {
pair := int(binary.LittleEndian.Uint32(byteArray[offset+(i<<2) : offset+(i<<2)+4]))
if pair == empty {
continue
}
slotNo := getPairLow26(pair) & configKMask
value := getPairValue(pair)
err := auxMap.mustAdd(slotNo, value) //increments count
if err != nil {
return nil, err
}
}
}
return auxMap, nil
}
func (a *auxHashMap) getAuxIntArr() []int {
return a.auxIntArr
}
func (a *auxHashMap) getCompactSizeBytes() int {
return a.auxCount << 2
}
func (a *auxHashMap) getUpdatableSizeBytes() int {
return 4 << a.lgAuxArrInts
}
func (a *auxHashMap) mustFindValueFor(slotNo int) (int, error) {
index, err := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo)
if err != nil {
return 0, err
}
if index < 0 {
return 0, fmt.Errorf("SlotNo not found: %d", slotNo)
}
return getPairValue(a.auxIntArr[index]), nil
}
func (a *auxHashMap) mustReplace(slotNo int, value int) error {
index, err := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo)
if err != nil {
return err
}
if index < 0 {
pairStr := pairString(pair(slotNo, value))
return fmt.Errorf("pair not found: %v", pairStr)
}
a.auxIntArr[index] = pair(slotNo, value)
return nil
}
// mustAdd adds the slotNo and value to the aux array.
// slotNo the index from the HLL array
// value the HLL value at the slotNo.
func (a *auxHashMap) mustAdd(slotNo int, value int) error {
index, err := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo)
if err != nil {
return err
}
pair := pair(slotNo, value)
if index >= 0 {
pairStr := pairString(pair)
return fmt.Errorf("found a slotNo that should not be there: %s", pairStr)
}
a.auxIntArr[^index] = pair
a.auxCount++
return a.checkGrow()
}
func (a *auxHashMap) getLgAuxArrInts() int {
return a.lgAuxArrInts
}
// iterator returns an iterator over the Aux array.
func (a *auxHashMap) iterator() pairIterator {
return newIntArrayPairIterator(a.auxIntArr, a.lgConfigK)
}
// getAuxCount returns the number of entries in the Aux array.
func (a *auxHashMap) getAuxCount() int {
return a.auxCount
}
// checkGrow checks to see if the aux array should be grown and does so if needed.
func (a *auxHashMap) checkGrow() error {
if (resizeDenom * a.auxCount) <= (resizeNumber * len(a.auxIntArr)) {
return nil
}
return a.growAuxSpace()
}
// growAuxSpace doubles the size of the aux array and reinsert the existing entries.
func (a *auxHashMap) growAuxSpace() error {
oldArray := a.auxIntArr
configKMask := int((1 << a.lgConfigK) - 1)
a.lgAuxArrInts++
a.auxIntArr = make([]int, 1<<a.lgAuxArrInts)
for _, fetched := range oldArray {
if fetched != empty {
//find empty in new array
idx, err := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, fetched&configKMask)
if err != nil {
return err
}
a.auxIntArr[^idx] = fetched
}
}
return nil
}
// findAuxHashMap searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
// If entire entry is empty, returns one's complement of index = found empty.
// If entry contains given slotNo, returns its index = found slotNo.
// Continues searching.
// If the probe comes back to original index, return an error.
func findAuxHashMap(auxArr []int, lgAuxArrInts int, lgConfigK int, slotNo int) (int, error) {
if lgAuxArrInts >= lgConfigK {
return 0, fmt.Errorf("lgAuxArrInts >= lgConfigK")
}
auxArrMask := (1 << lgAuxArrInts) - 1
configKMask := (1 << lgConfigK) - 1
probe := slotNo & auxArrMask
loopIndex := probe
for {
arrVal := auxArr[probe]
if arrVal == empty {
return ^probe, nil
} else if slotNo == (arrVal & configKMask) {
return probe, nil
}
stride := (slotNo >> lgAuxArrInts) | 1
probe = (probe + stride) & auxArrMask
if probe == loopIndex {
return 0, fmt.Errorf("key not found and no empty slots")
}
}
}