plugins/processors/awsapplicationsignals/internal/cardinalitycontrol/count_min_sketch.go (84 lines of code) (raw):

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT package cardinalitycontrol import ( "hash/adler32" "hash/crc32" "hash/fnv" ) type CountMinSketchHashFunc func(hashKey string) int64 type CountMinSketchEntry interface { HashKey() string Frequency() int } type CountMinSketch struct { depth int maxDepth int width int matrix [][]int hashFuncs []CountMinSketchHashFunc } func (cms *CountMinSketch) Insert(obj CountMinSketchEntry) { for i := 0; i < cms.depth; i++ { hashFunc := cms.hashFuncs[i] hashValue := hashFunc(obj.HashKey()) pos := int(hashValue % int64(cms.width)) cms.matrix[i][pos] += obj.Frequency() } } func NewCountMinSketch(depth, width int, hashFuncs ...CountMinSketchHashFunc) *CountMinSketch { matrix := make([][]int, depth) for i := range matrix { matrix[i] = make([]int, width) } cms := &CountMinSketch{ depth: 0, maxDepth: depth, width: width, matrix: matrix, } if hashFuncs != nil { cms.RegisterHashFunc(hashFuncs...) } else { RegisterDefaultHashFuncs(cms) } return cms } func RegisterDefaultHashFuncs(cms *CountMinSketch) { hashFunc1 := func(hashKey string) int64 { h := fnv.New32a() h.Write([]byte(hashKey)) return int64(h.Sum32()) } hashFunc2 := func(hashKey string) int64 { hash := crc32.ChecksumIEEE([]byte(hashKey)) return int64(hash) } hashFunc3 := func(hashKey string) int64 { hash := adler32.Checksum([]byte(hashKey)) return int64(hash) } cms.RegisterHashFunc(hashFunc1, hashFunc2, hashFunc3) } func (cms *CountMinSketch) RegisterHashFunc(hashFuncs ...CountMinSketchHashFunc) { if cms.hashFuncs == nil { cms.hashFuncs = hashFuncs } else { cms.hashFuncs = append(cms.hashFuncs, hashFuncs...) } if cms.maxDepth < len(cms.hashFuncs) { cms.depth = cms.maxDepth } else { cms.depth = len(cms.hashFuncs) } } func (cms *CountMinSketch) Get(obj CountMinSketchEntry) int { minCount := int(^uint(0) >> 1) // Initialize with the maximum possible integer value for i := 0; i < cms.depth; i++ { hashFunc := cms.hashFuncs[i] hashValue := hashFunc(obj.HashKey()) pos := int(hashValue % int64(cms.width)) if cms.matrix[i][pos] < minCount { minCount = cms.matrix[i][pos] } } return minCount }