lib/hrw/rendezvous.go (122 lines of code) (raw):

// Copyright (c) 2016-2019 Uber Technologies, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package hrw import ( "encoding/binary" "encoding/hex" "hash" "math" "math/big" "sort" "github.com/spaolacci/murmur3" ) // min between two integers. func min(a, b int) int { if a < b { return a } return b } // HashFactory is a function object for Hash.New() constructor. type HashFactory func() hash.Hash // Murmur3Hash is a murmur3 HashFactory. func Murmur3Hash() hash.Hash { return murmur3.New64() } // UIntToFloat is a conversion function from uint64 to float64. // Int could be potentially very big integer, like 256 bits long. type UIntToFloat func(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 // RendezvousHashNode represents a weighted node in a hashing schema. type RendezvousHashNode struct { RHash *RendezvousHash // parent hash structure with all the configuration Label string // some string ientifying a unique node label Weight int // node weight, usually denotes node's capacity } // RendezvousHash represents a Rendezvous Hashing schema. // It does not make any assumption about concurrency model so synchronizing // access to it is a caller's responsibility. type RendezvousHash struct { Hash HashFactory // hash function ScoreFunc UIntToFloat // conversion function from generated hash to float64 Nodes []*RendezvousHashNode // all nodes MaxHashValue []byte } // RendezvousNodesByScore is a predicat that supports sorting by score(key). type RendezvousNodesByScore struct { key string nodes []*RendezvousHashNode } // Len return length. func (a RendezvousNodesByScore) Len() int { return len(a.nodes) } // Swap swaps two elements. func (a RendezvousNodesByScore) Swap(i, j int) { a.nodes[i], a.nodes[j] = a.nodes[j], a.nodes[i] } // Less is a predicate '<' for a set. func (a RendezvousNodesByScore) Less(i, j int) bool { return a.nodes[i].Score(a.key) < a.nodes[j].Score(a.key) } // NewRendezvousHash constructs and prepopulates a RendezvousHash object. func NewRendezvousHash(hashFactory HashFactory, scoreFunc UIntToFloat) *RendezvousHash { rh := &RendezvousHash{ Hash: hashFactory, ScoreFunc: scoreFunc, } hashLen := len(hashFactory().Sum(nil)) rh.MaxHashValue = make([]byte, hashLen) for i := 0; i < hashLen; i++ { rh.MaxHashValue[i] = 0xFF } return rh } // UInt64ToFloat64 Converts a uniformly random 64-bit integer // to "uniformly" random floating point number on interval [0, 1) // The approach is heavily based on this material // https://crypto.stackexchange.com/questions/31657/uniformly-distributed-secure-floating-point-numbers-in-0-1 // and this https://en.wikipedia.org/wiki/Rendezvous_hashing func UInt64ToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 { maxUInt := binary.BigEndian.Uint64(maxValue) fiftyThreeOnes := uint64(maxUInt >> (64 - 53)) fiftyThreeZeros := float64(1 << 53) u64val := binary.BigEndian.Uint64(bytesUInt) // val & 0xFFF000000000000 == 0 need to be handled differently // as it will result in zeros: something that score // function cannot survive. So there are 2^11 keys like that // need to be re-hashed one more time. That will introduce a tiny bias // in hashing key space distribution that we can live with val := u64val & fiftyThreeOnes if val == 0 && hasher != nil { hasher.Reset() hasher.Write(bytesUInt) val = binary.BigEndian.Uint64(hasher.Sum(nil)) & fiftyThreeOnes } return float64(val) / fiftyThreeZeros } // BigIntToFloat64 converts BigInt to float64. func BigIntToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 { maxHashFloat := new(big.Float) maxHashFloat.SetInt(new(big.Int).SetBytes(maxValue)) hashInt := new(big.Int) // checksumHash is being consumed as a big endian int. hashInt.SetBytes(bytesUInt) hashFloat := new(big.Float).SetInt(hashInt) // float64's precision, we would not need more then that // as we eventually cast everything to float64 // Big Float will use greater presicions in operations hashFloat.SetPrec(53) fl64value, _ := hashFloat.Quo(hashFloat, maxHashFloat).Float64() // I don't expact that to happen, the accuracy of 256 bits division // arithmetic is well within float'64 theoretical minimum for a single // division and we always divide with a non zero constant. if hashFloat.IsInf() { panic("Float64.Quo operation has failed") } return fl64value } // Score computes score of a key for this node in accordance to Weighted // Rendezvous Hash. It's using big golang float key as hexidemical encoding of // a byte array. func (rhn *RendezvousHashNode) Score(key string) float64 { hasher := rhn.RHash.Hash() keyBytes, err := hex.DecodeString(key) if err != nil { return math.NaN() } hashBytes := make([]byte, len(keyBytes)+len(rhn.Label)) // Add node's seed to a key string hashBytes = append(keyBytes, []byte(rhn.Label)...) hasher.Write(hashBytes) score := rhn.RHash.ScoreFunc(hasher.Sum(nil), rhn.RHash.MaxHashValue, hasher) // for more information on this math please look at this paper: // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.414.9353&rep=rep1&type=pdf // and this presentation slides: // http://www.snia.org/sites/default/files/SDC15_presentations/dist_sys/Jason_Resch_New_Consistent_Hashings_Rev.pdf return -float64(rhn.Weight) / math.Log(score) } // AddNode adds a node to a hashing ring. func (rh *RendezvousHash) AddNode(seed string, weight int) { node := &RendezvousHashNode{ RHash: rh, Label: seed, Weight: weight, } rh.Nodes = append(rh.Nodes, node) } // RemoveNode removes a node from a hashing ring. func (rh *RendezvousHash) RemoveNode(name string) { for i, node := range rh.Nodes { if node.Label == name { rh.Nodes = append(rh.Nodes[:i], rh.Nodes[i+1:]...) break } } } // GetNode gets a node from a hashing ring and its index in array. func (rh *RendezvousHash) GetNode(name string) (*RendezvousHashNode, int) { for index, node := range rh.Nodes { if node.Label == name { return node, index } } return nil, -1 } // GetOrderedNodes gets an ordered set of N nodes for a key where // score(Node1) > score(N2) > ... score(NodeN). // Number of returned nodes = min(N, len(nodes)). func (rh *RendezvousHash) GetOrderedNodes(key string, n int) []*RendezvousHashNode { nodes := make([]*RendezvousHashNode, len(rh.Nodes)) copy(nodes, rh.Nodes) sort.Sort(sort.Reverse(&RendezvousNodesByScore{key: key, nodes: nodes})) if n >= len(nodes) { return nodes } return nodes[:n] }