config.go (64 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
package qf
import "fmt"
// minQBits is the initial number of quotient bits when no explicit
// configuration is provided, and the minimum number of qbits supported
//
// implementation note: minQBits must be greater than 3 as we require
// 3 bits for quotient filter book keeping
const minQBits = 4
// Config controls the behavior of the quotient filter
type Config struct {
// The number of bits of storage to allocate and manage per
// entry.
BitsOfStoragePerEntry uint
// BitPacked, when true, will use a bitpacked storage format
// which is slightly less efficient computationally but results
// in a smaller quotient filter, especially for larger numbers
// of entries
BitPacked bool
// ExpectedEntries may be provided to pre-size a quotient filter
// which can reduces time during batch loading. The quotient
// filter will be automatically sized to be just large enough to
// hold the expected number of entries without exceeding a reasonable
// loading factor
ExpectedEntries uint64
// HashFn may be specified to over-ride the default used by the
// implementation (64 bit murmur hash). When over-ridded, caller must
// take care that when a quotient filter is loaded the hash function
// is set to the same hash function used when populating the quotient
// filter
HashFn HashFn
}
// ExpectedLoading reports the expected percentage loading given the
// number of entries specified
func (c *Config) ExpectedLoading() float64 {
return 100. * float64(c.ExpectedEntries) / float64(c.BucketCount())
}
// BytesRequired reports the approximate amount of space required to represent
// the quotient filter on disk or in ram (assuming bit packing).
func (c *Config) BytesRequired() uint {
bitsPerEntry := (64 - c.QBits()) + 3 + uint(c.BitsOfStoragePerEntry)
return c.BucketCount() * bitsPerEntry / 8
}
// BucketCount reports the number of hash buckets that will be allocated
// given the quotient bits.
func (c *Config) BucketCount() uint {
return 1 << c.QBits()
}
// QBits returns the number of bits of the hash balue that will be used
// to determine the hash bucket
func (c *Config) QBits() uint {
x := uint(1)
bits := uint(0)
for (float64(x) * MaxLoadingFactor) < float64(c.ExpectedEntries) {
x <<= 1
bits++
}
if bits < minQBits {
bits = minQBits
}
return bits
}
// ExplainIndent will print an indented summary of the configuration to stdout
func (c *Config) ExplainIndent(indent string) {
fmt.Printf("%s%2d bits configured for quotient (%d buckets)\n", indent, c.QBits(), c.BucketCount())
fmt.Printf("%s%2d bits needed per bucket for remainder\n", indent, bitsPerWord-c.QBits())
fmt.Printf("%s%2d bits metadata per bucket\n", indent, 3)
fmt.Printf("%s%2d bits external storage\n", indent, c.BitsOfStoragePerEntry)
fmt.Printf("%s %s storage size expected\n", indent, humanBytes(c.BytesRequired()))
}
// Explain will print a summary of the configuration to stdout
func (c *Config) Explain() {
c.ExplainIndent("")
}
func humanBytes(bytes uint) string {
v := float64(bytes)
suffix := "bytes"
if v > 1024 {
v /= 1024.
suffix = "KB"
if v > 1024. {
suffix = "MB"
v /= 1024.0
if v > 1024. {
suffix = "GB"
v /= 1024.
}
}
}
if v < 10 {
return fmt.Sprintf("%0.2f %s", v, suffix)
} else if v < 100 {
return fmt.Sprintf("%0.1f %s", v, suffix)
} else {
return fmt.Sprintf("%0.0f %s", v, suffix)
}
}