cpc/utils.go (964 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cpc
import (
"encoding/binary"
"fmt"
"github.com/apache/datasketches-go/internal"
"math"
"math/bits"
"strings"
)
type CpcFormat int
type CpcFlavor int
const (
CpcFormatEmptyMerged CpcFormat = 0
CpcFormatEmptyHip CpcFormat = 1
CpcFormatSparseHybridMerged CpcFormat = 2
CpcFormatSparseHybridHip CpcFormat = 3
CpcFormatPinnedSlidingMergedNosv CpcFormat = 4
CpcFormatPinnedSlidingHipNosv CpcFormat = 5
CpcFormatPinnedSlidingMerged CpcFormat = 6
CpcFormatPinnedSlidingHip CpcFormat = 7
)
const (
CpcFlavorEmpty CpcFlavor = 0 // 0 == C < 1
CpcFlavorSparse CpcFlavor = 1 // 1 <= C < 3K/32
CpcFlavorHybrid CpcFlavor = 2 // 3K/32 <= C < K/2
CpcFlavorPinned CpcFlavor = 3 // K/2 <= C < 27K/8 [NB: 27/8 = 3 + 3/8]
CpcFlavorSliding CpcFlavor = 4 // 27K/8 <= C
)
const (
loFieldPreInts = iota
loFieldSerVer
loFieldFamily
loFieldLgK
loFieldFiCol
loFieldFlags
loFieldSeedHash
)
const (
// Preamble hi field definitions
// This defines the eight additional preamble fields located after the <i>LoField</i>.
// Do not change the order.
//
// Note: NUM_SV has dual meanings: In sparse and hybrid flavors it is equivalent to
// numCoupons so it isn't stored separately. In pinned and sliding flavors is the
// numSV of the PairTable, which stores only surprising values.
hiFieldNumCoupons = iota
hiFieldNumSV
hiFieldKXP
hiFieldHipAccum
hiFieldSVLengthInts
hiFieldWLengthInts
hiFieldSVStream
hiFieldWStream
)
var (
// This defines the byte offset for each of the 8 <i>HiFields</i>
// given the Format ordinal (1st dimension) and the HiField ordinal (2nd dimension).
hiFieldOffset = [8][8]byte{
{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0},
{8, 0, 0, 0, 12, 0, 16, 0},
{8, 0, 16, 24, 12, 0, 32, 0},
{8, 0, 0, 0, 0, 12, 0, 16},
{8, 0, 16, 24, 0, 12, 0, 32},
{8, 12, 0, 0, 16, 20, 24, 24}, //the 2nd 24 is not used.
{8, 12, 16, 24, 32, 36, 40, 40}, //the 2nd 40 is not used.
}
)
const (
serVer = 1
// Flags bit masks, Byte 5
bigEndianFlagMask = 1 // Reserved.
compressedFlagMask = 2 // Compressed Flag
hipFlagMask = 4 // HIP Flag
supValFlagMask = 8 // num Surprising Values > 0
windowFlagMask = 16 // window length > 0
defaultLgK = 11
)
var (
kxpByteLookup = makeKxpByteLookup()
)
var (
// This defines the preamble space required by each of the formats in units of 4-byte integers.
preIntsDefs = []byte{2, 2, 4, 8, 4, 8, 6, 10}
)
// dataFmt is the format string used to print each integer (index and hex value).
const dataFmt = "%10d %10x"
func getDefinedPreInts(format CpcFormat) int {
return int(preIntsDefs[format])
}
func getPreInts(mem []byte) int {
return int(mem[loFieldPreInts] & 0xFF)
}
func getSerVer(mem []byte) int {
return int(mem[loFieldSerVer] & 0xFF)
}
func hasHip(mem []byte) bool {
return (getFlags(mem) & hipFlagMask) > 0
}
func checkLgK(lgK int) error {
if lgK < minLgK || lgK > maxLgK {
return fmt.Errorf("LgK must be >= %d and <= %d: %d", minLgK, maxLgK, lgK)
}
return nil
}
func checkLgSizeInts(lgSizeInts int) error {
if lgSizeInts < 2 || lgSizeInts > 26 {
return fmt.Errorf("illegal LgSizeInts: %d", lgSizeInts)
}
return nil
}
func checkSeeds(seedA uint64, seedB uint64) error {
if seedA != seedB {
return fmt.Errorf("incompatible seeds: %d %d", seedA, seedB)
}
return nil
}
func (f CpcFormat) String() string {
switch f {
case CpcFormatEmptyMerged:
return "EMPTY_MERGED"
case CpcFormatEmptyHip:
return "EMPTY_HIP"
case CpcFormatSparseHybridMerged:
return "SPARSE_HYBRID_MERGED"
case CpcFormatSparseHybridHip:
return "SPARSE_HYBRID_HIP"
case CpcFormatPinnedSlidingMergedNosv:
return "PINNED_SLIDING_MERGED_NOSV"
case CpcFormatPinnedSlidingHipNosv:
return "PINNED_SLIDING_HIP_NOSV"
case CpcFormatPinnedSlidingMerged:
return "PINNED_SLIDING_MERGED"
case CpcFormatPinnedSlidingHip:
return "PINNED_SLIDING_HIP"
default:
return fmt.Sprintf("UnknownFormat(%d)", f)
}
}
func determineFlavor(lgK int, numCoupons uint64) CpcFlavor {
c := numCoupons
k := uint64(1) << lgK
c2 := c << 1
c8 := c << 3
c32 := c << 5
if c == 0 {
return CpcFlavorEmpty // 0 == C < 1
}
if c32 < (3 * k) {
return CpcFlavorSparse // 1 <= C < 3K/32
}
if c2 < k {
return CpcFlavorHybrid // 3K/32 <= C < K/2
}
if c8 < (27 * k) {
return CpcFlavorPinned // K/2 <= C < 27K/8
}
return CpcFlavorSliding // 27K/8 <= C
}
func (f CpcFlavor) String() string {
switch f {
case CpcFlavorEmpty:
return "EMPTY"
case CpcFlavorSparse:
return "SPARSE"
case CpcFlavorHybrid:
return "HYBRID"
case CpcFlavorPinned:
return "PINNED"
case CpcFlavorSliding:
return "SLIDING"
default:
return fmt.Sprintf("UnknownFlavor(%d)", f)
}
}
func orMatrixIntoMatrix(destMatrix []uint64, destLgK int, srcMatrix []uint64, srcLgK int) {
destMask := (1 << destLgK) - 1
srcK := 1 << srcLgK
for srcRow := 0; srcRow < srcK; srcRow++ {
destMatrix[srcRow&destMask] |= srcMatrix[srcRow]
}
}
func countBitsSetInMatrix(matrix []uint64) uint64 {
count := uint64(0)
for _, v := range matrix {
count += uint64(bits.OnesCount64(v))
}
return count
}
func walkTableUpdatingSketch(dest *CpcSketch, table *pairTable) error {
slots := table.slotsArr
numSlots := 1 << table.lgSizeInts
destMask := ((1<<dest.lgK)-1)<<6 | 63 // downsamples when dest.lgK < srcLgK
stride := int(internal.InverseGolden * float64(numSlots))
if stride == (stride >> 1 << 1) {
stride++
}
for i, j := 0, 0; i < numSlots; i, j = i+1, j+stride {
j &= numSlots - 1
rowCol := slots[j]
if rowCol != -1 {
if err := dest.rowColUpdate(rowCol & destMask); err != nil {
return err
}
}
}
return nil
}
func makeKxpByteLookup() []float64 {
lookup := make([]float64, 256)
for b := 0; b < 256; b++ {
sum := 0.0
for col := 0; col < 8; col++ {
bit := (b >> col) & 1
if bit == 0 {
sumI, _ := internal.InvPow2(col + 1)
sum += sumI
}
}
lookup[b] = sum
}
return lookup
}
func checkLoPreamble(bytes []byte) error {
if err := checkBounds(0, 8, len(bytes)); err != nil {
return err
}
if bytes[loFieldSerVer] != (serVer & 0xFF) {
return fmt.Errorf("SerVer: %d, bytes[loFieldSerVer]: %d", serVer&0xFF, bytes[loFieldSerVer])
}
fmat := getFormat(bytes)
preIntsDef := preIntsDefs[fmat] & 0xFF
if bytes[loFieldPreInts] != preIntsDef {
return fmt.Errorf("preIntsDef: %d, bytes[loFieldPreInts]: %d", preIntsDef, bytes[loFieldPreInts])
}
fam := getFamilyId(bytes)
if fam != internal.FamilyEnum.CPC.Id {
return fmt.Errorf("family: %d, bytes[loFieldFamily]: %d", internal.FamilyEnum.CPC.Id, fam)
}
lgK := getLgK(bytes)
if lgK < 4 || lgK > 26 {
return fmt.Errorf("lgK: %d", lgK)
}
fiCol := bytes[loFieldFiCol] & 0xFF
if fiCol > 63 {
return fmt.Errorf("fiCol: %d", fiCol)
}
return nil
}
func checkBounds(reqOff, reqLen, allocSize int) error {
if reqOff < 0 || reqLen < 0 || reqOff+reqLen < 0 || allocSize-(reqOff+reqLen) < 0 {
return fmt.Errorf("bounds Violation: reqOffset: %d, reqLength: %d, (reqOff + reqLen): %d, allocSize: %d", reqOff, reqLen, reqOff+reqLen, allocSize)
}
return nil
}
func checkCapacity(memCap, expectedCap int) error {
if memCap < expectedCap {
return fmt.Errorf("insufficient Image Bytes = %d, Expected = %d", memCap, expectedCap)
}
return nil
}
func isCompressed(bytes []byte) bool {
return (getFlags(bytes) & compressedFlagMask) > 0
}
func getFamilyId(bytes []byte) int {
return int(bytes[loFieldFamily] & 0xFF)
}
func getLgK(bytes []byte) int {
return int(bytes[loFieldLgK] & 0xFF)
}
func getKxP(mem []byte) float64 {
offset, _ := getHiFieldOffset(getFormat(mem), hiFieldKXP)
return math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
}
func getHipAccum(bytes []byte) float64 {
offset, _ := getHiFieldOffset(getFormat(bytes), hiFieldHipAccum)
return math.Float64frombits(binary.LittleEndian.Uint64(bytes[offset : offset+8]))
}
func getSeedHash(mem []byte) int16 {
return int16(binary.LittleEndian.Uint16(mem[loFieldSeedHash:]))
}
func getFormat(bytes []byte) CpcFormat {
ord := getFormatOrdinal(bytes)
return CpcFormat(ord)
}
func getFormatOrdinal(bytes []byte) int {
flags := getFlags(bytes)
return (flags >> 2) & 0x7
}
func getFlags(bytes []byte) int {
return int(bytes[loFieldFlags] & 0xFF)
}
func getNumCoupons(bytes []byte) uint64 {
offset, _ := getHiFieldOffset(getFormat(bytes), hiFieldNumCoupons)
return uint64(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
}
func getNumSV(bytes []byte) uint64 {
offset, _ := getHiFieldOffset(getFormat(bytes), hiFieldNumSV)
return uint64(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
}
func getSvLengthInts(bytes []byte) int {
offset, _ := getHiFieldOffset(getFormat(bytes), hiFieldSVLengthInts)
return int(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
}
func getWStream(bytes []byte) []int {
offset, _ := getWStreamOffset(bytes)
wLength := getWLengthInts(bytes)
wStream := make([]int, wLength)
for i := 0; i < wLength; i++ {
wStream[i] = int(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
offset += 4
}
return wStream
}
func getWStreamOffset(mem []byte) (int, error) {
// Get the format from the memory.
format := getFormat(mem)
// Check that a window is present.
if getFlags(mem)&windowFlagMask <= 0 {
return 0, fmt.Errorf("window not available for format %s", format.String())
}
offset, err := getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return 0, err
}
// Ensure there are enough bytes.
if len(mem) < offset+4 {
return 0, fmt.Errorf("insufficient memory length to read wLengthInts")
}
wLengthInts := int(binary.LittleEndian.Uint32(mem[offset : offset+4]))
if wLengthInts == 0 {
return 0, fmt.Errorf("wLengthInts cannot be zero")
}
preInts := getPreInts(mem)
return preInts << 2, nil
}
func getSvStream(bytes []byte) []int {
offset, _ := getSvStreamOffset(bytes)
svLengthInts := getSvLengthInts(bytes)
svStream := make([]int, svLengthInts)
for i := 0; i < svLengthInts; i++ {
svStream[i] = int(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
offset += 4
}
return svStream
}
func getSvStreamOffset(mem []byte) (int, error) {
// Get the format from the byte slice.
format := getFormat(mem)
// If the memory does not have an SV stream, return an error.
if getFlags(mem)&supValFlagMask <= 0 {
return 0, fieldError(format, hiFieldSVLengthInts)
}
// Retrieve svLengthInts from the appropriate hi-field.
offset, err := getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return 0, err
}
svLengthInts := int(binary.LittleEndian.Uint32(mem[offset : offset+4]))
if svLengthInts == 0 {
return 0, fmt.Errorf("svLengthInts cannot be zero")
}
// Retrieve wLengthInts if a window is present.
var wLengthInts int
if getFlags(mem)&windowFlagMask > 0 {
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return 0, err
}
wLengthInts = int(binary.LittleEndian.Uint32(mem[offset : offset+4]))
if wLengthInts == 0 {
return 0, fmt.Errorf("wLengthInts cannot be zero")
}
}
// The offset for the SV stream is computed as: (preInts + wLengthInts) * 4.
preInts := getPreInts(mem)
return (preInts + wLengthInts) << 2, nil
}
func getWLengthInts(bytes []byte) int {
offset, _ := getHiFieldOffset(getFormat(bytes), hiFieldWLengthInts)
return int(binary.LittleEndian.Uint32(bytes[offset : offset+4]))
}
func getFiCol(bytes []byte) int {
return int(bytes[loFieldFiCol] & 0xFF)
}
// getHiFieldOffset returns the defined byte offset from the start of the preamble given the Format and the HiField.
func getHiFieldOffset(format CpcFormat, hiField int) (int, error) {
offset := int(hiFieldOffset[format][hiField]) & 0xFF
if offset == 0 {
return 0, fmt.Errorf("illegal operation: Format = %s, HiField = %d", format.String(), hiField)
}
return offset, nil
}
func determineCorrectOffset(lgK int, numCoupons uint64) int {
c := int(numCoupons)
k := int(1) << lgK
tmp := (c << 3) - (19 * k) // 8C - 19K
if tmp < 0 {
return 0
}
return int(tmp >> (lgK + 3)) // tmp / 8K
}
// putEmptyMerged writes the empty merged preamble into the provided raw byte slice.
// mem is the output byte slice.
// lgK is the sketch parameter, and seedHash is the computed seed hash.
// Returns an error if the capacity is insufficient or if writing fails.
func putEmptyMerged(mem []byte, lgK int, seedHash int16) error {
format := CpcFormatEmptyMerged
preInts := byte(getDefinedPreInts(format))
fiCol := byte(0)
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that mem has at least 8 bytes.
if err := checkCapacity(len(mem), 8); err != nil {
return err
}
return putFirst8(mem, preInts, byte(lgK), fiCol, flags, seedHash)
}
func putEmptyHip(mem []byte, lgK int, seedHash int16) error {
format := CpcFormatEmptyHip
preInts := byte(getDefinedPreInts(format))
fiCol := byte(0)
flags := byte((int(format) << 2) | compressedFlagMask)
if err := checkCapacity(len(mem), 8); err != nil {
return err
}
return putFirst8(mem, preInts, byte(lgK), fiCol, flags, seedHash)
}
func putSparseHybridMerged(mem []byte, lgK int, numCoupons int, svLengthInts int, seedHash int16, svStream []int) error {
// Set the format to SPARSE_HYBRID_MERGED.
format := CpcFormatSparseHybridMerged
preInts := byte(getDefinedPreInts(format))
fiCol := byte(0)
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that the memory slice has enough capacity.
if err := checkCapacity(len(mem), 4*(int(preInts)+svLengthInts)); err != nil {
return err
}
// Write the first 8 bytes (low preamble fields).
if err := putFirst8(mem, preInts, byte(lgK), fiCol, flags, seedHash); err != nil {
return err
}
// Write the high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(svLengthInts))
offset, err = getSvStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < svLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(svStream[i]))
}
return nil
}
func putSparseHybridHip(mem []byte, lgK int, numCoupons, svLengthInts int, kxp, hipAccum float64, seedHash int16, svStream []int) error {
// Set the format.
format := CpcFormatSparseHybridHip
preInts := byte(getDefinedPreInts(format))
fiCol := byte(0)
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that mem has enough capacity: 4 bytes per preInt + 4 bytes per sv integer.
if err := checkCapacity(len(mem), 4*(int(preInts)+svLengthInts)); err != nil {
return err
}
// Write the low preamble fields.
if err := putFirst8(mem, preInts, byte(lgK), fiCol, flags, seedHash); err != nil {
return err
}
// Write the high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(svLengthInts))
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(kxp))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(hipAccum))
// Write the SV stream into memory.
offset, err = getSvStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < svLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(svStream[i]))
}
return nil
}
func putPinnedSlidingMergedNoSv(mem []byte, lgK int, fiCol int, numCoupons int, wLengthInts int, seedHash int16, wStream []int) error {
// Set the format to PINNED_SLIDING_MERGED_NOSV.
format := CpcFormatPinnedSlidingMergedNosv
preInts := byte(getDefinedPreInts(format))
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that the memory slice has enough capacity.
if err := checkCapacity(len(mem), 4*(int(preInts)+wLengthInts)); err != nil {
return err
}
// Write the low preamble fields.
if err := putFirst8(mem, preInts, byte(lgK), byte(fiCol), flags, seedHash); err != nil {
return err
}
// Write the high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(wLengthInts))
// Write the window stream array.
offset, err = getWStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < wLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(wStream[i]))
}
return nil
}
func putPinnedSlidingHipNoSv(mem []byte, lgK int, fiCol int, numCoupons int, wLengthInts int, kxp float64, hipAccum float64, seedHash int16, wStream []int) error {
// Set the format to PINNED_SLIDING_HIP_NOSV.
format := CpcFormatPinnedSlidingHipNosv
preInts := byte(getDefinedPreInts(format))
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that the memory slice has enough capacity.
if err := checkCapacity(len(mem), 4*(int(preInts)+wLengthInts)); err != nil {
return err
}
// Write the low preamble fields.
if err := putFirst8(mem, preInts, byte(lgK), byte(fiCol), flags, seedHash); err != nil {
return err
}
// Write the high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(wLengthInts))
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(kxp))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(hipAccum))
// Write the window stream array.
offset, err = getWStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < wLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(wStream[i]))
}
return nil
}
func putPinnedSlidingMerged(mem []byte, lgK int, fiCol int, numCoupons int, numSv int, svLengthInts int, wLengthInts int, seedHash int16, svStream []int, wStream []int) error {
// Set the format.
format := CpcFormatPinnedSlidingMerged
preInts := byte(getDefinedPreInts(format))
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that the memory slice has enough capacity.
if err := checkCapacity(len(mem), 4*(int(preInts)+svLengthInts+wLengthInts)); err != nil {
return err
}
// Write the low preamble fields.
if err := putFirst8(mem, preInts, byte(lgK), byte(fiCol), flags, seedHash); err != nil {
return err
}
// Write high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldNumSV)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numSv))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(svLengthInts))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(wLengthInts))
// Write the SV stream array.
offset, err = getSvStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < svLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(svStream[i]))
}
// Write the W stream array.
offset, err = getWStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < wLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(wStream[i]))
}
return nil
}
func putPinnedSlidingHip(mem []byte, lgK int, fiCol int, numCoupons int, numSv int, kxp float64, hipAccum float64, svLengthInts int, wLengthInts int, seedHash int16, svStream []int, wStream []int) error {
// Set the format.
format := CpcFormatPinnedSlidingHip
preInts := byte(getDefinedPreInts(format))
flags := byte((int(format) << 2) | compressedFlagMask)
// Check that the memory slice has enough capacity.
if err := checkCapacity(len(mem), 4*(int(preInts)+svLengthInts+wLengthInts)); err != nil {
return err
}
// Write the low preamble fields.
if err := putFirst8(mem, preInts, byte(lgK), byte(fiCol), flags, seedHash); err != nil {
return err
}
// Write high preamble fields.
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numCoupons))
offset, err = getHiFieldOffset(format, hiFieldNumSV)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(numSv))
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(kxp))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return err
}
binary.LittleEndian.PutUint64(mem[offset:], math.Float64bits(hipAccum))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(svLengthInts))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return err
}
binary.LittleEndian.PutUint32(mem[offset:], uint32(wLengthInts))
// Write the SV stream array.
offset, err = getSvStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < svLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(svStream[i]))
}
// Write the W stream array.
offset, err = getWStreamOffset(mem)
if err != nil {
return err
}
for i := 0; i < wLengthInts; i++ {
binary.LittleEndian.PutUint32(mem[offset+4*i:], uint32(wStream[i]))
}
return nil
}
func putFirst8(mem []byte, preInts, lgK, fiCol, flags byte, seedHash int16) error {
// Compute the total number of bytes to clear (4 bytes per preInt)
requiredBytes := int(preInts) * 4
if err := checkCapacity(len(mem), requiredBytes); err != nil {
return err
}
// Clear the entire preamble region
for i := 0; i < requiredBytes; i++ {
mem[i] = 0
}
// Write the low preamble fields at their fixed offsets.
mem[loFieldPreInts] = preInts
mem[loFieldSerVer] = serVer
mem[loFieldFamily] = byte(internal.FamilyEnum.CPC.Id)
mem[loFieldLgK] = lgK
mem[loFieldFiCol] = fiCol
mem[loFieldFlags] = flags
// Write the seed hash (2 bytes) at its offset (bytes 6-7).
binary.LittleEndian.PutUint16(mem[loFieldSeedHash:], uint16(seedHash))
return nil
}
func CpcSketchToString(mem []byte, detail bool) (string, error) {
LS := "\n"
capBytes := len(mem)
// Low preamble fields (first 8 bytes)
preInts := int(mem[loFieldPreInts]) & 0xFF
serVerVal := int(mem[loFieldSerVer]) & 0xFF
family := getFamilyId(mem)
lgK := int(mem[loFieldLgK]) & 0xFF
fiCol := int(mem[loFieldFiCol]) & 0xFF
flags := int(mem[loFieldFlags]) & 0xFF
seedHash := int(binary.LittleEndian.Uint16(mem[loFieldSeedHash:]))
seedHashStr := fmt.Sprintf("%x", seedHash)
flagsStr := zeroPad(fmt.Sprintf("%b", flags), 8) + ", " + fmt.Sprintf("%d", flags)
bigEndian := (flags & bigEndianFlagMask) > 0
compressed := (flags & compressedFlagMask) > 0
hasHipVal := (flags & hipFlagMask) > 0
hasSVVal := (flags & supValFlagMask) > 0
hasWindowVal := (flags & windowFlagMask) > 0
formatOrdinal := (flags >> 2) & 0x7
format := CpcFormat(formatOrdinal)
nativeOrderStr := "LittleEndian"
var numCoupons, numSv, winOffset, svLengthInts, wLengthInts int64
var kxp, hipAccum float64
var svStreamStart, wStreamStart int64
var reqBytes int64
sb := &strings.Builder{}
sb.WriteString(LS)
sb.WriteString("### CPC SKETCH IMAGE - PREAMBLE:" + LS)
sb.WriteString(fmt.Sprintf("Format : %s%s", format.String(), LS))
sb.WriteString(fmt.Sprintf("Byte 0: Preamble Ints : %d%s", preInts, LS))
sb.WriteString(fmt.Sprintf("Byte 1: SerVer : %d%s", serVerVal, LS))
sb.WriteString(fmt.Sprintf("Byte 2: Family : %d%s", family, LS))
sb.WriteString(fmt.Sprintf("Byte 3: lgK : %d%s", lgK, LS))
sb.WriteString(fmt.Sprintf("Byte 4: First Interesting Col : %d%s", fiCol, LS))
sb.WriteString(fmt.Sprintf("Byte 5: Flags : %s%s", flagsStr, LS))
sb.WriteString(fmt.Sprintf(" BIG_ENDIAN_STORAGE : %t%s", bigEndian, LS))
sb.WriteString(fmt.Sprintf(" (Native Byte Order) : %s%s", nativeOrderStr, LS))
sb.WriteString(fmt.Sprintf(" Compressed : %t%s", compressed, LS))
sb.WriteString(fmt.Sprintf(" Has HIP : %t%s", hasHipVal, LS))
sb.WriteString(fmt.Sprintf(" Has Surprising Values : %t%s", hasSVVal, LS))
sb.WriteString(fmt.Sprintf(" Has Window Values : %t%s", hasWindowVal, LS))
sb.WriteString(fmt.Sprintf("Byte 6, 7: Seed Hash : %s%s", seedHashStr, LS))
var flavor string
switch format {
case CpcFormatEmptyMerged, CpcFormatEmptyHip:
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
case CpcFormatSparseHybridMerged:
// NUM_COUPONS
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
numSv = numCoupons
// SV_LENGTH_INTS
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return "", err
}
svLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
// SV Stream offset
offset, err = getSvStreamOffset(mem)
if err != nil {
return "", err
}
svStreamStart = int64(offset)
reqBytes = svStreamStart + (svLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Num SV : %d%s", numSv, LS))
sb.WriteString(fmt.Sprintf("SV Length Ints : %d%s", svLengthInts, LS))
sb.WriteString(fmt.Sprintf("SV Stream Start : %d%s", svStreamStart, LS))
case CpcFormatSparseHybridHip:
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
numSv = numCoupons
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return "", err
}
svLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getSvStreamOffset(mem)
if err != nil {
return "", err
}
svStreamStart = int64(offset)
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return "", err
}
kxp = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return "", err
}
hipAccum = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
reqBytes = svStreamStart + (svLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Num SV : %d%s", numSv, LS))
sb.WriteString(fmt.Sprintf("SV Length Ints : %d%s", svLengthInts, LS))
sb.WriteString(fmt.Sprintf("SV Stream Start : %d%s", svStreamStart, LS))
sb.WriteString(fmt.Sprintf("KxP : %f%s", kxp, LS))
sb.WriteString(fmt.Sprintf("HipAccum : %f%s", hipAccum, LS))
case CpcFormatPinnedSlidingMergedNosv:
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
winOffset = int64(determineCorrectOffset(lgK, uint64(numCoupons)))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return "", err
}
wLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getWStreamOffset(mem)
if err != nil {
return "", err
}
wStreamStart = int64(offset)
reqBytes = wStreamStart + (wLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Window Offset : %d%s", winOffset, LS))
sb.WriteString(fmt.Sprintf("Window Length Ints : %d%s", wLengthInts, LS))
sb.WriteString(fmt.Sprintf("Window Stream Start : %d%s", wStreamStart, LS))
case CpcFormatPinnedSlidingHipNosv:
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
winOffset = int64(determineCorrectOffset(lgK, uint64(numCoupons)))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return "", err
}
wLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getWStreamOffset(mem)
if err != nil {
return "", err
}
wStreamStart = int64(offset)
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return "", err
}
kxp = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return "", err
}
hipAccum = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
reqBytes = wStreamStart + (wLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Window Offset : %d%s", winOffset, LS))
sb.WriteString(fmt.Sprintf("Window Length Ints : %d%s", wLengthInts, LS))
sb.WriteString(fmt.Sprintf("Window Stream Start : %d%s", wStreamStart, LS))
sb.WriteString(fmt.Sprintf("KxP : %f%s", kxp, LS))
sb.WriteString(fmt.Sprintf("HipAccum : %f%s", hipAccum, LS))
case CpcFormatPinnedSlidingMerged:
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
winOffset = int64(determineCorrectOffset(lgK, uint64(numCoupons)))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return "", err
}
wLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldNumSV)
if err != nil {
return "", err
}
numSv = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return "", err
}
svLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getWStreamOffset(mem)
if err != nil {
return "", err
}
wStreamStart = int64(offset)
offset, err = getSvStreamOffset(mem)
if err != nil {
return "", err
}
svStreamStart = int64(offset)
reqBytes = svStreamStart + (svLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Num SV : %d%s", numSv, LS))
sb.WriteString(fmt.Sprintf("SV Length Ints : %d%s", svLengthInts, LS))
sb.WriteString(fmt.Sprintf("SV Stream Start : %d%s", svStreamStart, LS))
sb.WriteString(fmt.Sprintf("Window Offset : %d%s", winOffset, LS))
sb.WriteString(fmt.Sprintf("Window Length Ints : %d%s", wLengthInts, LS))
sb.WriteString(fmt.Sprintf("Window Stream Start : %d%s", wStreamStart, LS))
case CpcFormatPinnedSlidingHip:
offset, err := getHiFieldOffset(format, hiFieldNumCoupons)
if err != nil {
return "", err
}
numCoupons = int64(binary.LittleEndian.Uint32(mem[offset:]))
winOffset = int64(determineCorrectOffset(lgK, uint64(numCoupons)))
offset, err = getHiFieldOffset(format, hiFieldWLengthInts)
if err != nil {
return "", err
}
wLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldNumSV)
if err != nil {
return "", err
}
numSv = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldSVLengthInts)
if err != nil {
return "", err
}
svLengthInts = int64(binary.LittleEndian.Uint32(mem[offset:]))
offset, err = getWStreamOffset(mem)
if err != nil {
return "", err
}
wStreamStart = int64(offset)
offset, err = getSvStreamOffset(mem)
if err != nil {
return "", err
}
svStreamStart = int64(offset)
offset, err = getHiFieldOffset(format, hiFieldKXP)
if err != nil {
return "", err
}
kxp = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
offset, err = getHiFieldOffset(format, hiFieldHipAccum)
if err != nil {
return "", err
}
hipAccum = math.Float64frombits(binary.LittleEndian.Uint64(mem[offset:]))
reqBytes = svStreamStart + (svLengthInts << 2)
flavor = determineFlavor(lgK, uint64(numCoupons)).String()
sb.WriteString(fmt.Sprintf("Flavor : %s%s", flavor, LS))
sb.WriteString(fmt.Sprintf("Num Coupons : %d%s", numCoupons, LS))
sb.WriteString(fmt.Sprintf("Num SV : %d%s", numSv, LS))
sb.WriteString(fmt.Sprintf("SV Length Ints : %d%s", svLengthInts, LS))
sb.WriteString(fmt.Sprintf("SV Stream Start : %d%s", svStreamStart, LS))
sb.WriteString(fmt.Sprintf("Window Offset : %d%s", winOffset, LS))
sb.WriteString(fmt.Sprintf("Window Length Ints : %d%s", wLengthInts, LS))
sb.WriteString(fmt.Sprintf("Window Stream Start : %d%s", wStreamStart, LS))
sb.WriteString(fmt.Sprintf("KxP : %f%s", kxp, LS))
sb.WriteString(fmt.Sprintf("HipAccum : %f%s", hipAccum, LS))
}
sb.WriteString(fmt.Sprintf("Actual Bytes : %d%s", capBytes, LS))
sb.WriteString(fmt.Sprintf("Required Bytes : %d%s", reqBytes, LS))
if detail {
sb.WriteString(LS + "### CPC SKETCH IMAGE - DATA" + LS)
if wLengthInts > 0 {
sb.WriteString(LS + "Window Stream:" + LS)
listData(mem, int(wStreamStart), int(wLengthInts), sb)
}
if svLengthInts > 0 {
sb.WriteString(LS + "SV Stream:" + LS)
listData(mem, int(svStreamStart), int(svLengthInts), sb)
}
}
sb.WriteString("### END CPC SKETCH IMAGE" + LS)
return sb.String(), nil
}
// zeroPad returns the given string s prepended with zeros so that the total length is at least fieldLength.
// If s is already fieldLength or longer, it returns s unchanged.
func zeroPad(s string, fieldLength int) string {
return characterPad(s, fieldLength, '0', false)
}
// characterPad returns s padded with the given padChar to reach fieldLength characters.
// If append is false, the padding is added to the beginning of s; if true, to the end.
// If s is already at least fieldLength characters long, s is returned unchanged.
func characterPad(s string, fieldLength int, padChar rune, append bool) string {
if len(s) >= fieldLength {
return s
}
padCount := fieldLength - len(s)
pad := strings.Repeat(string(padChar), padCount)
if append {
return s + pad
}
return pad + s
}
// listData reads lengthInts integers from mem starting at offsetBytes,
// and appends each formatted value (using dataFmt) to the provided strings.Builder.
func listData(mem []byte, offsetBytes, lengthInts int, sb *strings.Builder) {
memCap := len(mem)
expectedCap := offsetBytes + 4*lengthInts
if err := checkCapacity(memCap, expectedCap); err != nil {
panic(err)
}
for i := 0; i < lengthInts; i++ {
start := offsetBytes + 4*i
// Read 4 bytes as an uint32 (assuming little-endian).
value := int(binary.LittleEndian.Uint32(mem[start : start+4]))
sb.WriteString(fmt.Sprintf(dataFmt, i, value))
sb.WriteString("\n")
}
}