pkg/encoding/xor.go (122 lines of code) (raw):

// Licensed to Apache Software Foundation (ASF) under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Apache Software Foundation (ASF) licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package encoding import ( "math/bits" ) const ( ctrlBitsNoContainMeaningful = 0x2 ctrlBitsContainMeaningful = 0x3 ) // XOREncoder intends to compress uint64 data // https://www.vldb.org/pvldb/vol8/p1816-teller.pdf type XOREncoder struct { bw *Writer preVal uint64 leading int trailing int first bool } // NewXOREncoder creates xor zstdEncoder for compressing uint64 data. func NewXOREncoder(bw *Writer) *XOREncoder { return &XOREncoder{ bw: bw, first: true, } } func (e *XOREncoder) Write(val uint64) { if e.first { e.first = false e.preVal = val e.bw.WriteBits(val, 64) return } delta := val ^ e.preVal e.preVal = val if delta == 0 { e.bw.WriteBool(false) return } leading := bits.LeadingZeros64(delta) trailing := bits.TrailingZeros64(delta) if leading >= e.leading && trailing >= e.trailing { // write control '10' to reuse previous block meaningful bits e.bw.WriteBits(ctrlBitsNoContainMeaningful, 2) e.bw.WriteBits(delta>>uint(e.trailing), 64-e.leading-e.trailing) } else { // write control '11' to create a new block meaningful bits e.bw.WriteBits(ctrlBitsContainMeaningful, 2) meaningfulLen := 64 - leading - trailing e.bw.WriteBits(uint64(leading), 6) // meaningfulLen is at least 1, so we can subtract 1 from it and encode it in 6 bits e.bw.WriteBits(uint64(meaningfulLen-1), 6) e.bw.WriteBits(delta>>uint(trailing), meaningfulLen) e.leading = leading e.trailing = trailing } } // XORDecoder decodes buffer to uint64 values using xor compress. type XORDecoder struct { err error br *Reader val uint64 leading uint64 trailing uint64 first bool } // NewXORDecoder create zstdDecoder decompress buffer using xor. func NewXORDecoder(br *Reader) *XORDecoder { s := &XORDecoder{ br: br, first: true, } return s } // Reset resets the underlying buffer to decode. func (d *XORDecoder) Reset() { d.first = true d.leading = 0 d.trailing = 0 d.val = 0 } // Next return if zstdDecoder has value in buffer using xor, do uncompress logic in next method, // data format reference zstdEncoder format. func (d *XORDecoder) Next() bool { if d.first { // read first value d.first = false d.val, d.err = d.br.ReadBits(64) return d.err == nil } var b bool // read delta control bit b, d.err = d.br.ReadBool() if d.err != nil { return false } if !b { return true } ctrlBits := ctrlBitsNoContainMeaningful // read control bit b, d.err = d.br.ReadBool() if d.err != nil { return false } if b { ctrlBits |= 1 } var blockSize uint64 if ctrlBits == ctrlBitsNoContainMeaningful { blockSize = 64 - d.leading - d.trailing } else { // read leading and trailing, because block is diff with previous d.leading, d.err = d.br.ReadBits(6) if d.err != nil { return false } blockSize, d.err = d.br.ReadBits(6) if d.err != nil { return false } blockSize++ d.trailing = 64 - d.leading - blockSize } delta, err := d.br.ReadBits(int(blockSize)) if err != nil { d.err = err return false } val := delta << d.trailing d.val ^= val return true } // Value returns uint64 from buffer. func (d *XORDecoder) Value() uint64 { return d.val } // Err returns error raised in Next(). func (d *XORDecoder) Err() error { return d.err }