internal/murmur3.go (142 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package internal
const (
C1 = 0x87c37b91114253d5
C2 = 0x4cf5ad432745937f
)
type SimpleMurmur3 struct {
h1 uint64
h2 uint64
}
func HashCharSliceMurmur3(key []byte, offsetChars int, lengthChars int, seed uint64) (uint64, uint64) {
hashState := SimpleMurmur3{h1: seed, h2: seed}
// Number of full 128-bit blocks of 8 chars.
// Possible exclusion of a remainder of up to 7 chars.
nblocks := lengthChars >> 3 //chars / 8
// Process the 128-bit blocks (the body) into the hash
for i := 0; i < nblocks; i++ {
k1 := getUint64(key, offsetChars+(i<<3), 4) //offsetChars + 0, 8, 16, ...
k2 := getUint64(key, offsetChars+(i<<3)+4, 4) //offsetChars + 4, 12, 20, ...
hashState.blockMix128(k1, k2)
}
// Get the tail index wrt hashed portion, remainder length
tail := nblocks << 3 // 8 chars per block
rem := lengthChars - tail // remainder chars: 0,1,2,3,4,5,6,7
// Get the tail
k1 := uint64(0)
k2 := uint64(0)
if rem > 4 {
k1 = getUint64(key, offsetChars+tail, 4)
k2 = getUint64(key, offsetChars+tail+4, rem-4)
} else {
if rem != 0 {
k1 = getUint64(key, offsetChars+tail, rem)
}
}
// Mix the tail into the hash and return
return hashState.finalMix128(k1, k2, uint64(lengthChars)<<1) //convert to bytes
}
func HashInt32SliceMurmur3(key []int32, offsetInts int, lengthInts int, seed uint64) (uint64, uint64) {
hashState := SimpleMurmur3{h1: seed, h2: seed}
// Number of full 128-bit blocks of 4 ints.
// Possible exclusion of a remainder of up to 3 ints.
nblocks := lengthInts >> 2 //ints / 4
// Process the 128-bit blocks (the body) into the hash
for i := 0; i < nblocks; i++ {
k1 := uint64(key[offsetInts+(i<<2)]) //offsetInts + 0, 4, 8, ...
k2 := uint64(key[offsetInts+(i<<2)+2]) //offsetInts + 2, 6, 10, ...
hashState.blockMix128(k1, k2)
}
// Get the tail index wrt hashed portion, remainder length
tail := nblocks << 2 // 4 ints per block
rem := lengthInts - tail // remainder ints: 0,1,2,3
// Get the tail
k1 := uint64(0)
k2 := uint64(0)
if rem > 2 {
k1 = uint64(key[offsetInts+tail]) //k2 -> whole; k1 -> partial
k2 = uint64(key[offsetInts+tail+2])
} else {
if rem != 0 {
k1 = uint64(key[offsetInts+tail]) //k1 -> whole(2), partial(1) or 0; k2 == 0
}
}
// Mix the tail into the hash and return
return hashState.finalMix128(k1, k2, uint64(lengthInts)<<2) //convert to bytes
}
func HashInt64SliceMurmur3(key []int64, offsetLongs int, lengthLongs int, seed uint64) (uint64, uint64) {
hashState := SimpleMurmur3{h1: seed, h2: seed}
// Number of full 128-bit blocks of 2 longs (the body).
// Possible exclusion of a remainder of 1 long.
nblocks := lengthLongs >> 1 //longs / 2
// Process the 128-bit blocks (the body) into the hash
for i := 0; i < nblocks; i++ {
k1 := uint64(key[offsetLongs+(i<<1)]) //offsetLongs + 0, 2, 4, ...
k2 := uint64(key[offsetLongs+(i<<1)+1]) //offsetLongs + 1, 3, 5, ...
hashState.blockMix128(k1, k2)
}
// Get the tail index wrt hashed portion, remainder length
tail := nblocks << 1 // 2 longs / block
rem := lengthLongs - tail // remainder longs: 0,1
// Get the tail
k1 := uint64(0)
if rem != 0 {
k1 = uint64(key[offsetLongs+tail]) //k2 -> 0
}
return hashState.finalMix128(k1, 0, uint64(lengthLongs)<<3)
}
func HashByteArrMurmur3(key []byte, offsetBytes int, lengthBytes int, seed uint64) (uint64, uint64) {
hashState := SimpleMurmur3{h1: seed, h2: seed}
// Number of full 128-bit blocks of 16 bytes.
// Possible exclusion of a remainder of up to 15 bytes.
nblocks := lengthBytes >> 4 //bytes / 16
// Process the 128-bit blocks (the body) into the hash
for i := 0; i < nblocks; i++ {
k1 := getUint64(key, offsetBytes+(i<<4), 8) //0, 16, 32, ...
k2 := getUint64(key, offsetBytes+(i<<4)+8, 8) //8, 24, 40, ...
hashState.blockMix128(k1, k2)
}
// Get the tail index wrt hashed portion, remainder length
tail := nblocks << 4 // 16 bytes / block
rem := lengthBytes - tail // remainder bytes: 0,1,...,15
// Get the tail
k1 := uint64(0)
k2 := uint64(0)
if rem > 8 {
k1 = getUint64(key, offsetBytes+tail, 8)
k2 = getUint64(key, offsetBytes+tail+8, rem-8)
} else {
if rem != 0 {
k1 = getUint64(key, offsetBytes+tail, rem)
}
}
// Mix the tail into the hash and return
return hashState.finalMix128(k1, k2, uint64(lengthBytes))
}
func getUint64(bArr []byte, index int, rem int) uint64 {
var out uint64
for i := rem - 1; i >= 0; i-- { //i= 7,6,5,4,3,2,1,0
b := bArr[index+i]
out ^= uint64(b&0xFF) << uint(i*8) //equivalent to |=
}
return out
}
func mixK1(k1 uint64) uint64 {
k1 *= C1
k1 = (k1 << 31) | (k1 >> (64 - 31))
k1 *= C2
return k1
}
func mixK2(k2 uint64) uint64 {
k2 *= C2
k2 = (k2 << 33) | (k2 >> (64 - 33))
k2 *= C1
return k2
}
func finalMix64(h uint64) uint64 {
h ^= h >> 33
h *= 0xff51afd7ed558ccd
h ^= h >> 33
h *= 0xc4ceb9fe1a85ec53
h ^= h >> 33
return h
}
func (m *SimpleMurmur3) blockMix128(k1, k2 uint64) {
m.h1 ^= mixK1(k1)
m.h1 = (m.h1 << 27) | (m.h1 >> (64 - 27))
m.h1 += m.h2
m.h1 = m.h1*5 + 0x52dce729
m.h2 ^= mixK2(k2)
m.h2 = (m.h2 << 31) | (m.h2 >> (64 - 31))
m.h2 += m.h1
m.h2 = m.h2*5 + 0x38495ab5
}
func (m *SimpleMurmur3) finalMix128(k1, k2, inputLengthBytes uint64) (uint64, uint64) {
m.h1 ^= mixK1(k1)
m.h2 ^= mixK2(k2)
m.h1 ^= inputLengthBytes
m.h2 ^= inputLengthBytes
m.h1 += m.h2
m.h2 += m.h1
m.h1 = finalMix64(m.h1)
m.h2 = finalMix64(m.h2)
m.h1 += m.h2
m.h2 += m.h1
return m.h1, m.h2
}