merkledb/benches/rolling_hash_benchmark.rs (245 lines of code) (raw):

use std::collections::VecDeque; use std::fmt; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use lazy_static::lazy_static; use rand_chacha::rand_core::{RngCore, SeedableRng}; use rand_chacha::ChaChaRng; pub trait RollingHash { fn clone(&self) -> Box<dyn RollingHash>; fn current_hash(&self) -> u32; fn reset(&mut self); fn accumulate_byte(&mut self, b: u8) -> u32; fn at_chunk_boundary(&self) -> bool; fn accumulate(&mut self, buf: &[u8]) -> u32 { for &c in buf.iter() { self.accumulate_byte(c); } self.current_hash() } fn scan_to_chunk_boundary(&mut self, buf: &[u8]) -> (u32, usize) { for (u, &c) in buf.iter().enumerate() { self.accumulate_byte(c); if self.at_chunk_boundary() { return (self.current_hash(), u); } } (self.current_hash(), buf.len()) } } #[derive(Debug)] pub struct SumHash { mask: u32, bytehash: [u32; 256], history: VecDeque<u8>, windowsize: usize, current_hash: u32, } impl SumHash { /// if seed is 0, each byte is added without a byte hash. i.e. /// the hash of a window is exactly the sum of all the bytes. /// Otherwise, a RNG is used to generate a fixed random value for each /// byte. pub fn init(mask: u32, windowsize: usize, seed: u64) -> SumHash { let mut bytehash: [u32; 256] = [0; 256]; if seed > 0 { let mut rng = ChaChaRng::seed_from_u64(seed); #[allow(clippy::needless_range_loop)] for i in 0..256 { bytehash[i] = rng.next_u32(); } } else { #[allow(clippy::needless_range_loop)] for i in 0..256 { bytehash[i] = i as u32; } } assert!(windowsize > 0); SumHash { mask, bytehash, history: VecDeque::new(), windowsize, current_hash: 0, } } } impl RollingHash for SumHash { fn clone(&self) -> Box<dyn RollingHash> { Box::new(SumHash { mask: self.mask, bytehash: self.bytehash, history: self.history.clone(), windowsize: self.windowsize, current_hash: self.current_hash, }) } fn current_hash(&self) -> u32 { self.current_hash } fn reset(&mut self) { self.current_hash = 0_u32; self.history.clear(); } fn at_chunk_boundary(&self) -> bool { self.current_hash & self.mask == 0 } fn accumulate_byte(&mut self, b: u8) -> u32 { self.history.push_back(b); self.current_hash = self.current_hash.wrapping_add(self.bytehash[b as usize]); if self.history.len() > self.windowsize { let first = *(self.history.front().unwrap()); self.current_hash = self.current_hash.wrapping_sub(self.bytehash[first as usize]); self.history.pop_front(); } self.current_hash } } impl fmt::Display for SumHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "mask: {}, windowsize {}", self.mask, self.windowsize) } } #[derive(Debug)] pub struct BuzHash { mask: u32, bytehash_right: [u32; 256], // bytehash to use when adding a byte to the right bytehash_left: [u32; 256], // bytehash to use when removing a byte from the left history: VecDeque<u8>, windowsize: usize, current_hash: u32, } impl BuzHash { pub fn init(mask: u32, windowsize: usize, seed: u64) -> BuzHash { let mut bytehash_right: [u32; 256] = [0; 256]; let mut bytehash_left: [u32; 256] = [0; 256]; let mut rng = ChaChaRng::seed_from_u64(seed); for i in 0..256 { bytehash_right[i] = rng.next_u32(); bytehash_left[i] = bytehash_right[i].rotate_left(windowsize as u32); } assert!(windowsize > 0); BuzHash { mask, bytehash_right, bytehash_left, history: VecDeque::new(), windowsize, current_hash: 0, } } } impl RollingHash for BuzHash { fn clone(&self) -> Box<dyn RollingHash> { Box::new(BuzHash { mask: self.mask, bytehash_right: self.bytehash_right, bytehash_left: self.bytehash_left, history: self.history.clone(), windowsize: self.windowsize, current_hash: self.current_hash, }) } fn current_hash(&self) -> u32 { self.current_hash } fn reset(&mut self) { self.current_hash = 0_u32; self.history.clear(); } fn at_chunk_boundary(&self) -> bool { self.current_hash & self.mask == 0 } fn accumulate_byte(&mut self, b: u8) -> u32 { self.history.push_back(b); self.current_hash = self.current_hash.rotate_left(1) ^ self.bytehash_right[b as usize]; if self.history.len() > self.windowsize { let first = *(self.history.front().unwrap()); self.current_hash ^= self.bytehash_left[first as usize]; self.history.pop_front(); } self.current_hash } } impl fmt::Display for BuzHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "mask: {}, windowsize {}", self.mask, self.windowsize) } } #[derive(Debug)] pub struct GearHash { mask: u32, bytehash: [u32; 256], current_hash: u32, } impl GearHash { pub fn init(mask: u32, seed: u64) -> GearHash { let mut rng = ChaChaRng::seed_from_u64(seed); let mut bytehash: [u32; 256] = [0; 256]; #[allow(clippy::needless_range_loop)] for i in 0..256 { bytehash[i] = rng.next_u32(); } GearHash { mask, bytehash, current_hash: 0, } } } impl RollingHash for GearHash { fn clone(&self) -> Box<dyn RollingHash> { Box::new(GearHash { mask: self.mask, bytehash: self.bytehash, current_hash: self.current_hash, }) } #[inline(always)] fn current_hash(&self) -> u32 { self.current_hash } fn reset(&mut self) { self.current_hash = 0_u32; } #[inline(always)] fn accumulate_byte(&mut self, b: u8) -> u32 { self.current_hash = self.current_hash.wrapping_shl(1).wrapping_add(self.bytehash[b as usize]); self.current_hash } #[inline(always)] fn at_chunk_boundary(&self) -> bool { self.current_hash & self.mask == 0 } } impl fmt::Display for GearHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "mask: {}, windowsize 32", self.mask) } } const BENCH_INPUT_SEED: u64 = 0xa383d96f7becd17e; const BUF_SIZE: usize = 128; // * 1024 lazy_static! { static ref BENCH_INPUT_BUF: [u8; BUF_SIZE] = { use rand::{RngCore, SeedableRng}; let mut bytes = [0u8; BUF_SIZE]; rand::rngs::StdRng::seed_from_u64(BENCH_INPUT_SEED).fill_bytes(&mut bytes); bytes }; } fn run_rolling_hash(rolling_hash: &mut impl RollingHash) { let len = 0usize; loop { let (_, len) = black_box(rolling_hash.scan_to_chunk_boundary(&BENCH_INPUT_BUF[len..])); if len >= BUF_SIZE { break; } } } fn bench_rolling_hashes(c: &mut Criterion) { let mut sh = SumHash::init(0xFF, 64, 1234); let mut bh = BuzHash::init(0xFF, 128, 1234); let mut gh = GearHash::init(0xFF, 1234); let mut group = c.benchmark_group("Rolling Hashes"); group.bench_function("SumHash", |b| b.iter(|| run_rolling_hash(&mut sh))); group.bench_function("BuzzHash", |b| b.iter(|| run_rolling_hash(&mut bh))); group.bench_function("GearHash", |b| b.iter(|| run_rolling_hash(&mut gh))); group.finish(); } criterion_group!(benches, bench_rolling_hashes); criterion_main!(benches);