in merkledb/src/chunk_iterator.rs [140:216]
/// Consumes the remaining bytes of `self.iter` and splits them into
/// content-defined chunks, returning one `ChunkInfo` (length + data hash)
/// per chunk.
///
/// Boundaries are found with the rolling hashers in `self.hash`: a chunk is
/// cut when a hasher reports a match against `self.mask` (never before
/// `self.minimum_chunk` bytes, because the scan skips ahead first), or
/// unconditionally once `self.maximum_chunk` bytes accumulate. A `ChunkInfo`
/// is only emitted after all hashers in `self.hash` have been cycled through;
/// any trailing partial chunk is emitted at the end.
fn gen(&mut self) -> Vec<ChunkInfo> {
    let mut ret: Vec<ChunkInfo> = Vec::with_capacity(1024);
    // Accumulates the bytes of the chunk currently being built.
    let mut chunkbuf: Vec<u8> = Vec::with_capacity(self.maximum_chunk);
    let mut cur_chunk_len: usize = 0;
    let mut readbuf: [u8; READ_BUF_SIZE] = [0; READ_BUF_SIZE];
    const MAX_WINDOW_SIZE: usize = 64;
    // Index of the rolling hasher currently scanning for a boundary.
    // NOTE(review): replaces the original raw-pointer bookkeeping
    // (`self.hash.as_mut_ptr().add(i)`) — the pointer always mirrored this
    // index, so safe indexing is behaviorally identical and drops the
    // undocumented `unsafe` blocks.
    let mut cur_hash_index: usize = 0;
    while let Ok(read_bytes) = fill_buf(&mut self.iter, &mut readbuf) {
        if read_bytes == 0 {
            // End of input.
            break;
        }
        let mut cur_pos = 0;
        while cur_pos < read_bytes {
            // every pass through this loop we either
            // 1: create a chunk
            // OR
            // 2: consume the entire buffer
            let chunk_buf_copy_start = cur_pos;
            // Skip ahead to just before the minimum chunk size, noting that
            // the hash has a window size of 64, so we skip only up to
            // minimum_chunk - 64 - 1 bytes to let the window warm up before
            // a boundary may legally be found.
            // NOTE(review): these subtractions assume the invariant
            // minimum_chunk > MAX_WINDOW_SIZE (otherwise usize underflow) —
            // presumably enforced at construction; confirm.
            if cur_chunk_len < self.minimum_chunk - MAX_WINDOW_SIZE {
                let max_advance = min(
                    self.minimum_chunk - cur_chunk_len - MAX_WINDOW_SIZE - 1,
                    read_bytes - cur_pos,
                );
                cur_pos += max_advance;
                cur_chunk_len += max_advance;
            }
            let mut consume_len;
            let mut create_chunk = false;
            // Find a chunk boundary after the minimum chunk size; if none is
            // found in the remaining buffer, consume it all.
            if let Some(boundary) =
                self.hash[cur_hash_index].next_match(&readbuf[cur_pos..read_bytes], self.mask)
            {
                consume_len = boundary;
                create_chunk = true;
            } else {
                consume_len = read_bytes - cur_pos;
            }
            // If we hit maximum chunk size we must cut a chunk regardless.
            if consume_len + cur_chunk_len >= self.maximum_chunk {
                consume_len = self.maximum_chunk - cur_chunk_len;
                create_chunk = true;
            }
            cur_chunk_len += consume_len;
            cur_pos += consume_len;
            chunkbuf.extend_from_slice(&readbuf[chunk_buf_copy_start..cur_pos]);
            if create_chunk {
                // Advance to the next hasher; we only actually emit a chunk
                // when we have cycled through all of them.
                self.hash[cur_hash_index].set_hash(0);
                cur_hash_index += 1;
                if cur_hash_index >= self.hash.len() {
                    ret.push(ChunkInfo {
                        length: chunkbuf.len(),
                        hash: compute_data_hash(&chunkbuf[..]),
                    });
                    // Reset chunk buffer state and continue to find the next chunk.
                    chunkbuf.clear();
                    cur_hash_index = 0;
                }
                cur_chunk_len = 0;
            }
        }
    }
    // Emit any trailing partial chunk.
    if !chunkbuf.is_empty() {
        ret.push(ChunkInfo {
            hash: compute_data_hash(&chunkbuf[..]),
            length: chunkbuf.len(),
        });
    }
    ret
}