fn gen()

in merkledb/src/chunk_iterator.rs [140:216]

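gen() drains the underlying byte iterator through a fixed-size read buffer and emits content-defined chunks. Within each refill it first skips ahead so that little of the data below the minimum chunk size is scanned, then asks the current rolling hasher for the next position whose hash matches self.mask, and force-closes a chunk if the maximum size is reached first. Each boundary rotates to the next hasher in self.hash; a ChunkInfo (data hash plus length) is pushed only once every hasher has matched, and any trailing bytes are flushed as a final chunk.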

    fn gen(&mut self) -> Vec<ChunkInfo> {
        let mut ret: Vec<ChunkInfo> = Vec::with_capacity(1024);
        let mut chunkbuf: Vec<u8> = Vec::with_capacity(self.maximum_chunk);
        let mut cur_chunk_len: usize = 0;
        let mut readbuf: [u8; READ_BUF_SIZE] = [0; READ_BUF_SIZE];
        const MAX_WINDOW_SIZE: usize = 64;
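        // self.hash holds several rolling hashers; cur_hasher points at the one
        // currently scanning. A chunk is only emitted after every hasher in the
        // array has found a boundary (see the create_chunk branch below).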
        let mut cur_hasher = self.hash.as_mut_ptr();
        let mut cur_hash_index: usize = 0;
        while let Ok(read_bytes) = fill_buf(&mut self.iter, &mut readbuf) {
            if read_bytes == 0 {
                break;
            }
            let mut cur_pos = 0;
            while cur_pos < read_bytes {
                // every pass through this loop we either
                // 1: create a chunk, or
                // 2: consume the rest of the read buffer
                let chunk_buf_copy_start = cur_pos;
                // Skip ahead toward the minimum chunk size. The rolling hash has
                // a 64-byte window, so advance at most minimum_chunk - 64 - 1
                // bytes: the window must be fully primed before a boundary can
                // be accepted at the minimum chunk length.
                if cur_chunk_len < self.minimum_chunk - MAX_WINDOW_SIZE {
                    let max_advance =
                        min(self.minimum_chunk - cur_chunk_len - MAX_WINDOW_SIZE - 1, read_bytes - cur_pos);
                    cur_pos += max_advance;
                    cur_chunk_len += max_advance;
                }
                let mut consume_len;
                let mut create_chunk = false;
                // look for the next hash match (a chunk boundary); the skip
                // above ensures scanning starts only once the minimum chunk
                // size is within a window's reach
                if let Some(boundary) = unsafe { (*cur_hasher).next_match(&readbuf[cur_pos..read_bytes], self.mask) } {
                    consume_len = boundary;
                    create_chunk = true;
                } else {
                    consume_len = read_bytes - cur_pos;
                }

                // if consuming this much would reach the maximum chunk size,
                // clamp the consumption and force a chunk boundary
                if consume_len + cur_chunk_len >= self.maximum_chunk {
                    consume_len = self.maximum_chunk - cur_chunk_len;
                    create_chunk = true;
                }
                cur_chunk_len += consume_len;
                cur_pos += consume_len;
                chunkbuf.extend_from_slice(&readbuf[chunk_buf_copy_start..cur_pos]);
                if create_chunk {
                    // reset this hasher and advance to the next one; a chunk is
                    // only actually emitted once every hasher has matched
                    unsafe { (*cur_hasher).set_hash(0) };
                    cur_hash_index += 1;
                    unsafe {
                        cur_hasher = self.hash.as_mut_ptr().add(cur_hash_index);
                    }
                    if cur_hash_index >= self.hash.len() {
                        ret.push(ChunkInfo {
                            length: chunkbuf.len(),
                            hash: compute_data_hash(&chunkbuf[..]),
                        });

                        // reset chunk buffer state and continue to find the next chunk
                        chunkbuf.clear();
                        cur_hash_index = 0;
                        cur_hasher = self.hash.as_mut_ptr();
                    }
                    cur_chunk_len = 0;
                }
            }
        }
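        // flush any remaining bytes as a final, possibly undersized, chunk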
        if !chunkbuf.is_empty() {
            ret.push(ChunkInfo {
                hash: compute_data_hash(&chunkbuf[..]),
                length: chunkbuf.len(),
            });
        }
        ret
    }
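
The control flow above is easier to see without the buffered reads and the hasher array. Below is a minimal, self-contained sketch of the same policy (skip below the minimum, cut on a rolling-hash mask match, force a cut at the maximum) using a toy buzhash. Everything in it (ToyRollingHash, mix, MIN_CHUNK, MAX_CHUNK, MASK, and the low-bits-all-ones match condition) is an illustrative assumption, not merkledb's actual hasher, API, or constants.

    const WINDOW: usize = 64; // rolling window, mirroring MAX_WINDOW_SIZE above
    const MIN_CHUNK: usize = 1 << 10; // hypothetical minimum chunk size (1 KiB)
    const MAX_CHUNK: usize = 1 << 14; // hypothetical maximum chunk size (16 KiB)
    const MASK: u64 = (1 << 12) - 1; // ~4 KiB expected gap between matches

    // splitmix64 finalizer: spreads a byte value over all 64 bits.
    fn mix(b: u64) -> u64 {
        let mut x = b.wrapping_add(0x9E3779B97F4A7C15);
        x = (x ^ (x >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
        x = (x ^ (x >> 27)).wrapping_mul(0x94D049BB133111EB);
        x ^ (x >> 31)
    }

    struct ToyRollingHash {
        window: [u8; WINDOW],
        pos: usize,
        hash: u64,
    }

    impl ToyRollingHash {
        fn new() -> Self {
            // Start as if WINDOW zero bytes had already been rolled in, so the
            // buzhash invariant holds from the very first real byte.
            let mut hash = 0u64;
            for k in 0..WINDOW as u32 {
                hash ^= mix(0).rotate_left(k);
            }
            ToyRollingHash { window: [0; WINDOW], pos: 0, hash }
        }

        fn roll(&mut self, b: u8) -> u64 {
            let out = self.window[self.pos];
            self.window[self.pos] = b;
            self.pos = (self.pos + 1) % WINDOW;
            // Buzhash update: rotate, cancel the byte leaving the window
            // (rotate_left by 64 is a no-op on u64, so its term needs no extra
            // rotation), then fold in the incoming byte.
            self.hash = self.hash.rotate_left(1) ^ mix(out as u64) ^ mix(b as u64);
            self.hash
        }
    }

    // Same chunking policy as gen(): no boundary below the minimum size,
    // boundary on a mask match, forced boundary at the maximum size.
    fn chunk_lengths(data: &[u8]) -> Vec<usize> {
        let mut lengths = Vec::new();
        let mut hasher = ToyRollingHash::new();
        let mut cur_len = 0usize;
        for &b in data {
            let h = hasher.roll(b);
            cur_len += 1;
            let boundary = cur_len >= MIN_CHUNK && (h & MASK) == MASK;
            if boundary || cur_len >= MAX_CHUNK {
                lengths.push(cur_len);
                cur_len = 0;
                // gen() resets only the hash value (set_hash(0)) and keeps
                // rolling; recreating the hasher is simpler for this sketch.
                hasher = ToyRollingHash::new();
            }
        }
        if cur_len > 0 {
            lengths.push(cur_len); // trailing partial chunk, like the final chunkbuf flush
        }
        lengths
    }

    fn main() {
        // Pseudo-random input; any byte stream works.
        let data: Vec<u8> = (0u64..1 << 17).map(|i| (mix(i) & 0xFF) as u8).collect();
        let lengths = chunk_lengths(&data);
        assert_eq!(lengths.iter().sum::<usize>(), data.len());
        println!("{} chunks, first lengths: {:?}", lengths.len(), &lengths[..5.min(lengths.len())]);
    }

Note one structural difference: the sketch rolls one byte at a time, while gen() hands the whole unread slice to next_match and gets back a boundary offset, which lets the real implementation scan the read buffer in bulk instead of byte by byte.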