crates/iceberg/src/delete_vector.rs (50 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. use roaring::bitmap::Iter; use roaring::treemap::BitmapIter; use roaring::RoaringTreemap; #[allow(unused)] pub struct DeleteVector { inner: RoaringTreemap, } impl DeleteVector { #[allow(unused)] pub(crate) fn new(roaring_treemap: RoaringTreemap) -> DeleteVector { DeleteVector { inner: roaring_treemap, } } pub fn iter(&self) -> DeleteVectorIterator { let outer = self.inner.bitmaps(); DeleteVectorIterator { outer, inner: None } } } // Ideally, we'd just wrap `roaring::RoaringTreemap`'s iterator, `roaring::treemap::Iter` here. // But right now, it does not have a corresponding implementation of `roaring::bitmap::Iter::advance_to`, // which is very handy in ArrowReader::build_deletes_row_selection. // There is a PR open on roaring to add this (https://github.com/RoaringBitmap/roaring-rs/pull/314) // and if that gets merged then we can simplify `DeleteVectorIterator` here, refactoring `advance_to` // to just a wrapper around the underlying iterator's method. pub struct DeleteVectorIterator<'a> { // NB: `BitMapIter` was only exposed publicly in https://github.com/RoaringBitmap/roaring-rs/pull/316 // which is not yet released. As a consequence our Cargo.toml temporarily uses a git reference for // the roaring dependency. outer: BitmapIter<'a>, inner: Option<DeleteVectorIteratorInner<'a>>, } struct DeleteVectorIteratorInner<'a> { high_bits: u32, bitmap_iter: Iter<'a>, } impl Iterator for DeleteVectorIterator<'_> { type Item = u64; fn next(&mut self) -> Option<Self::Item> { if let Some(ref mut inner) = &mut self.inner { if let Some(inner_next) = inner.bitmap_iter.next() { return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); } } if let Some((high_bits, next_bitmap)) = self.outer.next() { self.inner = Some(DeleteVectorIteratorInner { high_bits, bitmap_iter: next_bitmap.iter(), }) } else { return None; } self.next() } } impl DeleteVectorIterator<'_> { pub fn advance_to(&mut self, pos: u64) { let hi = (pos >> 32) as u32; let lo = pos as u32; let Some(ref mut inner) = self.inner else { return; }; while inner.high_bits < hi { let Some((next_hi, next_bitmap)) = self.outer.next() else { return; }; *inner = DeleteVectorIteratorInner { high_bits: next_hi, bitmap_iter: next_bitmap.iter(), } } inner.bitmap_iter.advance_to(lo); } }