in arrow-select/src/take.rs [465:555]
/// Gathers the values of `array` at the positions given by `indices` into a new
/// byte array (string or binary, depending on `T`).
///
/// An output slot contributes zero bytes (its offset repeats the previous one) when
/// either the index slot itself is null or the referenced value in `array` is null.
/// The validity buffer of the result is whatever `take_nulls(array.nulls(), indices)`
/// returns.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the cumulative length of the selected
/// values cannot be represented by `T::Offset`. Overflow is reported as an error
/// rather than a panic so that callers can react to it (e.g. by switching to a
/// wider offset type).
///
/// # Panics
///
/// Panics if a non-null index is out of bounds for `array`.
fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
    array: &GenericByteArray<T>,
    indices: &PrimitiveArray<IndexType>,
) -> Result<GenericByteArray<T>, ArrowError> {
    let nulls = take_nulls(array.nulls(), indices);

    let array_has_nulls = array.null_count() != 0;
    let indices_have_nulls = indices.null_count() != 0;

    // Each arm passes a different closure, so the helper is monomorphised once per
    // case and the cheap paths do not pay for validity checks they do not need.
    let (offsets, values) = match (array_has_nulls, indices_have_nulls) {
        // No nulls anywhere: every slot is copied.
        (false, false) => take_bytes_selected(array, indices, |_, _| true)?,
        // Only the source has nulls: skip indices that point at a null value.
        (true, false) => take_bytes_selected(array, indices, |_, index| array.is_valid(index))?,
        // Only the indices have nulls: skip null index slots.
        (false, true) => {
            take_bytes_selected(array, indices, |position, _| indices.is_valid(position))?
        }
        // Both have nulls: the combined output validity decides.
        (true, true) => {
            // The original code unwrapped here as well; it relies on `take_nulls`
            // producing a buffer whenever both inputs contain nulls.
            let output_nulls = nulls
                .as_ref()
                .expect("take_nulls should return a null buffer when both inputs have nulls");
            take_bytes_selected(array, indices, |position, _| output_nulls.is_valid(position))?
        }
    };

    // SAFETY: `offsets` starts at `T::Offset::default()` and every following entry is a
    // running sum of non-negative lengths that was checked to fit in `T::Offset`, so the
    // sequence is non-decreasing and its last entry equals `values.len()`. `values` is a
    // concatenation of complete values read from `array`, so any per-value encoding
    // invariant `array` upholds carries over. NOTE(review): this also assumes `take_nulls`
    // returns a buffer of length `indices.len()` — confirm against its definition.
    let array = unsafe {
        let offsets = OffsetBuffer::new_unchecked(offsets.into());
        GenericByteArray::<T>::new_unchecked(offsets, values.into(), nulls)
    };
    Ok(array)
}

/// Builds the offsets and the concatenated value bytes for [`take_bytes`].
///
/// `is_selected(position, index)` is called with the position inside `indices` and the
/// index value stored there; it returns whether that slot's bytes should be copied.
/// The predicate is always consulted *before* `index` is used to address `array`,
/// because the value stored in a null index slot may be out of bounds.
///
/// Returns `indices.len() + 1` offsets together with the gathered bytes, or
/// [`ArrowError::ComputeError`] if a cumulative offset does not fit in `T::Offset`.
fn take_bytes_selected<T, IndexType, F>(
    array: &GenericByteArray<T>,
    indices: &PrimitiveArray<IndexType>,
    is_selected: F,
) -> Result<(Vec<T::Offset>, Vec<u8>), ArrowError>
where
    T: ByteArrayType,
    IndexType: ArrowPrimitiveType,
    F: Fn(usize, usize) -> bool,
{
    let input_offsets = array.value_offsets();

    let mut offsets = Vec::with_capacity(indices.len() + 1);
    offsets.push(T::Offset::default());

    // First pass: compute the output offsets and the total number of bytes, so the
    // value buffer can be allocated exactly once.
    let mut total_len = 0_usize;
    for (position, index) in indices.values().iter().enumerate() {
        let index = index.as_usize();
        if is_selected(position, index) {
            total_len += input_offsets[index + 1].as_usize() - input_offsets[index].as_usize();
        }
        // `ok_or_else` keeps the error message from being formatted on the success path.
        let offset = T::Offset::from_usize(total_len).ok_or_else(|| {
            ArrowError::ComputeError(format!(
                "Offset overflow for {}BinaryArray: {}",
                T::Offset::PREFIX,
                total_len
            ))
        })?;
        offsets.push(offset);
    }

    // Second pass: copy the selected values back to back.
    let mut values: Vec<u8> = Vec::with_capacity(total_len);
    for (position, index) in indices.values().iter().enumerate() {
        let index = index.as_usize();
        if is_selected(position, index) {
            let bytes: &[u8] = array.value(index).as_ref();
            values.extend_from_slice(bytes);
        }
    }
    debug_assert_eq!(values.len(), total_len);

    Ok((offsets, values))
}