in arrow-select/src/take.rs [84:212]
/// Gathers the elements of `values` at the positions given by `indices`, dispatching on the
/// data type of `values` to the matching type-specific take kernel.
///
/// When `options` is `None`, the default [`TakeOptions`] are used. If `check_bounds` is set,
/// every non-null index is validated against `values.len()` before any data is gathered.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when
/// * bounds checking is enabled and a non-null index cannot be cast to `usize` or is not
///   smaller than `values.len()`, or
/// * `values` is a struct array and a non-null index cannot be cast to `usize` while the
///   validity bitmap of the result is computed.
///
/// Errors returned by the type-specific kernels are propagated unchanged.
///
/// # Panics
///
/// Panics (via `unimplemented!`) when the data type of `values` has no take kernel wired up
/// in this dispatch.
fn take_impl<IndexType: ArrowPrimitiveType>(
    values: &dyn Array,
    indices: &PrimitiveArray<IndexType>,
    options: Option<TakeOptions>,
) -> Result<ArrayRef, ArrowError> {
    let options = options.unwrap_or_default();
    if options.check_bounds {
        check_take_index_bounds(values.len(), indices)?;
    }
    downcast_primitive_array! {
        values => Ok(Arc::new(take_primitive(values, indices)?)),
        DataType::Boolean => {
            let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();
            Ok(Arc::new(take_boolean(values, indices)))
        }
        DataType::Utf8 => {
            Ok(Arc::new(take_bytes(values.as_string::<i32>(), indices)?))
        }
        DataType::LargeUtf8 => {
            Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
        }
        DataType::List(_) => {
            Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
        }
        DataType::LargeList(_) => {
            Ok(Arc::new(take_list::<_, Int64Type>(values.as_list(), indices)?))
        }
        DataType::FixedSizeList(_, length) => {
            let values = values
                .as_any()
                .downcast_ref::<FixedSizeListArray>()
                .unwrap();
            Ok(Arc::new(take_fixed_size_list(
                values,
                indices,
                *length as u32,
            )?))
        }
        DataType::Map(_, _) => {
            // Reuse the list kernel: view the map as a list array, take from that, then
            // relabel the resulting data with the original map data type.
            let list_arr = ListArray::from(values.as_map().clone());
            let list_data = take_list::<_, Int32Type>(&list_arr, indices)?;
            let builder = list_data.into_data().into_builder().data_type(values.data_type().clone());
            // SAFETY: validation is skipped here. The data comes from `take_list` applied to
            // the list view of this map, and only the data type is swapped back to the map
            // type; this relies on `take_list` preserving the child layout of its input.
            // NOTE(review): confirm against `take_list`, which is not visible in this chunk.
            Ok(Arc::new(MapArray::from(unsafe { builder.build_unchecked() })))
        }
        DataType::Struct(fields) => {
            let array: &StructArray = values.as_struct();
            // Take every child column with the same indices and options.
            let arrays = array
                .columns()
                .iter()
                .map(|a| take_impl(a.as_ref(), indices, Some(options.clone())))
                .collect::<Result<Vec<ArrayRef>, _>>()?;
            let fields: Vec<(FieldRef, ArrayRef)> =
                fields.iter().cloned().zip(arrays).collect();
            // Create the null bit buffer: a slot is valid only when its index is non-null
            // and the struct slot it points at is valid. A failed cast is reported as an
            // error instead of panicking, matching the bounds check.
            let is_valid = indices
                .iter()
                .map(|index| -> Result<bool, ArrowError> {
                    match index {
                        Some(index) => {
                            let ix = index.to_usize().ok_or_else(|| {
                                ArrowError::ComputeError("Cast to usize failed".to_string())
                            })?;
                            Ok(array.is_valid(ix))
                        }
                        None => Ok(false),
                    }
                })
                .collect::<Result<Buffer, ArrowError>>()?;
            Ok(Arc::new(StructArray::from((fields, is_valid))) as ArrayRef)
        }
        DataType::Dictionary(_, _) => downcast_dictionary_array! {
            values => Ok(Arc::new(take_dict(values, indices)?)),
            t => unimplemented!("Take not supported for dictionary type {:?}", t)
        }
        DataType::RunEndEncoded(_, _) => downcast_run_array! {
            values => Ok(Arc::new(take_run(values, indices)?)),
            t => unimplemented!("Take not supported for run type {:?}", t)
        }
        DataType::Binary => {
            Ok(Arc::new(take_bytes(values.as_binary::<i32>(), indices)?))
        }
        DataType::LargeBinary => {
            Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
        }
        DataType::FixedSizeBinary(size) => {
            let values = values
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();
            Ok(Arc::new(take_fixed_size_binary(values, indices, *size)?))
        }
        DataType::Null => {
            // Take applied to a null array produces a null array.
            if values.len() >= indices.len() {
                // If the existing null array is as big as the indices, we can use a slice of it
                // to avoid allocating a new null array.
                Ok(values.slice(0, indices.len()))
            } else {
                // If the existing null array isn't big enough, create a new one.
                Ok(new_null_array(&DataType::Null, indices.len()))
            }
        }
        t => unimplemented!("Take not supported for data type {:?}", t)
    }
}

/// Checks that every non-null index in `indices` can be cast to `usize` and is smaller than
/// `len`, returning an [`ArrowError::ComputeError`] for the first index that is not.
fn check_take_index_bounds<IndexType: ArrowPrimitiveType>(
    len: usize,
    indices: &PrimitiveArray<IndexType>,
) -> Result<(), ArrowError> {
    let check = |index: IndexType::Native| -> Result<(), ArrowError> {
        let ix = index.to_usize().ok_or_else(|| {
            ArrowError::ComputeError("Cast to usize failed".to_string())
        })?;
        if ix >= len {
            return Err(ArrowError::ComputeError(
                format!("Array index out of bounds, cannot get item at index {ix} from {len} entries"))
            );
        }
        Ok(())
    };
    if indices.null_count() > 0 {
        // Null slots carry no meaningful index, so only the non-null entries are checked.
        indices.iter().flatten().try_for_each(check)
    } else {
        // Without nulls the raw values can be scanned directly.
        indices.values().iter().copied().try_for_each(check)
    }
}