in src/nanoarrow/common/array.c [823:1046]
static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
struct ArrowError* error) {
if (array_view->length < 0) {
ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64,
array_view->length);
return EINVAL;
}
if (array_view->offset < 0) {
ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64,
array_view->offset);
return EINVAL;
}
// Ensure that offset + length fits within an int64 before a possible overflow
if ((uint64_t)array_view->offset + (uint64_t)array_view->length > (uint64_t)INT64_MAX) {
ArrowErrorSet(error, "Offset + length is > INT64_MAX");
return EINVAL;
}
// Calculate buffer sizes that do not require buffer access. If marked as
// unknown, assign the buffer size; otherwise, validate it.
int64_t offset_plus_length = array_view->offset + array_view->length;
// Only loop over the first two buffers because the size of the third buffer
// is always data dependent for all current Arrow types.
for (int i = 0; i < 2; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
// Initialize with a value that will cause an error if accidentally used uninitialized
// Need to suppress the clang-tidy warning because gcc warns for possible use
int64_t min_buffer_size_bytes = // NOLINT(clang-analyzer-deadcode.DeadStores)
array_view->buffer_views[i].size_bytes + 1;
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_VALIDITY:
if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) {
continue;
}
min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
break;
case NANOARROW_BUFFER_TYPE_SIZE:
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
break;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
// Probably don't want/need to rely on the producer to have allocated an
// offsets buffer of length 1 for a zero-size array
min_buffer_size_bytes =
(offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
break;
case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
min_buffer_size_bytes =
(offset_plus_length != 0) * element_size_bytes * offset_plus_length;
break;
case NANOARROW_BUFFER_TYPE_DATA:
min_buffer_size_bytes =
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
offset_plus_length) /
8;
break;
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
break;
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_NONE:
continue;
}
// Assign or validate buffer size
if (array_view->buffer_views[i].size_bytes == -1) {
array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
} else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) {
ArrowErrorSet(error,
"Expected %s array buffer %d to have size >= %" PRId64
" bytes but found "
"buffer with %" PRId64 " bytes",
ArrowTypeString(array_view->storage_type), i, min_buffer_size_bytes,
array_view->buffer_views[i].size_bytes);
return EINVAL;
}
}
// For list, fixed-size list and map views, we can validate the number of children
switch (array_view->storage_type) {
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_MAP:
case NANOARROW_TYPE_LIST_VIEW:
case NANOARROW_TYPE_LARGE_LIST_VIEW:
if (array_view->n_children != 1) {
ArrowErrorSet(error,
"Expected 1 child of %s array but found %" PRId64 " child arrays",
ArrowTypeString(array_view->storage_type), array_view->n_children);
return EINVAL;
}
break;
case NANOARROW_TYPE_RUN_END_ENCODED:
if (array_view->n_children != 2) {
ArrowErrorSet(
error, "Expected 2 children for %s array but found %" PRId64 " child arrays",
ArrowTypeString(array_view->storage_type), array_view->n_children);
return EINVAL;
}
break;
default:
break;
}
// For struct, the sparse union, and the fixed-size list views, we can validate child
// lengths.
int64_t child_min_length;
switch (array_view->storage_type) {
case NANOARROW_TYPE_SPARSE_UNION:
case NANOARROW_TYPE_STRUCT:
child_min_length = (array_view->offset + array_view->length);
for (int64_t i = 0; i < array_view->n_children; i++) {
if (array_view->children[i]->length < child_min_length) {
ArrowErrorSet(error,
"Expected struct child %" PRId64 " to have length >= %" PRId64
" but found child with "
"length %" PRId64,
i + 1, child_min_length, array_view->children[i]->length);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_FIXED_SIZE_LIST:
child_min_length = (array_view->offset + array_view->length) *
array_view->layout.child_size_elements;
if (array_view->children[0]->length < child_min_length) {
ArrowErrorSet(error,
"Expected child of fixed_size_list array to have length >= %" PRId64
" but "
"found array with length %" PRId64,
child_min_length, array_view->children[0]->length);
return EINVAL;
}
break;
case NANOARROW_TYPE_RUN_END_ENCODED: {
if (array_view->n_children != 2) {
ArrowErrorSet(error,
"Expected 2 children for run-end encoded array but found %" PRId64,
array_view->n_children);
return EINVAL;
}
struct ArrowArrayView* run_ends_view = array_view->children[0];
struct ArrowArrayView* values_view = array_view->children[1];
int64_t max_length;
switch (run_ends_view->storage_type) {
case NANOARROW_TYPE_INT16:
max_length = INT16_MAX;
break;
case NANOARROW_TYPE_INT32:
max_length = INT32_MAX;
break;
case NANOARROW_TYPE_INT64:
max_length = INT64_MAX;
break;
default:
ArrowErrorSet(
error,
"Run-end encoded array only supports INT16, INT32 or INT64 run-ends "
"but found run-ends type %s",
ArrowTypeString(run_ends_view->storage_type));
return EINVAL;
}
// There is already a check above that offset_plus_length < INT64_MAX
if (offset_plus_length > max_length) {
ArrowErrorSet(error,
"Offset + length of a run-end encoded array must fit in a value"
" of the run end type %s but is %" PRId64 " + %" PRId64,
ArrowTypeString(run_ends_view->storage_type), array_view->offset,
array_view->length);
return EINVAL;
}
if (run_ends_view->length > values_view->length) {
ArrowErrorSet(error,
"Length of run_ends is greater than the length of values: %" PRId64
" > %" PRId64,
run_ends_view->length, values_view->length);
return EINVAL;
}
if (run_ends_view->length == 0 && values_view->length != 0) {
ArrowErrorSet(error,
"Run-end encoded array has zero length %" PRId64
", but values array has "
"non-zero length",
values_view->length);
return EINVAL;
}
if (run_ends_view->null_count != 0) {
ArrowErrorSet(error, "Null count must be 0 for run ends array, but is %" PRId64,
run_ends_view->null_count);
return EINVAL;
}
break;
}
default:
break;
}
// Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(
ArrowArrayViewValidateMinimal(array_view->children[i], error));
}
// Recurse for dictionary
if (array_view->dictionary != NULL) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
}
return NANOARROW_OK;
}