in src/nanoarrow/array.c [870:1009]
static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
struct ArrowError* error) {
// Perform minimal validation. This will validate or assign
// buffer sizes as long as buffer access is not required.
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));
// Calculate buffer sizes or child lengths that require accessing the offsets
// buffer. Where appropriate, validate that the first offset is >= 0.
// If a buffer size is marked as unknown, assign it; otherwise, validate it.
int64_t offset_plus_length = array_view->offset + array_view->length;
int64_t first_offset;
int64_t last_offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int32[0];
if (first_offset < 0) {
ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
(long)first_offset);
return EINVAL;
}
last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
// If the data buffer size is unknown, assign it; otherwise, check it
if (array_view->buffer_views[2].size_bytes == -1) {
array_view->buffer_views[2].size_bytes = last_offset;
} else if (array_view->buffer_views[2].size_bytes < last_offset) {
ArrowErrorSet(error,
"Expected %s array buffer 2 to have size >= %ld bytes but found "
"buffer with %ld bytes",
ArrowTypeString(array_view->storage_type), (long)last_offset,
(long)array_view->buffer_views[2].size_bytes);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int64[0];
if (first_offset < 0) {
ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
(long)first_offset);
return EINVAL;
}
last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
// If the data buffer size is unknown, assign it; otherwise, check it
if (array_view->buffer_views[2].size_bytes == -1) {
array_view->buffer_views[2].size_bytes = last_offset;
} else if (array_view->buffer_views[2].size_bytes < last_offset) {
ArrowErrorSet(error,
"Expected %s array buffer 2 to have size >= %ld bytes but found "
"buffer with %ld bytes",
ArrowTypeString(array_view->storage_type), (long)last_offset,
(long)array_view->buffer_views[2].size_bytes);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array_view->n_children; i++) {
if (array_view->children[i]->length < offset_plus_length) {
ArrowErrorSet(
error,
"Expected struct child %d to have length >= %ld but found child with "
"length %ld",
(int)(i + 1), (long)offset_plus_length,
(long)array_view->children[i]->length);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_MAP:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int32[0];
if (first_offset < 0) {
ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
(long)first_offset);
return EINVAL;
}
last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
"Expected child of %s array to have length >= %ld but found array with "
"length %ld",
ArrowTypeString(array_view->storage_type), (long)last_offset,
(long)array_view->children[0]->length);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_LARGE_LIST:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int64[0];
if (first_offset < 0) {
ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
(long)first_offset);
return EINVAL;
}
last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
"Expected child of large list array to have length >= %ld but found array "
"with length %ld",
(long)last_offset, (long)array_view->children[0]->length);
return EINVAL;
}
}
break;
default:
break;
}
// Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(
ArrowArrayViewValidateDefault(array_view->children[i], error));
}
// Recurse for dictionary
if (array_view->dictionary != NULL) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
}
return NANOARROW_OK;
}