static int ArrowArrayViewValidateMinimal()

in src/nanoarrow/common/array.c [823:1046]


static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
                                         struct ArrowError* error) {
  if (array_view->length < 0) {
    ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64,
                  array_view->length);
    return EINVAL;
  }

  if (array_view->offset < 0) {
    ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64,
                  array_view->offset);
    return EINVAL;
  }

  // Ensure that offset + length fits within an int64 before a possible overflow
  if ((uint64_t)array_view->offset + (uint64_t)array_view->length > (uint64_t)INT64_MAX) {
    ArrowErrorSet(error, "Offset + length is > INT64_MAX");
    return EINVAL;
  }

  // Calculate buffer sizes that do not require buffer access. If marked as
  // unknown, assign the buffer size; otherwise, validate it.
  int64_t offset_plus_length = array_view->offset + array_view->length;

  // Only loop over the first two buffers because the size of the third buffer
  // is always data dependent for all current Arrow types.
  for (int i = 0; i < 2; i++) {
    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
    // Initialize with a value that will cause an error if accidentally used uninitialized
    // Need to suppress the clang-tidy warning because gcc warns for possible use
    int64_t min_buffer_size_bytes =  // NOLINT(clang-analyzer-deadcode.DeadStores)
        array_view->buffer_views[i].size_bytes + 1;

    switch (array_view->layout.buffer_type[i]) {
      case NANOARROW_BUFFER_TYPE_VALIDITY:
        if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) {
          continue;
        }

        min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
        break;
      case NANOARROW_BUFFER_TYPE_SIZE:
        min_buffer_size_bytes = element_size_bytes * offset_plus_length;
        break;
      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
        // Probably don't want/need to rely on the producer to have allocated an
        // offsets buffer of length 1 for a zero-size array
        min_buffer_size_bytes =
            (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
        break;
      case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
        min_buffer_size_bytes =
            (offset_plus_length != 0) * element_size_bytes * offset_plus_length;
        break;
      case NANOARROW_BUFFER_TYPE_DATA:
        min_buffer_size_bytes =
            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
                                       offset_plus_length) /
            8;
        break;
      case NANOARROW_BUFFER_TYPE_TYPE_ID:
      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
        min_buffer_size_bytes = element_size_bytes * offset_plus_length;
        break;
      case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
      case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
      case NANOARROW_BUFFER_TYPE_NONE:
        continue;
    }

    // Assign or validate buffer size
    if (array_view->buffer_views[i].size_bytes == -1) {
      array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
    } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) {
      ArrowErrorSet(error,
                    "Expected %s array buffer %d to have size >= %" PRId64
                    " bytes but found "
                    "buffer with %" PRId64 " bytes",
                    ArrowTypeString(array_view->storage_type), i, min_buffer_size_bytes,
                    array_view->buffer_views[i].size_bytes);
      return EINVAL;
    }
  }

  // For list, fixed-size list and map views, we can validate the number of children
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_LARGE_LIST:
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
    case NANOARROW_TYPE_MAP:
    case NANOARROW_TYPE_LIST_VIEW:
    case NANOARROW_TYPE_LARGE_LIST_VIEW:
      if (array_view->n_children != 1) {
        ArrowErrorSet(error,
                      "Expected 1 child of %s array but found %" PRId64 " child arrays",
                      ArrowTypeString(array_view->storage_type), array_view->n_children);
        return EINVAL;
      }
      break;
    case NANOARROW_TYPE_RUN_END_ENCODED:
      if (array_view->n_children != 2) {
        ArrowErrorSet(
            error, "Expected 2 children for %s array but found %" PRId64 " child arrays",
            ArrowTypeString(array_view->storage_type), array_view->n_children);
        return EINVAL;
      }
      break;
    default:
      break;
  }

  // For struct, the sparse union, and the fixed-size list views, we can validate child
  // lengths.
  int64_t child_min_length;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_SPARSE_UNION:
    case NANOARROW_TYPE_STRUCT:
      child_min_length = (array_view->offset + array_view->length);
      for (int64_t i = 0; i < array_view->n_children; i++) {
        if (array_view->children[i]->length < child_min_length) {
          ArrowErrorSet(error,
                        "Expected struct child %" PRId64 " to have length >= %" PRId64
                        " but found child with "
                        "length %" PRId64,
                        i + 1, child_min_length, array_view->children[i]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      child_min_length = (array_view->offset + array_view->length) *
                         array_view->layout.child_size_elements;
      if (array_view->children[0]->length < child_min_length) {
        ArrowErrorSet(error,
                      "Expected child of fixed_size_list array to have length >= %" PRId64
                      " but "
                      "found array with length %" PRId64,
                      child_min_length, array_view->children[0]->length);
        return EINVAL;
      }
      break;

    case NANOARROW_TYPE_RUN_END_ENCODED: {
      if (array_view->n_children != 2) {
        ArrowErrorSet(error,
                      "Expected 2 children for run-end encoded array but found %" PRId64,
                      array_view->n_children);
        return EINVAL;
      }
      struct ArrowArrayView* run_ends_view = array_view->children[0];
      struct ArrowArrayView* values_view = array_view->children[1];
      int64_t max_length;
      switch (run_ends_view->storage_type) {
        case NANOARROW_TYPE_INT16:
          max_length = INT16_MAX;
          break;
        case NANOARROW_TYPE_INT32:
          max_length = INT32_MAX;
          break;
        case NANOARROW_TYPE_INT64:
          max_length = INT64_MAX;
          break;
        default:
          ArrowErrorSet(
              error,
              "Run-end encoded array only supports INT16, INT32 or INT64 run-ends "
              "but found run-ends type %s",
              ArrowTypeString(run_ends_view->storage_type));
          return EINVAL;
      }

      // There is already a check above that offset_plus_length < INT64_MAX
      if (offset_plus_length > max_length) {
        ArrowErrorSet(error,
                      "Offset + length of a run-end encoded array must fit in a value"
                      " of the run end type %s but is %" PRId64 " + %" PRId64,
                      ArrowTypeString(run_ends_view->storage_type), array_view->offset,
                      array_view->length);
        return EINVAL;
      }

      if (run_ends_view->length > values_view->length) {
        ArrowErrorSet(error,
                      "Length of run_ends is greater than the length of values: %" PRId64
                      " > %" PRId64,
                      run_ends_view->length, values_view->length);
        return EINVAL;
      }

      if (run_ends_view->length == 0 && values_view->length != 0) {
        ArrowErrorSet(error,
                      "Run-end encoded array has zero length %" PRId64
                      ", but values array has "
                      "non-zero length",
                      values_view->length);
        return EINVAL;
      }

      if (run_ends_view->null_count != 0) {
        ArrowErrorSet(error, "Null count must be 0 for run ends array, but is %" PRId64,
                      run_ends_view->null_count);
        return EINVAL;
      }
      break;
    }

    default:
      break;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewValidateMinimal(array_view->children[i], error));
  }

  // Recurse for dictionary
  if (array_view->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
  }

  return NANOARROW_OK;
}