r/inst/include/nanoarrow/r.h (216 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_R_H_INCLUDED
#define NANOARROW_R_H_INCLUDED
#include <R.h>
#include <Rinternals.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/// \defgroup nanoarrow-r Utilities for Arrow R extensions
///
/// EXPERIMENTAL: The interface and lifecycle semantics described in this header
/// should be considered experimental and may change in a future version based on
/// user feedback.
///
/// In the nanoarrow R package, an external pointer to an ArrowSchema, ArrowArray, or
/// ArrowArrayStream carries the class "nanoarrow_schema", "nanoarrow_array", or
/// "nanoarrow_array_stream" (respectively). The pointer must point to valid memory
/// or be NULL until the R external pointer object is finalized.
///
/// nanoarrow_schema_owning_xptr(), nanoarrow_array_owning_xptr(), and
/// nanoarrow_array_stream_owning_xptr() initialize such an external pointer using
/// malloc() and a NULL initial release() callback such that it can be distinguished from
/// a pointer to an initialized value according to the Arrow C Data/Stream interface
/// documentation. This structure is intended to have a valid value initialized into it
/// using ArrowXXXMove() or by passing the pointer to a suitable exporting function.
///
/// External pointers allocated by nanoarrow_xxxx_owning_xptr() register a finalizer
/// that calls the release() callback when the external pointer's address is non-NULL
/// and points to a structure whose release() callback is also non-NULL; the finalizer
/// then free()s the structure itself. External pointers may also
/// manage lifecycle by declaring a strong reference to a single R object via
/// R_SetExternalPtrProtected(); however, when passing the address of an R external
/// pointer to a non-R library, the ownership of the structure must *not* have such SEXP
/// dependencies. The nanoarrow R package can wrap such an SEXP dependency into a
/// self-contained thread-safe release callback via nanoarrow_pointer_export() that
/// manages the SEXP dependency using a preserve/release mechanism similar to
/// R_PreserveObject()/R_ReleaseObject().
///
/// The "tag" of an external pointer to an ArrowArray must be R_NilValue or an external
/// pointer to an ArrowSchema that may be used to interpret the pointed-to ArrowArray. The
/// "tag" of a nanoarrow external pointer to an ArrowSchema or ArrowArrayStream is
/// reserved for future use and must be R_NilValue.
///
/// @{
// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE
#include <stdint.h>
// NOTE(review): these three constants are distinct bits (1, 2, 4), so they are
// presumably meant to be combined with bitwise OR in ArrowSchema::flags. Confirm the
// meaning of each flag against the Arrow C data interface specification.
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
// Type description half of the Arrow C data interface.
//
// This definition is only compiled when ARROW_C_DATA_INTERFACE has not already been
// defined by another header, so it must stay identical to the canonical definition:
// do not reorder, add, or remove members.
//
// The helpers in this header treat release == NULL as "not initialized or already
// released" and release != NULL as "holds a value that must be released".
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
// NOTE(review): presumably a combination of the ARROW_FLAG_* values -- confirm
// against the Arrow C data interface specification.
int64_t flags;
// NOTE(review): presumably the number of entries in `children` -- confirm.
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
// Data half of the Arrow C data interface.
//
// This definition is only compiled when ARROW_C_DATA_INTERFACE has not already been
// defined by another header, so it must stay identical to the canonical definition:
// do not reorder, add, or remove members.
//
// The helpers in this header treat release == NULL as "not initialized or already
// released" and release != NULL as "holds a value that must be released".
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
// NOTE(review): presumably the number of entries in `buffers` -- confirm against
// the Arrow C data interface specification.
int64_t n_buffers;
// NOTE(review): presumably the number of entries in `children` -- confirm.
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_DATA_INTERFACE
#ifndef ARROW_C_STREAM_INTERFACE
#define ARROW_C_STREAM_INTERFACE
// Arrow C stream interface: a set of callbacks that yields a schema and a sequence
// of arrays.
//
// This definition is only compiled when ARROW_C_STREAM_INTERFACE has not already been
// defined by another header, so it must stay identical to the canonical definition:
// do not reorder, add, or remove members.
//
// The helpers in this header treat release == NULL as "not initialized or already
// released" and release != NULL as "holds a value that must be released".
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_STREAM_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED
/// \brief Allocate an external pointer to an ArrowSchema
///
/// Allocate an external pointer to an uninitialized ArrowSchema with a finalizer that
/// ensures that any non-null release callback in a pointed-to structure will be called
/// when the external pointer is garbage collected.
///
/// The structure is allocated with malloc() and its release callback is set to NULL;
/// the other members are left uninitialized. Rf_error() is raised if the allocation
/// fails. The registered finalizer also free()s the structure.
///
/// \return An external pointer with class "nanoarrow_schema" whose tag and protected
/// values are both R_NilValue. The result is no longer protected when it is returned,
/// so the caller must PROTECT() it before any further R allocation.
static inline SEXP nanoarrow_schema_owning_xptr(void);
/// \brief Allocate an external pointer to an ArrowArray
///
/// Allocate an external pointer to an uninitialized ArrowArray with a finalizer that
/// ensures that any non-null release callback in a pointed-to structure will be called
/// when the external pointer is garbage collected.
///
/// The structure is allocated with malloc() and its release callback is set to NULL;
/// the other members are left uninitialized. The registered finalizer also free()s the
/// structure.
///
/// \return An external pointer with class "nanoarrow_array" whose tag and protected
/// values are both R_NilValue. The result is no longer protected when it is returned,
/// so the caller must PROTECT() it before any further R allocation.
static inline SEXP nanoarrow_array_owning_xptr(void);
/// \brief Allocate an external pointer to an ArrowArrayStream
///
/// Allocate an external pointer to an uninitialized ArrowArrayStream with a finalizer
/// that ensures that any non-null release callback in a pointed-to structure will be
/// called when the external pointer is garbage collected.
///
/// The structure is allocated with malloc() and its release callback is set to NULL;
/// the other members are left uninitialized. The registered finalizer also free()s the
/// structure.
///
/// \return An external pointer with class "nanoarrow_array_stream" whose tag and
/// protected values are both R_NilValue. The result is no longer protected when it is
/// returned, so the caller must PROTECT() it before any further R allocation.
static inline SEXP nanoarrow_array_stream_owning_xptr(void);
/// \brief Ensure an input SEXP points to an initialized ArrowSchema
///
/// This function will always return an ArrowSchema pointer that can be safely
/// consumed or raise an error via Rf_error(). This is intended to be used to
/// sanitize an *input* ArrowSchema.
///
/// An error is raised when schema_xptr does not inherit from "nanoarrow_schema", when
/// its external pointer address is NULL, or when the release callback of the
/// pointed-to structure is NULL.
///
/// \param schema_xptr The object to validate.
/// \return The external pointer address of schema_xptr; it is never NULL and its
/// release callback is never NULL. Ownership is not transferred by this function.
static inline struct ArrowSchema* nanoarrow_schema_from_xptr(SEXP schema_xptr);
/// \brief Ensure an output SEXP points to an uninitialized ArrowSchema
///
/// This function will always return an ArrowSchema pointer that can be safely
/// used as an output argument or raise an error via Rf_error(). This is intended
/// to be used to sanitize an *output* ArrowSchema allocated from R or elsewhere.
///
/// An error is raised when schema_xptr does not inherit from "nanoarrow_schema", when
/// its external pointer address is NULL, or when the release callback of the
/// pointed-to structure is not NULL (i.e., it already holds a value).
///
/// \param schema_xptr The object to validate.
/// \return The external pointer address of schema_xptr; it is never NULL and its
/// release callback is always NULL.
static inline struct ArrowSchema* nanoarrow_output_schema_from_xptr(SEXP schema_xptr);
/// \brief Ensure an input SEXP points to an initialized ArrowArray
///
/// This function will always return an ArrowArray pointer that can be safely
/// consumed or raise an error via Rf_error(). This is intended to be used to
/// sanitize an *input* ArrowArray.
///
/// An error is raised when array_xptr does not inherit from "nanoarrow_array", when
/// its external pointer address is NULL, or when the release callback of the
/// pointed-to structure is NULL. The external pointer's tag is not inspected.
///
/// \param array_xptr The object to validate.
/// \return The external pointer address of array_xptr; it is never NULL and its
/// release callback is never NULL. Ownership is not transferred by this function.
static inline struct ArrowArray* nanoarrow_array_from_xptr(SEXP array_xptr);
/// \brief Ensure an output SEXP points to an uninitialized ArrowArray
///
/// This function will always return an ArrowArray pointer that can be safely
/// used as an output argument or raise an error via Rf_error(). This is intended
/// to be used to sanitize an *output* ArrowArray allocated from R or elsewhere.
///
/// An error is raised when array_xptr does not inherit from "nanoarrow_array", when
/// its external pointer address is NULL, or when the release callback of the
/// pointed-to structure is not NULL (i.e., it already holds a value).
///
/// \param array_xptr The object to validate.
/// \return The external pointer address of array_xptr; it is never NULL and its
/// release callback is always NULL.
static inline struct ArrowArray* nanoarrow_output_array_from_xptr(SEXP array_xptr);
/// \brief Ensure an input SEXP points to an initialized ArrowArrayStream
///
/// This function will always return an ArrowArrayStream pointer that can be safely
/// consumed or raise an error via Rf_error(). This is intended to be used to
/// sanitize an *input* ArrowArrayStream.
///
/// An error is raised when array_stream_xptr does not inherit from
/// "nanoarrow_array_stream", when its external pointer address is NULL, or when the
/// release callback of the pointed-to structure is NULL.
///
/// \param array_stream_xptr The object to validate.
/// \return The external pointer address of array_stream_xptr; it is never NULL and
/// its release callback is never NULL. Ownership is not transferred by this function.
static inline struct ArrowArrayStream* nanoarrow_array_stream_from_xptr(
SEXP array_stream_xptr);
/// \brief Ensure an output SEXP points to an uninitialized ArrowArrayStream
///
/// This function will always return an ArrowArrayStream pointer that can be safely
/// used as an output argument or raise an error via Rf_error(). This is intended
/// to be used to sanitize an *output* ArrowArrayStream allocated from R or elsewhere.
///
/// An error is raised when array_stream_xptr does not inherit from
/// "nanoarrow_array_stream", when its external pointer address is NULL, or when the
/// release callback of the pointed-to structure is not NULL (i.e., it already holds a
/// value).
///
/// \param array_stream_xptr The object to validate.
/// \return The external pointer address of array_stream_xptr; it is never NULL and
/// its release callback is always NULL.
static inline struct ArrowArrayStream* nanoarrow_output_array_stream_from_xptr(
SEXP array_stream_xptr);
/// \brief Finalize an external pointer to an ArrowSchema
///
/// This function is provided for internal use by nanoarrow_schema_owning_xptr()
/// and should not be called directly.
///
/// When the external pointer address is non-NULL, the release callback of the
/// pointed-to ArrowSchema is called if it is non-NULL, the structure is passed to
/// free(), and the external pointer address is cleared. A NULL address is a no-op.
///
/// \param schema_xptr The external pointer being finalized.
static void nanoarrow_finalize_schema_xptr(SEXP schema_xptr);
/// \brief Finalize an external pointer to an ArrowArray
///
/// This function is provided for internal use by nanoarrow_array_owning_xptr()
/// and should not be called directly.
///
/// When the external pointer address is non-NULL, the release callback of the
/// pointed-to ArrowArray is called if it is non-NULL, the structure is passed to
/// free(), and the external pointer address is cleared. A NULL address is a no-op.
///
/// \param array_xptr The external pointer being finalized.
static void nanoarrow_finalize_array_xptr(SEXP array_xptr);
/// \brief Finalize an external pointer to an ArrowArrayStream
///
/// This function is provided for internal use by nanoarrow_array_stream_owning_xptr()
/// and should not be called directly.
///
/// When the external pointer address is non-NULL, the release callback of the
/// pointed-to ArrowArrayStream is called if it is non-NULL, the structure is passed to
/// free(), and the external pointer address is cleared. A NULL address is a no-op.
///
/// \param array_stream_xptr The external pointer being finalized.
static void nanoarrow_finalize_array_stream_xptr(SEXP array_stream_xptr);
/// @}
// Implementations follow
// Finalizer for external pointers created by nanoarrow_schema_owning_xptr():
// releases the pointed-to ArrowSchema (if it holds a value), frees the structure,
// and clears the external pointer address.
static void nanoarrow_finalize_schema_xptr(SEXP schema_xptr) {
  struct ArrowSchema* owned = (struct ArrowSchema*)R_ExternalPtrAddr(schema_xptr);

  // Nothing to release or free when the address is NULL.
  if (owned == NULL) {
    return;
  }

  // A non-NULL release callback means the structure still holds a value.
  if (owned->release != NULL) {
    owned->release(owned);
  }

  free(owned);
  R_ClearExternalPtr(schema_xptr);
}
// Finalizer for external pointers created by nanoarrow_array_owning_xptr():
// releases the pointed-to ArrowArray (if it holds a value), frees the structure,
// and clears the external pointer address.
static void nanoarrow_finalize_array_xptr(SEXP array_xptr) {
  struct ArrowArray* owned = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);

  // Nothing to release or free when the address is NULL.
  if (owned == NULL) {
    return;
  }

  // A non-NULL release callback means the structure still holds a value.
  if (owned->release != NULL) {
    owned->release(owned);
  }

  free(owned);
  R_ClearExternalPtr(array_xptr);
}
// Finalizer for external pointers created by nanoarrow_array_stream_owning_xptr():
// releases the pointed-to ArrowArrayStream (if it holds a value), frees the
// structure, and clears the external pointer address.
static void nanoarrow_finalize_array_stream_xptr(SEXP array_stream_xptr) {
  void* addr = R_ExternalPtrAddr(array_stream_xptr);

  // Nothing to release or free when the address is NULL.
  if (addr == NULL) {
    return;
  }

  struct ArrowArrayStream* owned = (struct ArrowArrayStream*)addr;

  // A non-NULL release callback means the structure still holds a value.
  if (owned->release != NULL) {
    owned->release(owned);
  }

  free(owned);
  R_ClearExternalPtr(array_stream_xptr);
}
// Allocate an ArrowSchema with malloc(), mark it as uninitialized (release == NULL),
// and wrap it in an external pointer of class "nanoarrow_schema" whose finalizer is
// nanoarrow_finalize_schema_xptr(). The returned SEXP is unprotected.
static inline SEXP nanoarrow_schema_owning_xptr(void) {
  // The cast is required because this header may also be compiled as C++.
  void* storage = malloc(sizeof(struct ArrowSchema));
  if (storage == NULL) {
    Rf_error("Failed to allocate ArrowSchema");
  }

  // Only the release callback is initialized; a NULL value marks the structure
  // as not yet holding a schema.
  struct ArrowSchema* uninitialized = (struct ArrowSchema*)storage;
  uninitialized->release = NULL;

  SEXP xptr = PROTECT(R_MakeExternalPtr(uninitialized, R_NilValue, R_NilValue));
  SEXP cls = PROTECT(Rf_mkString("nanoarrow_schema"));
  Rf_setAttrib(xptr, R_ClassSymbol, cls);
  R_RegisterCFinalizer(xptr, &nanoarrow_finalize_schema_xptr);

  UNPROTECT(2);
  return xptr;
}
// Allocate an ArrowArray with malloc(), mark it as uninitialized (release == NULL),
// and wrap it in an external pointer of class "nanoarrow_array" whose finalizer is
// nanoarrow_finalize_array_xptr().
//
// Raises an R error via Rf_error() if the allocation fails. The returned SEXP is
// unprotected, so the caller must PROTECT() it before any further R allocation.
static inline SEXP nanoarrow_array_owning_xptr(void) {
  // The cast is required because this header may also be compiled as C++.
  struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray));

  // malloc() may fail; writing array->release through a NULL pointer would be
  // undefined behavior, so raise an R error instead (as the schema variant does).
  if (array == NULL) {
    Rf_error("Failed to allocate ArrowArray");
  }

  // Only the release callback is initialized; a NULL value marks the structure
  // as not yet holding an array.
  array->release = NULL;

  SEXP array_xptr = PROTECT(R_MakeExternalPtr(array, R_NilValue, R_NilValue));
  SEXP array_cls = PROTECT(Rf_mkString("nanoarrow_array"));
  Rf_setAttrib(array_xptr, R_ClassSymbol, array_cls);
  R_RegisterCFinalizer(array_xptr, &nanoarrow_finalize_array_xptr);
  UNPROTECT(2);
  return array_xptr;
}
// Allocate an ArrowArrayStream with malloc(), mark it as uninitialized
// (release == NULL), and wrap it in an external pointer of class
// "nanoarrow_array_stream" whose finalizer is nanoarrow_finalize_array_stream_xptr().
//
// Raises an R error via Rf_error() if the allocation fails. The returned SEXP is
// unprotected, so the caller must PROTECT() it before any further R allocation.
static inline SEXP nanoarrow_array_stream_owning_xptr(void) {
  // The cast is required because this header may also be compiled as C++.
  struct ArrowArrayStream* array_stream =
      (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream));

  // malloc() may fail; writing array_stream->release through a NULL pointer would
  // be undefined behavior, so raise an R error instead (as the schema variant does).
  if (array_stream == NULL) {
    Rf_error("Failed to allocate ArrowArrayStream");
  }

  // Only the release callback is initialized; a NULL value marks the structure
  // as not yet holding a stream.
  array_stream->release = NULL;

  SEXP array_stream_xptr =
      PROTECT(R_MakeExternalPtr(array_stream, R_NilValue, R_NilValue));
  SEXP array_stream_cls = PROTECT(Rf_mkString("nanoarrow_array_stream"));
  Rf_setAttrib(array_stream_xptr, R_ClassSymbol, array_stream_cls);
  R_RegisterCFinalizer(array_stream_xptr, &nanoarrow_finalize_array_stream_xptr);
  UNPROTECT(2);
  return array_stream_xptr;
}
// Validate an *input* schema: the object must inherit from "nanoarrow_schema", have a
// non-NULL address, and point to a structure with a non-NULL release callback.
// Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowSchema* nanoarrow_schema_from_xptr(SEXP schema_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(schema_xptr, "nanoarrow_schema");
  if (!has_class) {
    Rf_error("`schema` argument that does not inherit from 'nanoarrow_schema'");
  }

  struct ArrowSchema* input = (struct ArrowSchema*)R_ExternalPtrAddr(schema_xptr);
  if (input == NULL) {
    Rf_error("nanoarrow_schema() is an external pointer to NULL");
  }

  // release == NULL marks a structure that holds no value.
  if (input->release == NULL) {
    Rf_error("nanoarrow_schema() has already been released");
  }

  return input;
}
// Validate an *output* schema: the object must inherit from "nanoarrow_schema", have
// a non-NULL address, and point to a structure whose release callback is still NULL.
// Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowSchema* nanoarrow_output_schema_from_xptr(SEXP schema_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(schema_xptr, "nanoarrow_schema");
  if (!has_class) {
    Rf_error("`schema` argument that does not inherit from 'nanoarrow_schema'");
  }

  struct ArrowSchema* output = (struct ArrowSchema*)R_ExternalPtrAddr(schema_xptr);
  if (output == NULL) {
    Rf_error("nanoarrow_schema() is an external pointer to NULL");
  }

  // A non-NULL release callback means the structure already holds a value.
  if (output->release != NULL) {
    Rf_error("nanoarrow_schema() output has already been initialized");
  }

  return output;
}
// Validate an *input* array: the object must inherit from "nanoarrow_array", have a
// non-NULL address, and point to a structure with a non-NULL release callback.
// Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowArray* nanoarrow_array_from_xptr(SEXP array_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(array_xptr, "nanoarrow_array");
  if (!has_class) {
    Rf_error("`array` argument that is not a nanoarrow_array()");
  }

  struct ArrowArray* input = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
  if (input == NULL) {
    Rf_error("nanoarrow_array() is an external pointer to NULL");
  }

  // release == NULL marks a structure that holds no value.
  if (input->release == NULL) {
    Rf_error("nanoarrow_array() has already been released");
  }

  return input;
}
// Validate an *output* array: the object must inherit from "nanoarrow_array", have a
// non-NULL address, and point to a structure whose release callback is still NULL.
// Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowArray* nanoarrow_output_array_from_xptr(SEXP array_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(array_xptr, "nanoarrow_array");
  if (!has_class) {
    Rf_error("`array` argument that is not a nanoarrow_array()");
  }

  struct ArrowArray* output = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
  if (output == NULL) {
    Rf_error("nanoarrow_array() is an external pointer to NULL");
  }

  // A non-NULL release callback means the structure already holds a value.
  if (output->release != NULL) {
    Rf_error("nanoarrow_array() output has already been initialized");
  }

  return output;
}
// Validate an *input* stream: the object must inherit from "nanoarrow_array_stream",
// have a non-NULL address, and point to a structure with a non-NULL release callback.
// Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowArrayStream* nanoarrow_array_stream_from_xptr(
    SEXP array_stream_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(array_stream_xptr, "nanoarrow_array_stream");
  if (!has_class) {
    Rf_error("`array_stream` argument that is not a nanoarrow_array_stream()");
  }

  void* addr = R_ExternalPtrAddr(array_stream_xptr);
  struct ArrowArrayStream* input = (struct ArrowArrayStream*)addr;
  if (input == NULL) {
    Rf_error("nanoarrow_array_stream() is an external pointer to NULL");
  }

  // release == NULL marks a structure that holds no value.
  if (input->release == NULL) {
    Rf_error("nanoarrow_array_stream() has already been released");
  }

  return input;
}
// Validate an *output* stream: the object must inherit from "nanoarrow_array_stream",
// have a non-NULL address, and point to a structure whose release callback is still
// NULL. Any violation raises an R error; otherwise the address is returned.
static inline struct ArrowArrayStream* nanoarrow_output_array_stream_from_xptr(
    SEXP array_stream_xptr) {
  // The class is checked before the external pointer address is read.
  const int has_class = Rf_inherits(array_stream_xptr, "nanoarrow_array_stream");
  if (!has_class) {
    Rf_error("`array_stream` argument that is not a nanoarrow_array_stream()");
  }

  void* addr = R_ExternalPtrAddr(array_stream_xptr);
  struct ArrowArrayStream* output = (struct ArrowArrayStream*)addr;
  if (output == NULL) {
    Rf_error("nanoarrow_array_stream() is an external pointer to NULL");
  }

  // A non-NULL release callback means the structure already holds a value.
  if (output->release != NULL) {
    Rf_error("nanoarrow_array_stream() output has already been initialized");
  }

  return output;
}
#ifdef __cplusplus
}
#endif
#endif