in src/datasets/formatting/formatting.py [0:0]
def _arrow_array_to_numpy(self, pa_array: pa.Array) -> np.ndarray:
if isinstance(pa_array, pa.ChunkedArray):
if isinstance(pa_array.type, _ArrayXDExtensionType):
# don't call to_pylist() to preserve dtype of the fixed-size array
zero_copy_only = _is_zero_copy_only(pa_array.type.storage_dtype, unnest=True)
array: list = [
row for chunk in pa_array.chunks for row in chunk.to_numpy(zero_copy_only=zero_copy_only)
]
else:
zero_copy_only = _is_zero_copy_only(pa_array.type) and all(
not _is_array_with_nulls(chunk) for chunk in pa_array.chunks
)
array: list = [
row for chunk in pa_array.chunks for row in chunk.to_numpy(zero_copy_only=zero_copy_only)
]
else:
if isinstance(pa_array.type, _ArrayXDExtensionType):
# don't call to_pylist() to preserve dtype of the fixed-size array
zero_copy_only = _is_zero_copy_only(pa_array.type.storage_dtype, unnest=True)
array: list = pa_array.to_numpy(zero_copy_only=zero_copy_only)
else:
zero_copy_only = _is_zero_copy_only(pa_array.type) and not _is_array_with_nulls(pa_array)
array: list = pa_array.to_numpy(zero_copy_only=zero_copy_only).tolist()
if len(array) > 0:
if any(
(isinstance(x, np.ndarray) and (x.dtype == object or x.shape != array[0].shape))
or (isinstance(x, float) and np.isnan(x))
for x in array
):
if np.lib.NumpyVersion(np.__version__) >= "2.0.0b1":
return np.asarray(array, dtype=object)
return np.array(array, copy=False, dtype=object)
if np.lib.NumpyVersion(np.__version__) >= "2.0.0b1":
return np.asarray(array)
else:
return np.array(array, copy=False)