in src/datasets/features/features.py [0:0]
def to_numpy(self, zero_copy_only=True):
storage: pa.ListArray = self.storage
null_mask = storage.is_null().to_numpy(zero_copy_only=False)
if self.type.shape[0] is not None:
size = 1
null_indices = np.arange(len(storage))[null_mask] - np.arange(np.sum(null_mask))
for i in range(self.type.ndims):
size *= self.type.shape[i]
storage = storage.flatten()
numpy_arr = storage.to_numpy(zero_copy_only=zero_copy_only)
numpy_arr = numpy_arr.reshape(len(self) - len(null_indices), *self.type.shape)
if len(null_indices):
numpy_arr = np.insert(numpy_arr.astype(np.float64), null_indices, np.nan, axis=0)
else:
shape = self.type.shape
ndims = self.type.ndims
arrays = []
first_dim_offsets = np.array([off.as_py() for off in storage.offsets])
for i, is_null in enumerate(null_mask):
if is_null:
arrays.append(np.nan)
else:
storage_el = storage[i : i + 1]
first_dim = first_dim_offsets[i + 1] - first_dim_offsets[i]
# flatten storage
for _ in range(ndims):
storage_el = storage_el.flatten()
numpy_arr = storage_el.to_numpy(zero_copy_only=zero_copy_only)
arrays.append(numpy_arr.reshape(first_dim, *shape[1:]))
if len(np.unique(np.diff(first_dim_offsets))) > 1:
# ragged
numpy_arr = np.empty(len(arrays), dtype=object)
numpy_arr[:] = arrays
else:
numpy_arr = np.array(arrays)
return numpy_arr