in src/datasets/features/video.py [0:0]
def encode_example(self, value: Union[str, bytes, bytearray, Example, np.ndarray, "VideoDecoder"]) -> Example:
    """Encode example into a format for Arrow.

    Args:
        value (`str`, `bytes`, `bytearray`, `list`, `np.ndarray`, `VideoDecoder` or `dict`):
            Data passed as input to Video feature. A `list` is converted to an
            `np.ndarray` before being encoded.

    Returns:
        `dict` with "path" and "bytes" fields

    Raises:
        ValueError: If `value` is `None`, or if it is a `dict` whose "path" and
            "bytes" entries are both missing or `None`.
        TypeError: If `value` is of an unsupported type.
    """
    if value is None:
        raise ValueError("value must be provided")

    if isinstance(value, list):
        value = np.array(value)

    if isinstance(value, str):
        return {"path": value, "bytes": None}
    if isinstance(value, (bytes, bytearray)):
        return {"path": None, "bytes": value}
    if isinstance(value, np.ndarray):
        # convert the video array to bytes
        return encode_np_array(value)
    if isinstance(value, dict):
        path, bytes_ = value.get("path"), value.get("bytes")
        if path is not None and os.path.isfile(path):
            # we set "bytes": None to not duplicate the data if they're already available locally
            return {"bytes": None, "path": path}
        if bytes_ is not None or path is not None:
            # store the video bytes, and path is used to infer the video format using the file extension
            return {"bytes": bytes_, "path": path}
        raise ValueError(
            f"A video sample should have one of 'path' or 'bytes' but they are missing or None in {value}."
        )

    # torchcodec is imported only once every other supported type has been ruled out,
    # so that inputs which do not need it never trigger (or depend on) its import.
    # NOTE(review): assumes VideoDecoder does not subclass any type handled above - confirm.
    if config.TORCHCODEC_AVAILABLE:
        from torchcodec.decoders import VideoDecoder

        if isinstance(value, VideoDecoder):
            # convert the torchcodec video decoder to bytes
            return encode_torchcodec_video(value)

    raise TypeError(f"Unsupported encode_example type: {type(value)}")