in src/mlio-py/mlio/core/data_store.cc [94:281]
// Registers the data store related Python bindings on the module `m`:
//   - the `Compression` enumeration,
//   - the `DataStore` base class and its `File`, `InMemoryStore`,
//     `S3Object`, and `SageMakerPipe` subclasses,
//   - the `list_files` and `list_s3_objects` free functions (two overloads
//     each: one taking a list of locations, one taking a single location).
//
// NOTE: the contents of the raw string literals below are the docstrings
// shown to Python users; they are deliberately kept flush-left so that no
// extra leading whitespace ends up in the emitted documentation.
void register_data_stores(py::module &m)
{
    // ------------------------------------------------------------------
    // Compression
    // ------------------------------------------------------------------
    py::enum_<Compression>(m, "Compression", "Specifies the compression type of a data store.")
        .value("NONE", Compression::none)
        .value("INFER", Compression::infer)
        .value("GZIP", Compression::gzip)
        .value("BZIP2", Compression::bzip2)
        .value("ZIP", Compression::zip);

    // ------------------------------------------------------------------
    // DataStore (base class)
    // ------------------------------------------------------------------
    // NOTE(review): Py_data_store is presumably the pybind11 trampoline
    // that lets Python code subclass DataStore -- it is declared outside
    // this function; confirm there.
    py::class_<Data_store, Py_data_store, Intrusive_ptr<Data_store>>(
        m, "DataStore", "Represents a repository of data.")
        .def(py::init<>())
        // The GIL is released for the duration of the call.
        .def("open_read",
             &Data_store::open_read,
             py::call_guard<py::gil_scoped_release>(),
             "Return an ``Input_stream`` for reading from the data store.")
        // `py::is_operator()` makes pybind11 return `NotImplemented` when
        // `other` is not a DataStore instead of raising `TypeError`, so
        // that `store == 42` or `store in [1, "a"]` evaluate normally.
        .def(
            "__eq__",
            [](const Data_store &self, const Data_store &other) {
                return self == other;
            },
            py::is_operator())
        // Defined alongside `__eq__` so that instances stay hashable.
        .def("__hash__",
             [](const Data_store &self) {
                 return std::hash<Data_store>{}(self);
             })
        .def("__repr__", &Data_store::repr)
        .def_property_readonly(
            "id", &Data_store::id, "Returns a unique identifier for the data store.");

    // ------------------------------------------------------------------
    // File
    // ------------------------------------------------------------------
    py::class_<File, Data_store, Intrusive_ptr<File>>(
        m, "File", "Represents a File as a ``DataStore``.")
        .def(py::init<std::string, bool, Compression>(),
             "path"_a,
             "memory_map"_a = true,
             "compression"_a = Compression::infer,
             R"(
Parameters
----------
path : str
The path to the File.
memory_map : bool
A boolean value indicating whether the File should be
memory-mapped.
compression : compression
The compression type of the File. If set to `INFER`, the
compression will be inferred from the filename.
)");

    // ------------------------------------------------------------------
    // InMemoryStore
    // ------------------------------------------------------------------
    // Constructed through the `make_in_memory_store` factory (declared
    // outside this function) rather than through a C++ constructor.
    py::class_<In_memory_store, Data_store, Intrusive_ptr<In_memory_store>>(
        m, "InMemoryStore", "Represents a memory block as a ``DataStore``.")
        .def(py::init(&make_in_memory_store),
             "buf"_a,
             "compression"_a = Compression::none,
             R"(
Parameters
----------
buf : buffer
The Python buffer to wrap as a data store.
compression : compression
The compression type of the data.
)");

    // ------------------------------------------------------------------
    // S3Object
    // ------------------------------------------------------------------
    py::class_<S3_object, Data_store, Intrusive_ptr<S3_object>>(
        m, "S3Object", "Represents an S3 object as a ``DataStore``.")
        .def(py::init<Intrusive_ptr<S3_client>, std::string, std::string, Compression>(),
             "client"_a,
             "uri"_a,
             "version_id"_a = "",
             "compression"_a = Compression::infer,
             R"(
Parameters
----------
client : S3Client
The `S3Client` to use.
uri : str
The URI of the S3 object.
version_id : str
The version of the S3 object to read.
compression : compression
The compression type of the S3 object. If set to `INFER`, the
compression will be inferred from the URI.
)");

    // ------------------------------------------------------------------
    // SageMakerPipe
    // ------------------------------------------------------------------
    py::class_<Sagemaker_pipe, Data_store, Intrusive_ptr<Sagemaker_pipe>>(
        m, "SageMakerPipe", "Represents an Amazon SageMaker pipe channel as a ``DataStore``.")
        .def(py::init<std::string,
                      std::chrono::seconds,
                      std::optional<std::size_t>,
                      Compression>(),
             "path"_a,
             "timeout"_a = sagemaker_pipe_default_timeout,
             "fifo_id"_a = std::nullopt,
             "compression"_a = Compression::none,
             R"(
Parameters
----------
path : str
The path to the SageMaker pipe channel.
timeout : datetime.timedelta
The duration to wait for data to appear in the SageMaker pipe
channel.
fifo_id : int, optional
The FIFO suffix of the SageMaker pipe channel.
compression : compression, optional
The compression type of the data.
)");

    // ------------------------------------------------------------------
    // list_files
    // ------------------------------------------------------------------
    // Overload taking a list of paths; bound to the `py_list_files`
    // wrapper declared outside this function.
    m.def("list_files",
          &py_list_files,
          "paths"_a,
          "pattern"_a = "",
          "predicate"_a = nullptr,
          "memory_map"_a = true,
          "compression"_a = Compression::infer,
          R"(
Recursively list all files residing under the specified paths.
Parameters
----------
paths : list of strs
The list of paths to traverse.
pattern : str, optional
The pattern to match the filenames against.
predicate : callable
The callback function for user-specific filtering.
memory_map : bool
A boolean value indicating whether the files should be
memory-mapped.
compression : compression
The compression type of the files. If set to `INFER`, the
compression will be inferred from the filenames.
)");

    // Overload taking a single path; bound directly to the C++
    // `list_files(const std::string &, std::string_view)` overload.
    m.def("list_files",
          py::overload_cast<const std::string &, std::string_view>(&list_files),
          "path"_a,
          "pattern"_a = "",
          R"(
Recursively list all files residing under the specified path.
Parameters
----------
path : str
The path to traverse.
pattern : str, optional
The pattern to match the filenames against.
)");

    // ------------------------------------------------------------------
    // list_s3_objects
    // ------------------------------------------------------------------
    // Overload taking a list of URIs; bound to the `py_list_s3_objects`
    // wrapper declared outside this function.
    m.def("list_s3_objects",
          &py_list_s3_objects,
          "client"_a,
          "uris"_a,
          "pattern"_a = "",
          "predicate"_a = nullptr,
          "compression"_a = Compression::infer,
          R"(
List all S3 objects residing under the specified URIs.
Parameters
----------
client : S3Client
The S3 client to use.
uris : list of strs
The list of URIs to traverse.
pattern : str, optional
The pattern to match the S3 objects against.
predicate : callable
The callback function for user-specific filtering.
compression : compression
The compression type of the S3 objects. If set to `INFER`, the
compression will be inferred from the URIs.
)");

    // Overload taking a single URI; bound directly to the C++
    // `list_s3_objects(const S3_client &, const std::string &,
    // std::string_view)` overload.
    m.def("list_s3_objects",
          py::overload_cast<const S3_client &, const std::string &, std::string_view>(
              &list_s3_objects),
          "client"_a,
          "uri"_a,
          "pattern"_a = "",
          R"(
List all S3 objects residing under the specified URI.
Parameters
----------
client : S3Client
The S3 client to use.
uri : str
The URI to traverse.
pattern : str, optional
The pattern to match the S3 objects against.
)");
}