in python/riegeli/records/record_reader.cc [1143:1263]
This includes seek(), seek_numeric(), and size().
)doc"),
nullptr},
{nullptr, nullptr, nullptr, nullptr, nullptr}};
// Type object for the Python class named by tp_name below,
// "riegeli.records.record_reader.RecordReader".
//
// The initializer is positional: each entry is matched to a PyTypeObject slot
// purely by its position, so entries must never be reordered, inserted or
// removed without consulting the PyTypeObject layout. The trailing comment on
// each entry names the slot it fills. Entries given as nullptr / 0 are left
// unset by this initializer.
PyTypeObject PyRecordReader_Type = {
// The head macro below is not followed by a comma.
// NOTE(review): presumably that is why clang-format is disabled around it.
// clang-format off
PyVarObject_HEAD_INIT(&PyType_Type, 0)
// clang-format on
"riegeli.records.record_reader.RecordReader", // tp_name
sizeof(PyRecordReaderObject), // tp_basicsize
0, // tp_itemsize
reinterpret_cast<destructor>(RecordReaderDestructor), // tp_dealloc
// The slot after tp_dealloc differs by Python version: from 3.8 on
// (PY_VERSION_HEX >= 0x03080000) it is tp_vectorcall_offset, initialized with
// 0; in earlier versions it is tp_print, initialized with nullptr.
#if PY_VERSION_HEX >= 0x03080000
0, // tp_vectorcall_offset
#else
nullptr, // tp_print
#endif
nullptr, // tp_getattr
nullptr, // tp_setattr
nullptr, // tp_as_async
reinterpret_cast<reprfunc>(RecordReaderRepr), // tp_repr
nullptr, // tp_as_number
nullptr, // tp_as_sequence
nullptr, // tp_as_mapping
nullptr, // tp_hash
nullptr, // tp_call
nullptr, // tp_str
nullptr, // tp_getattro
nullptr, // tp_setattro
nullptr, // tp_as_buffer
// BASETYPE allows Python subclasses of this type. HAVE_GC opts instances into
// cyclic garbage collection, which is why tp_traverse and tp_clear are set
// below.
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags
// The raw string below is the type's docstring (tp_doc). It is emitted
// verbatim at runtime, so its text documents the constructor signature and
// arguments for Python users.
R"doc(
RecordReader(
src: BinaryIO,
*,
owns_src: bool = True,
assumed_pos: Optional[int] = None,
buffer_size: int = 64 << 10,
field_projection: Optional[Iterable[Iterable[int]]] = None,
recovery: Optional[Callable[[SkippedRegion], Any]] = None) -> RecordReader
Will read from the given file.
Args:
src: Binary IO stream to read from.
owns_src: If True, src is owned, and close() or __exit__() calls src.close().
assumed_pos: If None, src must support random access, RecordReader will
support random access, and RecordReader will set the position of src on
close(). If an int, it is enough that src supports sequential access, and
this position will be assumed initially.
buffer_size: Tunes how much data is buffered after reading from src.
field_projection: If not None, the set of fields to be included in returned
records, allowing to exclude the remaining fields (but does not guarantee
that they will be excluded). Excluding data makes reading faster. Projection
is effective if the file has been written with "transpose" in RecordWriter
options. Additionally, "bucket_fraction" in RecordWriter options with a
lower value can make reading with projection faster. A field projection is
specified as an iterable of field paths. A field path is specified as an
iterable of proto field numbers descending from the root message. A special
field EXISTENCE_ONLY can be added to the end of the path; it preserves
field existence but ignores its value; warning: for a repeated field this
preserves the field count only if the field is not packed.
recovery: If None, then invalid file contents cause RecordReader to raise
RiegeliError. If not None, then invalid file contents cause RecordReader to
skip over the invalid region and call this recovery function with a
SkippedRegion as an argument. If the recovery function returns normally,
reading continues. If the recovery function raises StopIteration, reading
ends. If close() is called and file contents were truncated, the recovery
function is called if set; the RecordReader remains closed.
The src argument should be a binary IO stream which supports:
* close() - for close() or __exit__() if owns_src
* readinto1(memoryview) or readinto(memoryview) or read1(int) or read(int)
* seek(int[, int]) - if assumed_pos is None,
or for seek(), seek_numeric(), or size()
* tell() - if assumed_pos is None,
or for seek(), seek_numeric(), or size()
Example values for src:
* io.FileIO(filename, 'rb')
* io.open(filename, 'rb') - better with buffering=0, or use io.FileIO() instead
* open(filename, 'rb') - better with buffering=0, or use io.FileIO() instead
* io.BytesIO(contents)
* tf.io.gfile.GFile(filename, 'rb')
Warning: if owns_src is False and assumed_pos is not None, src will have an
unpredictable amount of extra data consumed because of buffering.
)doc", // tp_doc
// GC hooks required by Py_TPFLAGS_HAVE_GC in tp_flags above.
reinterpret_cast<traverseproc>(RecordReaderTraverse), // tp_traverse
reinterpret_cast<inquiry>(RecordReaderClear), // tp_clear
nullptr, // tp_richcompare
0, // tp_weaklistoffset
nullptr, // tp_iter
nullptr, // tp_iternext
// The const_casts below adapt the method and getset tables to the pointer
// types of their slots.
// NOTE(review): presumably both tables are declared const; their declarations
// are not visible in this part of the file -- confirm.
const_cast<PyMethodDef*>(RecordReaderMethods), // tp_methods
nullptr, // tp_members
const_cast<PyGetSetDef*>(RecordReaderGetSet), // tp_getset
nullptr, // tp_base
nullptr, // tp_dict
nullptr, // tp_descr_get
nullptr, // tp_descr_set
0, // tp_dictoffset
// Construction is split across two slots: tp_new uses the generic
// PyType_GenericNew, and tp_init is the type-specific RecordReaderInit.
reinterpret_cast<initproc>(RecordReaderInit), // tp_init
nullptr, // tp_alloc
PyType_GenericNew, // tp_new
nullptr, // tp_free
nullptr, // tp_is_gc
nullptr, // tp_bases
nullptr, // tp_mro
nullptr, // tp_cache
nullptr, // tp_subclasses
nullptr, // tp_weaklist
nullptr, // tp_del
0, // tp_version_tag
nullptr, // tp_finalize
};
extern "C" {
static void RecordIterDestructor(PyRecordIterObject* self) {