in tensorflow_datasets/core/splits.py [0:0]
def _str_to_relative_instruction(spec: str) -> AbstractSplit:
  """Parses a single split spec string into a split instruction.

  The spec has the form `<split_name>` or `<split_name>[<split_selector>]`
  (e.g. `train`, `train[54%:]`). The selector is either a slice `x:y` (either
  side may be empty, but not both) or a single index `x`; the single-index
  form is only accepted when the unit is `shard`.

  Args:
    spec: The split specification string to parse.

  Returns:
    `_SplitAll()` when the split name is `all`, otherwise a `ReadInstruction`
    built with `rounding='closest'` and the parsed `from_`, `to` and `unit`
    (`unit` defaults to `'abs'` when the selector carries no unit).

  Raises:
    ValueError: If `spec` does not match `_SUB_SPEC_RE`, if any part of the
      selector does not match `_SLICE_RE`, if the selector has more than two
      `:`-separated parts or only empty parts, or if a single-index selector
      is used with a unit other than `shard`.
    NotImplementedError: If a selector is applied to the `all` split.
  """
  # <split_name>[<split_selector>] (e.g. `train[54%:]`)
  res = _SUB_SPEC_RE.match(spec)
  err_msg = (f'Unrecognized split format: {spec!r}. See format at '
             'https://www.tensorflow.org/datasets/splits')
  if not res:
    raise ValueError(err_msg)
  split_name = res.group('split_name')
  split_selector = res.group('split_selector')

  if split_name == 'all':
    if split_selector:
      # TODO(tfds): `all[:75%]` could be supported by creating a
      # `_SliceSplit(split, from_=, to=, unit=)`.
      raise NotImplementedError(
          f'{split_name!r} does not support slice. Please open a github issue '
          'if you need this feature.')
    return _SplitAll()

  if split_selector is None:  # split='train'
    from_ = None
    to = None
    unit = 'abs'
  else:  # split='train[x:y]' or split='train[x]'
    slices = [_SLICE_RE.match(x) for x in split_selector.split(':')]
    # Make sure all slices are valid, and at least one is not empty
    if not all(slices) or not any(x.group(0) for x in slices):
      raise ValueError(err_msg)
    if len(slices) == 1:
      (from_match,) = slices
      from_ = int(from_match['val'])
      # A single index `x` is shorthand for the range `[x:x+1]`. For `x == -1`
      # that would be `[-1:0]`, which is an empty range under slice semantics,
      # so leave the upper bound open (same as `[-1<unit>:]`) to select the
      # last element instead.
      to = None if from_ == -1 else from_ + 1
      unit = from_match['unit'] or 'abs'
      if unit != 'shard':
        raise ValueError('Absolute or percent only support slice syntax.')
    elif len(slices) == 2:
      from_match, to_match = slices
      from_ = from_match['val']
      to = to_match['val']
      # The unit may be written on either bound; the lower bound's unit wins.
      unit = from_match['unit'] or to_match['unit'] or 'abs'
    else:
      raise ValueError(err_msg)

    # Regex groups are strings (or None for an omitted bound): convert the
    # bounds that are present to integers.
    if from_ is not None:
      from_ = int(from_)
    if to is not None:
      to = int(to)

  return ReadInstruction(
      split_name=split_name,
      rounding='closest',
      from_=from_,
      to=to,
      unit=unit,
  )