in tfx/components/example_gen/input_processor.py [0:0]
def __init__(self,
             input_base_uri: str,
             splits: Iterable[example_gen_pb2.Input.Split],
             range_config: Optional[range_config_pb2.RangeConfig] = None):
  """Initialize FileBasedInputProcessor.

  Every split must report the same span/date/version spec setup. When the
  patterns use a span or date spec and no `range_config` is supplied, a
  rolling range with `num_spans=1` is used as the default.

  Args:
    input_base_uri: The base path from which files will be searched.
    splits: An iterable collection of example_gen_pb2.Input.Split objects.
    range_config: An instance of range_config_pb2.RangeConfig, defines the
      rules for span resolving.

  Raises:
    ValueError: If the splits do not all share the same spec setup, or if
      `range_config` is given while no split pattern uses a span or date
      spec.
  """
  # `None` marks "no split inspected yet"; the first split fixes the
  # expected spec setup and every later split must agree with it.
  self._is_match_span = None
  self._is_match_date = None
  self._is_match_version = None
  for split in splits:
    (is_match_span, is_match_date,
     is_match_version) = utils.verify_split_pattern_specs(split)
    if self._is_match_span is None:
      self._is_match_span = is_match_span
      self._is_match_date = is_match_date
      self._is_match_version = is_match_version
    elif (self._is_match_span != is_match_span or
          self._is_match_date != is_match_date or
          self._is_match_version != is_match_version):
      raise ValueError('Spec setup should the same for all splits: %s.' %
                       split.pattern)

  uses_span_or_date = self._is_match_span or self._is_match_date
  if uses_span_or_date:
    if not range_config:
      range_config = range_config_pb2.RangeConfig(
          rolling_range=range_config_pb2.RollingRange(num_spans=1))
  elif range_config:
    raise ValueError(
        'Span or Date spec should be specified in split pattern if RangeConfig is specified.'
    )

  # The base initializer is called only after the default range config has
  # been resolved. Calling it first handed the base class the caller's
  # (possibly None) value and left the default above as an unused local.
  # NOTE(review): assumes the base __init__ does not rely on running before
  # the `_is_match_*` attributes are assigned -- confirm against the base
  # class.
  super().__init__(splits=splits, range_config=range_config)
  self._input_base_uri = input_base_uri
  self._fingerprint = None