in tfx_addons/sampling/component.py [0:0]
def __init__(
        self,
        label: str,
        input_data: types.Channel = None,
        output_data: types.Channel = None,
        name: Optional[Text] = None,
        splits: Optional[List[Text]] = None,
        copy_others: Optional[bool] = True,
        shards: Optional[int] = 0,
        null_classes: Optional[List[Text]] = None,
        sampling_strategy: SamplingStrategy = SamplingStrategy.UNDERSAMPLE):
    """Construct a SamplerComponent.

    By default, only the train split is sampled; all others are copied.

    Args:
      label: The name of the column containing class names to sample by.
      input_data: A Channel of type `standard_artifacts.Examples`.
      output_data: A Channel of type `standard_artifacts.Examples`. When
        omitted, a new channel holding a single `Examples` artifact is
        created.
      name: Optional unique name. Necessary if multiple components are
        declared in the same pipeline.
      splits: A list containing splits to sample. `None` (the default) is
        treated as `['train']`.
      copy_others: Determines whether we copy over the splits that aren't
        sampled, or just exclude them from the output artifact. `None` is
        treated as the default, `True`.
      shards: The number of files that each sampled split should
        contain. Default 0 is Beam's tfrecordio function's default.
      null_classes: A list of classes that should not be sampled.
      sampling_strategy: The `SamplingStrategy` to apply. Defaults to
        `SamplingStrategy.UNDERSAMPLE`.
    """
    if not output_data:
        output_data = channel_utils.as_channel([standard_artifacts.Examples()])

    # Resolve the documented default here so the spec never receives a JSON
    # `null` in place of the list of splits to sample.
    if splits is None:
        splits = ['train']

    # `copy_others` is annotated Optional, but int(None) raises TypeError;
    # fall back to the declared default instead of crashing.
    if copy_others is None:
        copy_others = True

    # List-valued arguments are handed to the spec as JSON strings and the
    # boolean flag as an int.
    spec = SamplerSpec(
        input_data=input_data,
        output_data=output_data,
        label=label,
        name=name,
        splits=json_utils.dumps(splits),
        copy_others=int(copy_others),
        shards=shards,
        null_classes=json_utils.dumps(null_classes),
        sampling_strategy=sampling_strategy,
    )
    super().__init__(spec=spec)