in tfx_addons/sampling/executor.py [0:0]
def sample_data(_,
                val,
                sampling_strategy=spec.SamplingStrategy.UNDERSAMPLE,
                side=0):
    """Randomly draw ``side`` data points from the values of one class.

    Meant to be called in a Beam pipeline on an input key:value pair, where
    the key is the class label and the values are the data points to sample.
    The key is discarded; only the sampled data points are emitted.

    Args:
        _: The class label. Ignored.
        val: The data points of the class. Lists and tuples are used as-is;
            any other iterable is copied into a list first, because the
            ``random`` helpers need ``len()`` and indexing.
        sampling_strategy: ``spec.SamplingStrategy.UNDERSAMPLE`` draws
            without replacement (``random.sample``);
            ``spec.SamplingStrategy.OVERSAMPLE`` draws with replacement
            (``random.choices``).
        side: Number of data points to draw.

    Yields:
        The sampled data points, one at a time.

    Raises:
        ValueError: If ``sampling_strategy`` is neither UNDERSAMPLE nor
            OVERSAMPLE, or (raised by ``random.sample``) if undersampling
            is asked for more items than ``val`` holds or a negative count.
            As this is a generator, errors surface on first iteration.
    """
    # Validate the strategy before touching ``val`` so an invalid strategy
    # never consumes a one-shot iterable.
    if sampling_strategy == spec.SamplingStrategy.UNDERSAMPLE:
        with_replacement = False
    elif sampling_strategy == spec.SamplingStrategy.OVERSAMPLE:
        with_replacement = True
    else:
        raise ValueError("Invalid value for sampling_strategy variable!")

    # NOTE(review): grouped values in a Beam pipeline may presumably arrive
    # as a lazy iterable rather than a list, depending on the runner; such
    # an object would make random.sample / random.choices raise TypeError.
    if not isinstance(val, (list, tuple)):
        val = list(val)

    if with_replacement:
        yield from random.choices(val, k=side)
    else:
        yield from random.sample(val, side)