in src/data_prep_intent.py [0:0]
def _sample_tgt_by_percentages(self, df):
    """Return a per-target random sample of ``df`` with a ``label`` column.

    Rows are grouped by the ``target`` column and each group is sampled
    independently. The fraction kept for a group is looked up in
    ``self.SAMPLING_PERCENTAGES`` by target value, falling back to
    ``self.DFLT_PCT_VAL`` for targets without an entry. Sampling uses a
    fixed seed (42), so the result is reproducible for the same input.

    Args:
        df: DataFrame containing a ``target`` column.

    Returns:
        A new DataFrame holding the sampled rows, group after group in
        sorted target order, with a fresh 0..n-1 index and an added
        ``label`` column obtained by mapping ``target`` through
        ``INTENT_LABEL2ID``.
    """
    # Use a 0..n-1 index so the index labels returned by ``sample`` double
    # as row positions, even if the caller's index contains duplicates.
    positional = df.reset_index(drop=True)

    # Iterate the groups explicitly rather than using
    # ``groupby(...).apply(..., include_groups=True)``: applying over the
    # grouping column is deprecated in pandas, while ``include_groups=False``
    # would drop the ``target`` column that is still needed below.
    picked = []
    for target, group in positional.groupby('target'):
        frac = self.SAMPLING_PERCENTAGES.get(target, self.DFLT_PCT_VAL)
        picked.extend(group.sample(frac=frac, random_state=42).index)

    sampled_df = positional.iloc[picked].reset_index(drop=True)
    sampled_df['label'] = sampled_df['target'].map(INTENT_LABEL2ID)
    logger.info("Size of the sampled data = %d", len(sampled_df))
    logger.info("Sampled target sizes %s", sampled_df['label'].value_counts())
    return sampled_df