def _sample_tgt_by_percentages()

in src/data_prep_intent.py [0:0]


    def _sample_tgt_by_percentages(self, df):
        """Return a per-target random sample of ``df`` with a ``label`` column.

        Rows are grouped by the ``target`` column and every group is sampled
        independently with ``random_state=42``. The fraction kept for a group
        is looked up in ``self.SAMPLING_PERCENTAGES`` by its target value and
        falls back to ``self.DFLT_PCT_VAL`` when the target has no entry.

        Args:
            df: DataFrame containing a ``target`` column. It is only read,
                never modified in place.

        Returns:
            A new DataFrame with a fresh ``0..n-1`` index holding the sampled
            rows, group after group in the order ``groupby`` yields them, plus
            a ``label`` column produced by mapping ``target`` through
            ``INTENT_LABEL2ID``.
        """
        # Imported locally so the method does not rely on a module-level alias.
        import pandas as pd

        # Sample group by group instead of ``groupby(...).apply(...,
        # include_groups=True)``: letting ``apply`` operate on the grouping
        # column is deprecated since pandas 2.2, and older releases do not know
        # the keyword at all. Plain iteration hands over the full sub-frame
        # (``target`` included) regardless of the pandas version.
        sampled_groups = [
            group.sample(
                frac=self.SAMPLING_PERCENTAGES.get(target, self.DFLT_PCT_VAL),
                random_state=42,
            )
            for target, group in df.groupby('target')
        ]

        if sampled_groups:
            sampled_df = pd.concat(sampled_groups).reset_index(drop=True)
        else:
            # ``pd.concat`` refuses an empty list; return an empty frame that
            # still carries the input's columns.
            sampled_df = df.iloc[0:0].reset_index(drop=True)

        # NOTE(review): presumably INTENT_LABEL2ID is a plain dict, in which
        # case targets missing from it end up with a NaN label - confirm.
        sampled_df['label'] = sampled_df['target'].map(INTENT_LABEL2ID)

        logger.info("Size of the sampled data = %d", len(sampled_df))
        logger.info("Sampled target sizes %s", sampled_df['label'].value_counts())
        return sampled_df