def generate_k_shot_data()

in preprocess/fewshot_gym_dataset.py [0:0]


    def generate_k_shot_data(self, k, seed, path=None):
        """
        Build a k-shot train/dev split plus the full test set, then save it.

        The training lines are shuffled in place with numpy's global RNG
        seeded by ``seed``. The first ``k`` shuffled lines become train, the
        following ``k`` become dev, and every test line is kept as test.

        Args:
            k: number of lines per split. Replaced by 16384 when ``do_train``
                is set, otherwise by 16 when ``do_test`` is set.
            seed: passed to ``np.random.seed`` before shuffling. When
                ``do_train`` is set, seeds below 100 are skipped.
            path: forwarded unchanged to ``self.save``.

        Returns:
            ``(train, dev, test)``, or ``(None, None, None)`` when this task
            is skipped (see the guards at the top of the body).
        """
        skipped = (None, None, None)

        # NOTE(review): config_dict, use_instruct, prompt_names_per_task,
        # do_train and do_test are not defined in this method; presumably
        # module-level settings -- confirm where they are set.
        task = self.hf_identifier
        unsupported = task not in config_dict or (
            use_instruct and task not in prompt_names_per_task
        )
        if unsupported:
            return skipped

        # In train mode only seeds >= 100 are generated.
        if do_train and seed < 100:
            return skipped

        # The run mode pins the split size regardless of the caller's k.
        if do_train:
            k = 16384
        elif do_test:
            k = 16

        # Load the raw dataset and turn it into train / test lines.
        train_lines, test_lines = self.get_train_test_lines(self.load_dataset())

        # Seeded shuffle so the same seed always yields the same split.
        np.random.seed(seed)
        np.random.shuffle(train_lines)

        # All examples share one pool (no per-label balancing), so the splits
        # are two consecutive windows of the shuffled lines.
        pool = list(train_lines)
        k_shot_train = pool[:k]
        k_shot_dev = pool[k:2 * k]

        # Persist the splits, then hand them back to the caller.
        self.save(path, k, seed, k_shot_train, k_shot_dev, test_lines)
        return k_shot_train, k_shot_dev, test_lines