# dissimilarity_grouping() — extracted from question_generation_model.py


    def dissimilarity_grouping(self, temp, num_groups, beam_size):
        """
        Cluster candidate hypotheses at a decoding step to retain a diverse
        subset of solutions.

        :param temp: list of (token_index_sequence, probability) candidates,
            assumed ordered so the last two entries are the best and are kept
            unconditionally
        :param num_groups: number of clusters to form over the candidate texts
        :param beam_size: number of candidates to retain for further decoding
        :return: list of (token_index_sequence, probability) pairs of length at
            most ``beam_size``; falls back to returning ``temp`` unchanged if
            clustering fails
        """
        # Render each candidate as text, dropping the leading (start) token.
        all_sentences = []
        for s, prob in temp:
            intermediate_question = [self.datasets.idx_to_word[i] for i in s]
            all_sentences.append(' '.join(intermediate_question[1:]))
        self.logger.debug('Sentences being clustered %s' % all_sentences)

        # De-duplicate while remembering each sentence's first index in `temp`.
        dict_map_sentence_to_idx = dict()
        for i, s in enumerate(all_sentences):
            if s not in dict_map_sentence_to_idx:
                dict_map_sentence_to_idx[s] = i

        uniq_sentences = list(dict_map_sentence_to_idx.keys())

        try:
            retained = dict(cluster_texts(uniq_sentences, num_groups))
        except Exception:
            # Clustering is best-effort; on any failure keep all candidates.
            # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate.)
            self.logger.debug('Error with clustering in diversity beam search')
            return temp

        # Always keep the best candidates out of the box: the last two entries
        # of `temp` (or fewer, if fewer than two candidates exist).
        num_keep = min(2, len(temp))
        picked = list(range(len(temp) - num_keep, len(temp)))
        retained_sentences = [temp[idx] for idx in picked]

        # Round-robin over clusters, taking one random not-yet-picked member
        # per cluster per pass, until the beam is full or every cluster is
        # exhausted.
        # NOTE(review): cluster member indices are assumed to index into
        # `temp`; if `cluster_texts` returns indices into `uniq_sentences`
        # they would need remapping via `dict_map_sentence_to_idx` — confirm
        # against cluster_texts().
        while len(retained_sentences) < beam_size:
            added_any = False
            for key, values in retained.items():
                # Restrict the random draw to unpicked members so each pass is
                # guaranteed to make progress when progress is possible
                # (the previous unconditional random.choice could loop forever).
                unpicked = [v for v in values if v not in picked]
                if not unpicked:
                    continue
                idx = random.choice(unpicked)
                picked.append(idx)
                # Append the candidate before checking the beam limit, so a
                # picked index is never silently dropped.
                retained_sentences.append(temp[idx])
                added_any = True
                if len(retained_sentences) >= beam_size:
                    break
            if not added_any:
                # All clusters exhausted before the beam filled; return what
                # we have rather than spinning forever.
                break

        self.logger.debug('Filtered sentences')
        for s in retained_sentences:
            iq = ' '.join([self.datasets.idx_to_word[i] for i in s[0]])
            self.logger.debug(iq)

        return retained_sentences