def prune_puzzles()

in clutrr/relations/builder.py [0:0]


    def prune_puzzles(self, weight=None):
        """
        Balance the stored puzzles so every puzzle type ("f_comb") is equally
        represented.

        Puzzle ids are grouped by type via ``self._value_counts()``; the size of
        the smallest group becomes the target, and every group is randomly
        down-sampled (``random.sample``) to that size. All puzzles that were not
        sampled are deleted from ``self.puzzles`` in place. If there are no
        puzzle types at all, the method returns without doing anything.

        :param weight: optional dict of weights f_comb:p, documented as
            0 <= p <= 1. Types missing from the dict are treated as 1.0. After
            balancing, only types whose weight equals 1.0 are kept; all puzzles
            of every other type are removed. The dict is only read, never
            modified.
            NOTE(review): weights are not applied proportionally -- any value
            other than 1.0 drops that type entirely. Confirm this is intended.
        :return: None
        """
        pztype = self._value_counts()
        if not pztype:
            # Nothing to balance; min() over an empty sequence would raise.
            return

        # Down-sample every type to the size of the rarest one.
        pztype_min_count = min(len(pids) for pids in pztype.values())
        keep_puzzles = set()
        for pids in pztype.values():
            keep_puzzles.update(random.sample(pids, pztype_min_count))
        for pid in set(self.puzzles) - keep_puzzles:
            del self.puzzles[pid]

        if weight:
            # Regroup, since the balancing pass above changed self.puzzles.
            pztype = self._value_counts()
            keep_puzzles = set()
            for f_comb, pids in pztype.items():
                # .get() supplies the 1.0 default without writing it back into
                # the caller's dict.
                if weight.get(f_comb, 1.0) == 1.0:
                    keep_puzzles.update(pids)
            for pid in set(self.puzzles) - keep_puzzles:
                del self.puzzles[pid]