in clutrr/relations/builder.py [0:0]
def prune_puzzles(self, weight=None):
    """
    Balance the stored puzzles so that every puzzle type ("f_comb") is
    equally represented, then optionally drop whole types by weight.

    The method first groups puzzle ids by type via ``self._value_counts()``,
    finds the size of the smallest group and keeps a uniformly random sample
    of that size from every group. All other puzzles are deleted from
    ``self.puzzles`` in place.

    If ``weight`` is given (and non-empty), the remaining puzzles are grouped
    again and a type is kept only when its weight compares equal to ``1.0``;
    types missing from ``weight`` default to ``1.0``. Any other value
    (including fractions between 0 and 1) currently removes the whole type.

    :param weight: optional dict of weights f_comb:p where 0 <= p <= 1.
        The dict is only read, never modified.
    :return: None, ``self.puzzles`` is pruned in place. When there are no
        puzzle types at all, the call is a no-op.
    """

    def _retain_only(keep_pids):
        # Delete in place (rather than rebinding self.puzzles) so that any
        # other reference to the same dict observes the pruning as well.
        for pid in set(self.puzzles) - set(keep_pids):
            del self.puzzles[pid]

    pztype = self._value_counts()
    if not pztype:
        # Nothing to balance; min() over an empty sequence would raise.
        return

    # Size of the rarest puzzle type: every type is down-sampled to it.
    pztype_min_count = min(len(pids) for pids in pztype.values())
    keep_puzzles = []
    for pids in pztype.values():
        keep_puzzles.extend(random.sample(pids, pztype_min_count))
    _retain_only(keep_puzzles)

    if weight:
        # Re-group, because the id lists above predate the balancing step.
        pztype = self._value_counts()
        keep_puzzles = []
        for f_comb, pids in pztype.items():
            # Missing weights default to 1.0 without touching the caller's
            # dict; only an exact weight of 1.0 keeps the type.
            if weight.get(f_comb, 1.0) == 1.0:
                keep_puzzles.extend(pids)
        _retain_only(keep_puzzles)