# rules/conjuncts.py
def generate_conjuncts(df: pd.DataFrame, target, predicates, beam_width):
    """Beam search over conjunctive rules assembled from `predicates`.

    Seeds a beam with every single-predicate rule, trims it to (roughly)
    the `beam_width` highest q-values, then repeatedly extends each
    surviving rule by one predicate until no extension improves the beam.

    Returns a mapping {Rule: q_value} for the final beam.

    NOTE(review): the exact semantics of Rule.eval, get_worst_rule,
    is_irrelevant and clean_beam live elsewhere — comments below describe
    only what this function visibly does with them.
    """
    stats = ConfusionMatrix(df, target)
    positives = stats.pos_df.shape[0]
    total = df.shape[0]
    # Support floor: a rule must cover at least sqrt(#positives) rows
    # (the division by `total` cancels against the fraction tested below).
    min_support = math.sqrt(positives) / total

    # Seed: one rule per predicate, scored against the confusion matrix.
    seed = {}
    for predicate in predicates:
        rule = Rule()
        rule.add_conjunct(predicate)
        seed[rule] = rule.eval(stats)

    if beam_width > len(predicates):
        # Beam wider than the candidate pool — keep everything.
        survivors = seed.copy()
    else:
        # Keep every rule scoring at or above the beam_width-th best;
        # ties can leave the beam slightly wider than beam_width.
        cutoff = sorted(seed.values(), reverse=True)[beam_width - 1]
        survivors = {rule: q for rule, q in seed.items() if q >= cutoff}

    # `new_beam` is mutated in place during a sweep; `beam` is the frozen
    # snapshot being extended.  They start as separate (equal) dicts and
    # become aliases after the first productive sweep — intentional, since
    # the inner loop iterates a list() snapshot of `beam`.
    new_beam = survivors
    beam = survivors.copy()

    iteration = 0
    while True:
        improved = False
        for base_rule in list(beam):
            for predicate in predicates:
                # Skip predicates the rule already renders redundant.
                if base_rule.check_useless(predicate):
                    continue
                candidate = Rule(base_rule)
                candidate.add_conjunct(predicate)
                if is_duplicate(candidate, new_beam):
                    continue
                q = candidate.eval(stats)
                # Enforce the minimum-support floor on true positives.
                coverage = len(stats.true_positive(candidate)) / df.shape[0]
                if coverage < min_support:
                    continue
                worst_rule, worst_q = get_worst_rule(new_beam, beam_width)
                if q <= worst_q:
                    continue
                # Drop candidates dominated by an existing beam member
                # (fewer true positives AND more false positives).
                if is_irrelevant(candidate, new_beam, stats):
                    continue
                if worst_q > 0:
                    # Presumably worst_q <= 0 signals a non-full beam /
                    # sentinel from get_worst_rule — TODO confirm.
                    new_beam.pop(worst_rule)
                new_beam[candidate] = q
                improved = True
        if iteration == 0:
            clean_beam(new_beam)
        if not improved:
            break
        beam = new_beam
        iteration += 1
    return beam