in src/datatuner/classification/distractors.py [0:0]
def get_distractors(data, text, swapping_candidates, cutting_candidates, random_text, num_candidates=5,
                    max_per_operation=5):
    """Build distractor texts and labelled classification items for ``text``.

    Corrupted variants of ``text`` are generated per error category
    ("value_error", "omission", "repetition", "hallucination") by the helper
    functions called below. NOTE(review): the helpers are defined outside this
    block; they are assumed to return lists of strings -- confirm.

    Args:
        data: Source data paired with ``text``. It is used with ``in`` and
            ``str.replace``, so it is expected to be a string.
        text: The reference (accurate) text to corrupt.
        swapping_candidates: Iterable of candidate groups; each group is passed
            to ``swap_entities``.
        cutting_candidates: Iterable of candidate groups; each group is passed
            to ``cut_entities``.
        random_text: Forwarded to ``add_repetition`` for the "hallucination"
            category.
        num_candidates: Number of distractors returned and maximum number of
            classification items returned.
        max_per_operation: Forwarded (sometimes adjusted) as ``max_outputs``
            to each helper.

    Returns:
        A tuple ``(distractors, classification_items)``:

        * ``distractors``: list of ``num_candidates`` strings, none equal to
          ``text`` unless it comes from padding. If nothing was generated it
          is filled with ``"placeholder"``; if too few were generated the
          list is repeated to reach ``num_candidates``.
        * ``classification_items``: shuffled list of at most ``num_candidates``
          dicts with keys ``"text"``, ``"data"`` and ``"label"``.

    Both outputs are shuffled with the global ``random`` module state.
    """
    # Accumulate into a fresh list: this keeps the results of every swapping
    # group (not only the last one) and guarantees the key exists even when
    # ``swapping_candidates`` is empty.
    value_errors = []
    for cands in swapping_candidates:
        value_errors.extend(swap_entities(cands, text, max_outputs=max_per_operation))
    for cands in cutting_candidates:
        value_errors.extend(cut_entities(cands, text, max_outputs=max_per_operation))
    value_errors.extend(add_negation_errors(text, max_outputs=int(math.ceil(max_per_operation / 2))))

    # Insertion order of the categories is kept as: value_error, omission,
    # repetition, hallucination.
    distractors_dict = {"value_error": value_errors}

    distractors_dict["omission"] = add_omission(text, max_outputs=max_per_operation)
    if "," in text:
        distractors_dict["omission"].extend(add_phrase_omission(text, max_outputs=1 + max_per_operation))

    distractors_dict["repetition"] = add_repetition(text, max_outputs=1 + max_per_operation)

    distractors_dict["hallucination"] = add_repetition(
        text, random_text=random_text, replace=True, max_outputs=max_per_operation
    ) + add_repetition(text, random_text=random_text, max_outputs=max_per_operation)

    # De-duplicate across categories and drop the reference text itself.
    unique_distractors = set(chain.from_iterable(distractors_dict.values()))
    unique_distractors.discard(text)

    # Shuffle and cut
    distractors = list(unique_distractors)
    random.shuffle(distractors)
    distractors = distractors[:num_candidates]

    # If no distractors found, add placeholders
    if not distractors:
        distractors = ["placeholder"] * num_candidates

    # Pad (by repetition) to get to the right number of candidates
    if len(distractors) < num_candidates:
        ratio = int(math.ceil(num_candidates / len(distractors)))
        distractors = (distractors * ratio)[:num_candidates]

    # One labelled item per generated variant, plus the accurate reference.
    classification_items = [
        {"text": value, "data": data, "label": key}
        for key, values in distractors_dict.items()
        for value in values
    ]
    classification_items.append({"text": text, "data": data, "label": "accurate"})

    # Add negation: flipping the first yes/no marker in the data makes the
    # unchanged text disagree with it, which is labelled as a value error.
    replacements = {"[ no ]": "[ yes ]", "[ yes ]": "[ no ]"}
    for cand, flipped in replacements.items():
        if cand in data:
            negated_data = data.replace(cand, flipped, 1)
            classification_items.append({"text": text, "data": negated_data, "label": "value_error"})

    random.shuffle(classification_items)
    classification_items = classification_items[:num_candidates]
    return distractors, classification_items