def get_distractors()

in src/datatuner/classification/distractors.py [0:0]
38 lines of code
13 McCabe index (conditional complexity)

def get_distractors(data, text, swapping_candidates, cutting_candidates, random_text, num_candidates=5,
                    max_per_operation=5):
    """Build distractor texts and labelled classification items for ``text``.

    Corrupted variants of ``text`` are generated per error category
    (``value_error``, ``omission``, ``repetition``, ``hallucination``) by the
    helper functions called below, then sampled down to ``num_candidates``.

    Args:
        data: Data the text describes. It is searched with ``in`` and edited with
            ``.replace`` for the ``"[ no ]"`` / ``"[ yes ]"`` markers, so it is
            presumably a string -- confirm against callers.
        text: The reference (accurate) text.
        swapping_candidates: Iterable of candidate groups, each passed to
            ``swap_entities``. May be empty.
        cutting_candidates: Iterable of candidate groups, each passed to
            ``cut_entities``. May be empty.
        random_text: Text handed to ``add_repetition`` as ``random_text`` to
            produce the ``hallucination`` variants.
        num_candidates: Size of the returned distractor list and upper bound on
            the number of returned classification items.
        max_per_operation: ``max_outputs`` budget passed to each helper (some
            helpers receive ``1 + max_per_operation`` or half of it, rounded up).

    Returns:
        A tuple ``(distractors, classification_items)``:

        * ``distractors``: list of ``num_candidates`` texts, none equal to
          ``text``. If too few unique distractors exist the list is padded by
          repetition; if none exist it is filled with ``"placeholder"``.
        * ``classification_items``: at most ``num_candidates`` dicts with keys
          ``"text"``, ``"data"`` and ``"label"``, drawn at random from the
          labelled distractors, the accurate pair, and pairs whose data has a
          yes/no marker flipped. Because the list is cut after shuffling, the
          ``"accurate"`` item is not guaranteed to be present.
    """
    # Accumulate into a list created up front: the key must exist even when
    # there are no swapping candidates, and every candidate group must
    # contribute rather than only the last one.
    value_errors = []
    for cands in swapping_candidates:
        value_errors.extend(swap_entities(cands, text, max_outputs=max_per_operation))
    for cands in cutting_candidates:
        value_errors.extend(cut_entities(cands, text, max_outputs=max_per_operation))
    value_errors.extend(add_negation_errors(text, max_outputs=int(math.ceil(max_per_operation / 2))))

    distractors_dict = {"value_error": value_errors}

    distractors_dict["omission"] = add_omission(text, max_outputs=max_per_operation)
    # Phrase-level omission is only attempted when the text contains a comma
    if "," in text:
        distractors_dict["omission"].extend(add_phrase_omission(text, max_outputs=1 + max_per_operation))

    distractors_dict["repetition"] = add_repetition(text, max_outputs=1 + max_per_operation)
    distractors_dict["hallucination"] = add_repetition(
        text, random_text=random_text, replace=True, max_outputs=max_per_operation
    ) + add_repetition(text, random_text=random_text, max_outputs=max_per_operation)

    # De-duplicate across all categories and drop the reference text itself
    unique_distractors = set(chain.from_iterable(distractors_dict.values()))
    unique_distractors.discard(text)

    # Shuffle and cut
    distractors = list(unique_distractors)
    random.shuffle(distractors)
    distractors = distractors[:num_candidates]

    if not distractors:
        # If no distractors found, add placeholders
        distractors = ["placeholder"] * num_candidates
    elif len(distractors) < num_candidates:
        # Pad by repetition to get to the right number of candidates
        ratio = int(math.ceil(num_candidates / len(distractors)))
        distractors = (distractors * ratio)[:num_candidates]

    # Built from the full per-label lists, not from the sampled distractors above
    classification_items = [
        {"text": value, "data": data, "label": label}
        for label, values in distractors_dict.items()
        for value in values
    ]
    classification_items.append({"text": text, "data": data, "label": "accurate"})

    # Add negation: flipping the first yes/no marker in the data makes the
    # unchanged text a value error with respect to the modified data.
    replacements = {"[ no ]": "[ yes ]", "[ yes ]": "[ no ]"}
    for marker, flipped in replacements.items():
        if marker in data:
            negated_data = data.replace(marker, flipped, 1)
            classification_items.append({"text": text, "data": negated_data, "label": "value_error"})

    random.shuffle(classification_items)
    classification_items = classification_items[:num_candidates]
    return distractors, classification_items