src/substitution_fns.py [96:116]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dset: QADataset,
    wikidata_info_path: str,
    replace_every: bool,
    num_samples: int,
    category: str,
):
    """Derives a new dataset of modified examples, where the original answer has been replaced
    by another answer of the same `type` drawn randomly from the corpus of answers in the original dataset.
    This substitution function maintains the same distribution of answers as the original dataset.

    Args:
        dset: The original QADataset
        wikidata_info_path: A path to a mapping from wikidata ID to a dictionary of
            wikidata info (see extract_wikidata_info.py for details).
        replace_every: If True, replace every original answer in the substitution example's context;
            otherwise replace just the primary one.
        num_samples: How many new (modified) examples to create from one original example.
        category: Limits substitution generation to original examples with this answer type category.
            `ALL` is an option.
    """
    # generate a corpus of substitute answers, keyed by answer type
    answer_corpus_by_groups = group_answers_by_answer_type(dset)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


src/substitution_fns.py [239:260]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dset: QADataset,
    wikidata_info_path: str,
    replace_every: bool,
    num_samples: int,
    category: str,
):
    """Derives a new dataset of modified examples, where the original answer has been replaced
    by another answer of a different `type` drawn randomly from the corpus of answers in the original dataset.
    This substitution function is the same as corpus_substitution_fn except the answer types are different
    rather than the same.

    Args:
        dset: The original QADataset
        wikidata_info_path: A path to a mapping from wikidata ID to a dictionary of
            wikidata info (see extract_wikidata_info.py for details).
        replace_every: If True, replace every original answer in the substitution example's context;
            otherwise replace just the primary one.
        num_samples: How many new (modified) examples to create from one original example.
        category: Limits substitution generation to original examples with this answer type category.
            `ALL` is an option.
    """
    # generate a corpus of substitute answers, keyed by answer type
    answer_corpus_by_groups = group_answers_by_answer_type(dset)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -