id: 1 unit: def _create_es_index() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 111 LOC McCabe index: 3 number of parameters: 1 id: 2 unit: def _obtain_retrieved_sentences_for_single_article() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 65 LOC McCabe index: 19 number of parameters: 6 id: 3 unit: def main() file: spark_scripts/create_ds_synthetic_dataset.py start line: 0 end line: 0 size: 54 LOC McCabe index: 6 number of parameters: 1 id: 4 unit: def _run_stat() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 40 LOC McCabe index: 9 number of parameters: 5 id: 5 unit: def _construct_dataset_sample() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 40 LOC McCabe index: 3 number of parameters: 10 id: 6 unit: def _get_entity2qpa_list() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 32 LOC McCabe index: 9 number of parameters: 3 id: 7 unit: def __init__() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 27 LOC McCabe index: 1 number of parameters: 13 id: 8 unit: def _compute_answer_start() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 25 LOC McCabe index: 6 number of parameters: 5 id: 9 unit: def run_job() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 24 LOC McCabe index: 5 number of parameters: 5 id: 10 unit: def main() file: spark_scripts/write_sentence_level_es_index.py start line: 0 end line: 0 size: 23 LOC McCabe index: 2 number of parameters: 1 id: 11 unit: def main() file: spark_scripts/create_squad_ner_dataset.py start line: 0 end line: 0 size: 23 LOC McCabe index: 5 number of parameters: 1 id: 12 unit: def _index_rdd_partition() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 23 LOC McCabe index: 3 number of parameters: 2 id: 13 unit: def main() file: spark_scripts/tokenize_and_ner_inputs.py start line: 0 end line: 0 size: 22 LOC McCabe index: 2 number of parameters: 1 id: 14 unit: def _process_row() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 22 LOC McCabe index: 5 number of parameters: 2 id: 15 unit: def main() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 19 LOC McCabe index: 2 number of parameters: 1 id: 16 unit: def _make_styled_questions() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 17 LOC McCabe index: 2 number of parameters: 6 id: 17 unit: def _create_jsonl_training_files() file: spark_scripts/create_ds_synthetic_dataset.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 2 id: 18 unit: def _get_valid_context_sentences() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 15 LOC McCabe index: 4 number of parameters: 3 id: 19 unit: def is_similar() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 14 LOC McCabe index: 3 number of parameters: 6 id: 20 unit: def _process_row() file: distant_supervision/squad_ner_creator.py start line: 0 end line: 0 size: 14 LOC McCabe index: 4 number of parameters: 2 id: 21 unit: def preprocess() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 1 id: 22 unit: def _print_output_stats() file: distant_supervision/squad_ner_creator.py start line: 0 end line: 0 size: 13 LOC McCabe index: 2 number of parameters: 3 id: 23 unit: def help_print_histogram() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 12 LOC McCabe index: 2 number of parameters: 5 id: 24 unit: def print_phrase_category_counts() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 12 LOC McCabe index: 2 number of parameters: 4 id: 25 unit: def print_output_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 3 id: 26 unit: def _obtain_retrieved_sentences() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 12 LOC McCabe index: 3 number of parameters: 4 id: 27 unit: def sent_tokenize() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 11 LOC McCabe index: 7 number of parameters: 3 id: 28 unit: def make_template_qg_styles() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 11 LOC McCabe index: 2 number of parameters: 5 id: 29 unit: def _print_backfill_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 11 LOC McCabe index: 2 number of parameters: 3 id: 30 unit: def get_phrases() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 10 LOC McCabe index: 4 number of parameters: 3 id: 31 unit: def compute_ner_and_noun_chunks() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 10 LOC McCabe index: 4 number of parameters: 2 id: 32 unit: def deserialize_json() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 2 id: 33 unit: def _wrangle_articles() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 2 id: 34 unit: def run_job() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 3 id: 35 unit: def _compute_freqs() file: distant_supervision/whxx_ngram_table.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 1 id: 36 unit: def _print_stats() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 3 id: 37 unit: def load_en_disable_all() file: distant_supervision/utils.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 1 id: 38 unit: def load_en_sentencizer() file: distant_supervision/utils.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 1 id: 39 unit: def convert_ner_rdd_to_set() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 2 id: 40 unit: def _get_unique_entity_pairs() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 9 LOC McCabe index: 4 number of parameters: 2 id: 41 unit: def _extract_leading_ngrams() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 42 unit: def _generate_template_awb() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 4 id: 43 unit: def _generate_template_wba() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 4 id: 44 unit: def find_all() file: distant_supervision/utils.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 2 id: 45 unit: def __init__() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 6 id: 46 unit: def _get_hit_phrases() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 2 id: 47 unit: def _compute_ds_data_by_partition() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 2 id: 48 unit: def _perform_subsample_by_count() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 5 number of parameters: 5 id: 49 unit: def get_entity_to_queries_v2() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 5 id: 50 unit: def gather_entities() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 3 id: 51 unit: def _run_job() file: spark_scripts/create_squad_ner_dataset.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 4 id: 52 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 8 id: 53 unit: def make_cloze_style() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 54 unit: def _print_article_diversity_stats_helper() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 4 id: 55 unit: def _largest_index_exceeding_ulim_context() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 2 id: 56 unit: def _clean_ners() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 57 unit: def _stats_top_occurring_entities() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 2 id: 58 unit: def _add_to_toml() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 3 id: 59 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 7 id: 60 unit: def deserialize_json() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 2 id: 61 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 5 id: 62 unit: def run_job() file: distant_supervision/squad_ner_creator.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 4 id: 63 unit: def _print_question_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 64 unit: def _print_answer_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 65 unit: def load() file: distant_supervision/utils.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 4 id: 66 unit: def rand_sample_ngram() file: distant_supervision/whxx_ngram_table.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 3 id: 67 unit: def _calculate_phrase_rdd() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 68 unit: def import_from() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 69 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 5 id: 70 unit: def get_phrases() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 71 unit: def jsonify_single_style() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 72 unit: def __init__() file: distant_supervision/squad_ner_creator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 5 id: 73 unit: def get_instance() file: distant_supervision/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 3 id: 74 unit: def _split_by_style_and_write() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 2 id: 75 unit: def _get_all_phrases_from_sentence_list() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 76 unit: def _stats_phrase_category_counts() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 3 id: 77 unit: def __init__() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 78 unit: def __init__() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 79 unit: def tokenize_and_perform_rollup() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 80 unit: def clean_and_tokenize_str() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 81 unit: def compute_ner() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 82 unit: def import_from() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 83 unit: def _post_questionify() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 84 unit: def _print_context_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 85 unit: def __init__() file: distant_supervision/utils.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 5 id: 86 unit: def get_random_number_generator() file: distant_supervision/utils.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 87 unit: def _perform_subsample() file: distant_supervision/synthetic_data_creator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 88 unit: def __init__() file: distant_supervision/ner_entity_gatherer.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 89 unit: def _compute_count_per_category() file: spark_scripts/stat_for_ner_category_to_wh_words.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 90 unit: def _load_json() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 91 unit: def _convert_jsonl_to_prdd() file: distant_supervision/input_parser.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 92 unit: def word_tokenize() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 2 id: 93 unit: def normalize_basic() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 3 LOC McCabe index: 3 number of parameters: 2 id: 94 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 95 unit: def __init__() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 96 unit: def _print_article_diversity_stats() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 3 id: 97 unit: def read_default_config_toml() file: distant_supervision/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 98 unit: def __init__() file: distant_supervision/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 99 unit: def get_numpy_random_number_generator() file: distant_supervision/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 100 unit: def random_str() file: distant_supervision/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 101 unit: def _perform_es_index() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 102 unit: def __init__() file: distant_supervision/whxx_ngram_table.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 103 unit: def import_from_toml() file: distant_supervision/whxx_ngram_table.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 104 unit: def __init__() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 105 unit: def unicode_normalize() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 106 unit: def findall_substr() file: distant_supervision/text_preprocessor.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 107 unit: def jsonify() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 108 unit: def __init__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 109 unit: def __repr__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 110 unit: def jsonify() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 111 unit: def __repr__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 112 unit: def __repr__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 113 unit: def __repr__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 114 unit: def jsonify() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 115 unit: def __repr__() file: distant_supervision/data_models.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 116 unit: def _replace_with_question_mark_ending() file: distant_supervision/question_generator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 117 unit: def __init__() file: distant_supervision/stat_computation.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 118 unit: def _is_good_sentence() file: distant_supervision/ds_es_client.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 119 unit: def __repr__() file: distant_supervision/whxx_ngram_table.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 120 unit: def __init__() file: distant_supervision/entity_to_queries_mapper.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2