in distant_supervision/synthetic_data_creator.py [0:0]
def __init__(
    self,
    output_dir,
    *,
    es_hosts,
    es_index_name,
    debug_save,
    ulim_count,
    nb_ner_ulim,
    num_partitions,
    nb_aux_qs_matches,
    nb_aux_awc_matches,
    phrase_mode,
    whxx_ngram_table,
):
    """Record the run configuration and build the helper objects.

    Everything after ``output_dir`` is keyword-only. Each value is stored
    unchanged on the instance under the same name, except ``es_hosts`` and
    ``es_index_name``, which are only forwarded to ``ElasticsearchConfig``.

    Args:
        output_dir: Kept as ``self.output_dir``.
        es_hosts: Forwarded as ``hosts`` to ``ElasticsearchConfig``.
        es_index_name: Forwarded as ``index_name`` to
            ``ElasticsearchConfig``.
        debug_save: Kept as ``self.debug_save``.
        ulim_count: Limit on the number of results.
        nb_ner_ulim: Kept as ``self.nb_ner_ulim``.
        num_partitions: Kept as ``self.num_partitions``.
        nb_aux_qs_matches: Kept as ``self.nb_aux_qs_matches``.
        nb_aux_awc_matches: Kept as ``self.nb_aux_awc_matches``.
        phrase_mode: Kept as ``self.phrase_mode``.
        whxx_ngram_table: Kept as ``self.whxx_ngram_table`` and also handed
            to ``QuestionGenerator``.
    """
    # General run settings.
    self.output_dir = output_dir
    self.num_partitions = num_partitions
    self.debug_save = debug_save
    self.phrase_mode = phrase_mode

    # Limits and match counts.
    self.ulim_count = ulim_count  # limit the number of results
    self.nb_ner_ulim = nb_ner_ulim
    self.nb_aux_qs_matches = nb_aux_qs_matches
    self.nb_aux_awc_matches = nb_aux_awc_matches

    # The question generator shares both the n-gram table and the
    # preprocessor instance that are stored on this object.
    self.whxx_ngram_table = whxx_ngram_table
    preprocessor = TextPreprocessor()
    self.text_preprocessor = preprocessor
    self.question_generator = QuestionGenerator(whxx_ngram_table, preprocessor)

    # Elasticsearch connection settings; the document type is fixed.
    es_settings = ElasticsearchConfig(
        hosts=es_hosts,
        index_name=es_index_name,
        doc_type='doc',
    )
    self.es_conf = es_settings