in datasets.py [0:0]
def __init__(self, train_file_name, validation_file_name, test_file_name, embedding_file_name, max_train,
             image_encoding_algo, use_keyword=False, build_vocab_dev=True):
    """Check the input files, load the image encoder and build the vocabulary.

    Args:
        train_file_name: Path to the training data file; must exist.
        validation_file_name: Path to the validation data file; must exist.
        test_file_name: Path to the test data file; must exist.
        embedding_file_name: Embedding file path (or identifier). Its
            existence is only checked when the value contains ``'glove'``.
        max_train: Stored as ``self.max_samples``.
            (Presumably a cap on the number of training samples -- confirm
            against the code that reads ``max_samples``.)
        image_encoding_algo: One of ``'VGG19'``, ``'MobileNet'``,
            ``'ResNet'`` or ``'DenseNet'``. Any other value leaves
            ``self.image_encoding_model`` set to ``None`` and logs a warning.
        use_keyword: Keyword usage is enabled when this is the boolean
            ``True`` or the string ``'YES'``; every other value disables it.
        build_vocab_dev: Stored as ``self.build_vocab_dev``.
            (Presumably read by ``build_vocabulary`` -- confirm.)

    Side effects:
        Terminates the process via ``exit(0)`` when a required file is
        missing, instantiates the selected image model with ImageNet
        weights, and finally calls ``self.build_vocabulary()``.
    """
    self.image_encoding_algo = image_encoding_algo
    self.build_vocab_dev = build_vocab_dev

    # Required inputs, checked in order; the first missing one stops the run.
    required_files = [
        ("Download training data to %s", train_file_name),
        ("Download validation data to %s", validation_file_name),
        ("Download testing data to %s", test_file_name),
    ]
    # The embedding path is only validated for GloVe-style embeddings.
    if 'glove' in embedding_file_name:
        required_files.append(("Download embedding file to %s", embedding_file_name))
    for message, path in required_files:
        if not os.path.exists(path):
            logger.info(message, path)
            # NOTE(review): exits with status 0 although this is a failure;
            # kept as-is because callers/scripts may depend on it.
            exit(0)

    self.train_file = train_file_name
    self.validation_file = validation_file_name
    self.test_file = test_file_name
    self.embedding_file = embedding_file_name
    self.max_samples = max_train

    # Feature extractor: pretrained on ImageNet, without the classifier head.
    # The branches stay lazy so only the selected model class is referenced.
    if self.image_encoding_algo == 'VGG19':
        self.image_encoding_model = VGG19(weights='imagenet', include_top=False)
    elif self.image_encoding_algo == 'MobileNet':
        self.image_encoding_model = MobileNetV2(weights='imagenet', include_top=False)
    elif self.image_encoding_algo == 'ResNet':
        self.image_encoding_model = ResNet50(weights='imagenet', include_top=False)
    # elif self.image_encoding_algo=='Inception':
    # self.image_encoding_model = InceptionV3(weights='imagenet', include_top=False)
    elif self.image_encoding_algo == 'DenseNet':
        self.image_encoding_model = DenseNet201(weights='imagenet', include_top=False)
    else:
        # Previously the attribute was left undefined while the log still
        # claimed that a model had been loaded.
        self.image_encoding_model = None

    if self.image_encoding_model is None:
        logger.warning('Unknown image encoding algorithm %s; no model loaded', image_encoding_algo)
    else:
        logger.info('%s model loaded', image_encoding_algo)

    # Vocabulary lookup tables.
    self.vocabulary = dict()
    self.idx_to_word = dict()
    self.word_to_idx = dict()

    # Per-split dictionaries keyed by image id.
    self.train_image_id_questions_dict = dict()
    self.train_image_id_imagefeat_dict = dict()
    self.train_image_id_keyword_dict = dict()
    self.dev_image_id_questions_dict = dict()
    self.dev_image_id_imagefeat_dict = dict()
    self.dev_image_id_keyword_dict = dict()
    self.test_image_id_questions_dict = dict()
    self.test_image_id_imagefeat_dict = dict()
    self.test_image_id_url_dict = dict()
    self.test_image_id_keyword_dict = dict()

    # Question bookkeeping.
    self.unique_train_questions = set()
    self.unique_generated_questions = set()
    self.generated_questions = []
    self.max_question_len = -1
    self.max_keyword_len = 10
    self.no_of_samples = 0

    # Accept the documented boolean default as well as the legacy 'YES'
    # string; previously use_keyword=True was silently treated as False.
    self.use_keyword = use_keyword is True or use_keyword == 'YES'

    # self.load_data(self.train_file)
    self.build_vocabulary()