def __init__()

in datasets.py [0:0]


    def __init__(self, train_file_name, validation_file_name, test_file_name, embedding_file_name, max_train,
                 image_encoding_algo, use_keyword=False, build_vocab_dev=True):
        """Check the input files, load the image encoder and build the vocabulary.

        Args:
            train_file_name: Path to the training data file; must exist.
            validation_file_name: Path to the validation data file; must exist.
            test_file_name: Path to the test data file; must exist.
            embedding_file_name: Embedding file name. Its existence is only
                checked when the name contains ``'glove'``.
            max_train: Stored as ``self.max_samples`` (presumably the cap on
                training samples -- confirm against the loading code).
            image_encoding_algo: Name of the image encoder. Recognised values
                are ``'VGG19'``, ``'MobileNet'``, ``'ResNet'`` and
                ``'DenseNet'``. Any other value leaves
                ``self.image_encoding_model`` set to ``None`` and logs a
                warning.
            use_keyword: Enables keyword usage when it is ``True`` or the
                string ``'YES'`` (the string form is kept for existing
                callers). Any other value disables it.
            build_vocab_dev: Stored as ``self.build_vocab_dev`` for use by
                other methods of the class.

        Raises:
            SystemExit: With status 1 when a required file is missing.
        """
        self.image_encoding_algo = image_encoding_algo
        self.build_vocab_dev = build_vocab_dev

        def require_file(path, description):
            # A missing input is fatal: log it as an error and terminate with
            # a non-zero status so shells and schedulers see the failure.
            if not os.path.exists(path):
                logger.error("Download %s to %s", description, path)
                raise SystemExit(1)

        require_file(train_file_name, "training data")
        require_file(validation_file_name, "validation data")
        require_file(test_file_name, "testing data")
        # Only GloVe-style embeddings are expected to be present on disk.
        if 'glove' in embedding_file_name:
            require_file(embedding_file_name, "embedding file")

        self.train_file = train_file_name
        self.validation_file = validation_file_name
        self.test_file = test_file_name
        self.embedding_file = embedding_file_name

        self.max_samples = max_train

        # Kept as an if/elif chain (rather than a dict of constructors) so
        # that only the selected encoder class name is looked up.
        if image_encoding_algo == 'VGG19':
            self.image_encoding_model = VGG19(weights='imagenet', include_top=False)
        elif image_encoding_algo == 'MobileNet':
            self.image_encoding_model = MobileNetV2(weights='imagenet', include_top=False)
        elif image_encoding_algo == 'ResNet':
            self.image_encoding_model = ResNet50(weights='imagenet', include_top=False)
        elif image_encoding_algo == 'DenseNet':
            self.image_encoding_model = DenseNet201(weights='imagenet', include_top=False)
        else:
            # NOTE: 'Inception' (InceptionV3) is not currently enabled.
            # Define the attribute anyway so later code sees an explicit None
            # instead of a missing attribute, and do not claim a model loaded.
            self.image_encoding_model = None
            logger.warning('Unsupported image encoding algorithm %r; no model loaded',
                           image_encoding_algo)

        if self.image_encoding_model is not None:
            logger.info('%s model loaded', image_encoding_algo)

        # Vocabulary lookups, populated by build_vocabulary().
        self.vocabulary = dict()
        self.idx_to_word = dict()
        self.word_to_idx = dict()

        # Per-split dictionaries keyed by image id.
        self.train_image_id_questions_dict = dict()
        self.train_image_id_imagefeat_dict = dict()
        self.train_image_id_keyword_dict = dict()

        self.dev_image_id_questions_dict = dict()
        self.dev_image_id_imagefeat_dict = dict()
        self.dev_image_id_keyword_dict = dict()

        self.test_image_id_questions_dict = dict()
        self.test_image_id_imagefeat_dict = dict()
        self.test_image_id_url_dict = dict()
        self.test_image_id_keyword_dict = dict()

        self.unique_train_questions = set()
        self.unique_generated_questions = set()
        self.generated_questions = []

        self.max_question_len = -1
        self.max_keyword_len = 10
        self.no_of_samples = 0

        # Accept the boolean the default value implies as well as the legacy
        # 'YES' string; previously use_keyword=True was silently ignored.
        self.use_keyword = use_keyword is True or use_keyword == 'YES'

        self.build_vocabulary()