def get_processed_fields()

in datasets.py [0:0]


    def get_processed_fields(self, line, file_name, build_image_feature=False):
        """
        Split one comma-separated record into its fields and optionally attach image features.

        The record is stripped and split on ",". Field 0 is the image id and field 1 the
        image url. The questions field (index 3 when 'bing' occurs in ``file_name``,
        index 2 otherwise) is split on "---"; every question is passed through
        ``self.preprocess_text`` and questions whose processed text equals 'none' are dropped.

        :param line: String containing image id, image url and questions
        :param file_name: File name containing dataset name; the substrings 'bing',
            'train', 'test' and 'dev' are looked for in it
        :param build_image_feature: Flag if image features need to be built
        :return: Tuple ``(image_id, image_feature, cleaned_image_questions, image_url, keyword)``.
            ``image_feature`` is None when ``build_image_feature`` is False or when building
            the feature failed; ``keyword`` is None unless ``self.use_keyword`` is set.
        """
        records = line.strip().split(",")
        image_id = records[0]
        image_url = records[1]

        questions_index = 3 if 'bing' in file_name else 2
        image_questions = records[questions_index].split("---")

        # NOTE(review): for 'bing' files the questions are also read from field 3, so with
        # use_keyword enabled both values come from the same column -- confirm the intended
        # keyword index against the dataset layout.
        keyword = records[3] if self.use_keyword else None

        # Normalise every question once and discard the 'none' placeholders.
        processed_questions = (self.preprocess_text(question) for question in image_questions)
        cleaned_image_questions = [question for question in processed_questions if question != 'none']

        image_feature = None
        if build_image_feature:
            # Reuse an already stored feature for the matching split when there is one.
            if 'train' in file_name and image_id in self.train_image_id_imagefeat_dict:
                image_feature = self.train_image_id_imagefeat_dict[image_id]
            elif 'test' in file_name and image_id in self.test_image_id_imagefeat_dict:
                image_feature = self.test_image_id_imagefeat_dict[image_id]
            elif 'dev' in file_name and image_id in self.dev_image_id_imagefeat_dict:
                # Read from the dev dictionary: the membership test above is against the dev
                # dictionary, so looking the id up in the train dictionary was a bug.
                image_feature = self.dev_image_id_imagefeat_dict[image_id]
            else:
                # Image feature doesnt exist in dictionary so build it
                try:
                    # TODO: Download image features given flickr id
                    logger.debug('Building feature for image: %s', image_url)
                    image_feature = self.get_processed_image_features(image_url)
                except Exception:
                    # Best effort: log the failure and return the record with image_feature
                    # left as None. KeyboardInterrupt/SystemExit are no longer swallowed.
                    logger.error('Image url has error %s', image_url)

        return image_id, image_feature, cleaned_image_questions, image_url, keyword