in datasets.py [0:0]
def get_processed_fields(self, line, file_name, build_image_feature=False):
    """
    Split a record line into id, url, questions and (optionally) keyword,
    and optionally build/look up image features for the record.

    :param line: comma-separated record: image id, image url, then the
        questions joined by "---"; 'bing' files carry one extra field, so
        their questions sit at index 3 instead of 2.
    :param file_name: file name; used to detect the dataset ('bing') and the
        split ('train'/'test'/'dev') whose cached feature dict to consult.
    :param build_image_feature: when True, reuse a cached image feature or
        build one from the url; when False the returned feature is None.
    :return: tuple (image_id, image_feature, cleaned_image_questions,
        image_url, keyword). image_feature is None when not built or when the
        download fails; keyword is None unless self.use_keyword is set.
    """
    records = line.strip().split(",")
    image_id = records[0]
    image_url = records[1]
    # Bing records carry an extra column, shifting questions to index 3.
    question_field = records[3] if 'bing' in file_name else records[2]
    image_questions = question_field.split("---")
    keyword = records[3] if self.use_keyword else None

    # Normalize each question and drop the 'none' placeholder entries.
    cleaned_image_questions = []
    for question in image_questions:
        question = self.preprocess_text(question)
        if question != 'none':
            cleaned_image_questions.append(question)

    image_feature = None
    if build_image_feature:
        # Reuse the cached feature for this split if one already exists.
        if 'train' in file_name and image_id in self.train_image_id_imagefeat_dict:
            image_feature = self.train_image_id_imagefeat_dict[image_id]
        elif 'test' in file_name and image_id in self.test_image_id_imagefeat_dict:
            image_feature = self.test_image_id_imagefeat_dict[image_id]
        elif 'dev' in file_name and image_id in self.dev_image_id_imagefeat_dict:
            # Fix: previously read from the *train* dict in the dev branch.
            image_feature = self.dev_image_id_imagefeat_dict[image_id]
        else:
            # Feature not cached, so build it from the image url.
            try:
                # TODO: Download image features given flickr id
                logger.debug('Building feature for image: %s' % image_url)
                image_feature = self.get_processed_image_features(image_url)
            except Exception:
                # Best-effort: log and fall through with image_feature = None.
                # (Was a bare `except:`, which also swallowed SystemExit etc.)
                logger.error('Image url has error %s' % image_url)
    return image_id, image_feature, cleaned_image_questions, image_url, keyword