in src_batch_job/source/image_processor.py [0:0]
def process(self, s3_source_file_path, early_stop=False):
    """
    Vectorize the images listed in an S3 text file and store the results.

    The file at ``s3_source_file_path`` is read through
    ``self._read_paths_from_source`` and yields the image paths. Each
    image is then read, vectorized, turned into a DynamoDB item keyed by
    the path's file stem, and written with ``self._dynamodb_put``.

    Args:
        s3_source_file_path: Location of the text file listing the image
            paths to process.
        early_stop: When truthy, stop at the first put whose HTTP status
            code is not 200 instead of continuing with the remaining
            images.

    Returns:
        A ``(succeeded, total)`` tuple: the number of images whose put
        returned HTTP 200, and the total number of image paths listed.
    """
    image_paths = self._read_paths_from_source(s3_source_file_path)
    total = len(image_paths)
    succeeded = 0

    for image_path in image_paths:
        # The item key is the file name without directory or extension.
        image_key = Path(image_path).stem
        image = self._s3_read_image_file(image_path)
        vector = self._vectorize(image)
        item = self._dynamodb_itemize(image_key, vector)
        response = self._dynamodb_put(item)

        status_code = response['ResponseMetadata']['HTTPStatusCode']
        # Compare by value: identity checks on ints depend on interpreter
        # caching and are not a reliable equality test.
        if status_code == 200:
            succeeded += 1
        elif early_stop:
            # Images after the first failure are deliberately left
            # unprocessed when early stopping is requested.
            return succeeded, total

    return succeeded, total