in microservices/classification_service/src/utils/classification/split_and_classify.py [0:0]
def execute_job(self, page_num=0):
    """
    Run the splitting and classification job for one page of the PDF.

    The page is written to an image by ``self.splitter``, sent to
    ``self.classifier`` for prediction, and the highest-scoring label is
    reported. If the best score is below
    ``CLASSIFICATION_CONFIDENCE_THRESHOLD`` the class is reported as
    ``CLASSIFICATION_UNDETECTABLE`` instead.

    The intermediate image is always deleted, even when classification
    fails. The source PDF (``self.pdf_path``) is deleted only after a
    successful run, so a failed job leaves its input in place.

    Args:
        page_num (int): Page to extract from pdf. Defaults to 0.

    Returns:
        str: JSON-encoded object with the keys ``case_id``, ``u_id``,
            ``predicted_class`` and ``model_conf``.

    Raises:
        Any exception raised by the splitter or classifier is propagated,
        as are ``KeyError``/``IndexError`` for a prediction result that
        lacks ``labels``/``scores`` or has fewer scores than labels.
    """
    img_path = None
    try:
        # contains output image path
        img_path = self.splitter.split_save2img(page_num=page_num)
        print(f"split_save2img: {img_path}")

        prediction_result = self.classifier.get_classification_predications(
            endpoint_id=self.endpoint_id, filename=img_path)
        # Sample raw prediction_result:
        # {'scores': [0.0136, 0.0222, 0.9085, 0.0222, 0.0332],
        #  'labels': ['PayStub', 'Utility', 'UE', 'Claim', 'DL'],
        #  'key': '<path of the image that was classified>'}
        print("prediction_result:")
        print(prediction_result)

        labels = prediction_result["labels"]
        scores = prediction_result["scores"]

        # Pick the label with the highest score. The strict comparison keeps
        # the first label when several share the top score.
        predicted_score = -1.0
        predicted_class = None
        for index, label in enumerate(labels):
            score = scores[index]
            if score > predicted_score:
                predicted_score = score
                predicted_class = label
        print(f"predicted_class: {predicted_class}")
        print(f"predicted_score: {predicted_score}")

        # A prediction below the confidence threshold is not trusted; report
        # the document as undetectable (the raw score is still returned).
        if predicted_score < CLASSIFICATION_CONFIDENCE_THRESHOLD:
            predicted_class = CLASSIFICATION_UNDETECTABLE

        output = {
            'case_id': self.case_id,
            'u_id': self.uid,
            'predicted_class': predicted_class,
            'model_conf': predicted_score,
        }
    finally:
        # The image is of no use after prediction, so remove it whether or
        # not the job succeeded; otherwise failed jobs leak temp files.
        if img_path is not None:
            try:
                os.remove(img_path)
            except FileNotFoundError:
                # Already gone: nothing to clean up, and raising here would
                # mask the exception that caused the failure.
                pass

    os.remove(self.pdf_path)
    return json.dumps(output)