def execute_job()

in microservices/classification_service/src/utils/classification/split_and_classify.py [0:0]


  def execute_job(self, page_num=0):
    """
    Run splitting and classification job

    Args:
    page_num (int): Page to extract from pdf. Defaults to 0.

    Returns:
        JSON: json object
    """

    # Split the requested page from the PDF and save it as an image; returns the saved image path
    img_path = self.splitter.split_save2img(page_num=page_num)

    print(f"split_save2img: {img_path}")

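    # Send the page image to the deployed classification endpoint for prediction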
    prediction_result = self.classifier.get_classification_predications(
        endpoint_id=self.endpoint_id, filename=img_path)

    # Sample raw prediction_result
    # {'scores': [0.0136728594, 0.0222843271, 0.908525527, 0.0222843271, 0.0332329459], 'labels': ['PayStub', 'Utility', 'UE', 'Claim', 'DL'], 'key': '/opt/routes/temp_files/06_09_2022_01_59_10_temp_files\\7f2ec4ee-2d87-11ed-a71c-c2c2b7b788a8_7FvQ5G3dddti02sHbBhK_arizona-application-form_0.png'}

    print("prediction_result:")
    print(prediction_result)

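    # Pick the label with the highest score (argmax over the prediction scores)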
    predicted_score = -1.0
    predicted_class = None
    for index, label in enumerate(prediction_result["labels"]):
      if prediction_result["scores"][index] > predicted_score:
        predicted_score = prediction_result["scores"][index]
        predicted_class = label

    print(f"predicted_class: {predicted_class}")
    print(f"predicted_score: {predicted_score}")

    # If confidence is below the threshold, mark the document class as undetectable
    if predicted_score < CLASSIFICATION_CONFIDENCE_THRESHOLD:
      predicted_class = CLASSIFICATION_UNDETECTABLE

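    # Assemble the response payload returned to the caller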
    output = {
        'case_id': self.case_id,
        'u_id': self.uid,
        'predicted_class': predicted_class,
        'model_conf': predicted_score,
    }

    # Remove the temporary image and the source PDF; they are no longer needed after prediction
    os.remove(img_path)
    os.remove(self.pdf_path)
    return json.dumps(output)
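
A minimal usage sketch (not part of the source file): it assumes the enclosing class is named SplitAndClassify, that its constructor sets the case_id, uid, pdf_path, and endpoint_id attributes referenced above, and that CLASSIFICATION_CONFIDENCE_THRESHOLD and CLASSIFICATION_UNDETECTABLE are module-level constants defined elsewhere in split_and_classify.py. All names and values below are illustrative only; verify against the actual class definition.

  import json

  # Hypothetical construction; the real class name and signature may differ.
  job = SplitAndClassify(
      case_id="case-123",            # hypothetical case identifier
      uid="doc-456",                 # hypothetical unique document id
      pdf_path="/tmp/upload.pdf",    # local PDF to split and classify
      endpoint_id="0123456789",      # deployed classification endpoint id
  )

  # execute_job returns a JSON string; decode it to inspect the result.
  result = json.loads(job.execute_job(page_num=0))
  print(result["predicted_class"], result["model_conf"])

Note that execute_job deletes both the temporary page image and the source PDF on completion, so the job object should be treated as single-use.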