def processRequest()

in code/textract_async/textract_processor.py
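
Handles the completion of an asynchronous Textract job: it reports stage status through the pipeline client, retrieves the job results, writes the Textract outputs to the Textract bucket, and records lineage between the source document and the generated outputs.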


def processRequest(request):

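    # Unpack the Textract job metadata from the incoming request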
    output = ""
    status = request['jobStatus']
    jobId = request['jobId']
    jobTag = request['jobTag']
    jobAPI = request['jobAPI']
    bucketName = request['bucketName']
    objectName = request['objectName']
    
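    # Document context used by the pipeline client when reporting stage status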
    pipeline_client.body = {
        "documentId": jobTag,
        "bucketName": bucketName,
        "objectName": objectName,
        "stage":      PIPELINE_STAGE
    }
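    # Fail the pipeline stage immediately if Textract reported the job as FAILED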
    if status == 'FAILED':
        pipeline_client.stageFailed("Textract job for document ID {}; bucketName {} fileName {}; failed during Textract analysis. Please double check the document quality".format(jobTag, bucketName, objectName))
        raise Exception("Textract Analysis didn't complete successfully")
    
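    # Mark the stage in progress and fetch the Textract job results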
    pipeline_client.stageInProgress()
    try:
        resultJSON = getJobResults(jobAPI, jobId, objectName)
    except Exception as e:
        pipeline_client.stageFailed("Textract job for document ID {} (bucket {}, file {}) failed during Textract processing. Could not read the Textract output files for job ID {}.".format(jobTag, bucketName, objectName, jobId))
        raise Exception("Textract analysis did not complete successfully") from e
        
    print("Result Textract result objects received: {}".format(len(resultJSON)))

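    # Form and table extraction is only available from the StartDocumentAnalysis API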
    detectForms = False
    detectTables = False
    if(jobAPI == "StartDocumentAnalysis"):
        detectForms = True
        detectTables = True

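    # Wrap the Textract response in an OutputGenerator so the results can be written out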
    try:
        opg = OutputGenerator(
            documentId=jobTag,
            response=resultJSON,
            bucketName=textractBucketName,
            objectName=objectName,
            forms=detectForms,
            tables=detectTables
        )
    except Exception:
        pipeline_client.stageFailed("Could not convert the Textract results into a processable output object. Try uploading the document again.")
        raise
        
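    # Write the Textract outputs, tagged with the document ID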
    tagging = "documentId={}".format(jobTag)
    opg.writeTextractOutputs(taggingStr=tagging)
    
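    # Record lineage between the source document and the Textract output objects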
    lineage_client.recordLineage({
        "documentId":       jobTag,
        "callerId":         request["callerId"],
        "sourceBucketName": bucketName,
        "targetBucketName": textractBucketName,
        "sourceFileName":   objectName,
        "targetFileName":   objectName
    })
    
    output = "Processed -> Document: {}, Object: {}/{} processed.".format(jobTag, bucketName, objectName)
    pipeline_client.stageSucceeded()
    print(output)
    return {
        'statusCode': 200,
        'body': output
    }
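
For context, a minimal sketch of how processRequest() might be invoked is shown below, assuming the Textract job-completion notification arrives through an SNS-triggered Lambda handler. The handler name, the direct SNS subscription, and the hard-coded callerId value are illustrative assumptions, not part of this module.

import json

# Hypothetical entry point (not part of this module): adapts the raw Textract
# SNS job-completion notification into the flattened dict processRequest() expects.
def lambda_handler(event, context):
    results = []
    for record in event["Records"]:
        # Textract publishes the completion notification as a JSON string
        # inside the SNS message body.
        notification = json.loads(record["Sns"]["Message"])
        request = {
            "jobStatus":  notification["Status"],
            "jobId":      notification["JobId"],
            "jobTag":     notification["JobTag"],
            "jobAPI":     notification["API"],
            "bucketName": notification["DocumentLocation"]["S3Bucket"],
            "objectName": notification["DocumentLocation"]["S3ObjectName"],
            "callerId":   "textract-async"  # assumed value; set by the upstream stage in practice
        }
        results.append(processRequest(request))
    return results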