def getJobResults()

in code/textract_async/textract_processor.py [0:0]


def getJobResults(api, jobId, objectName):
    """Load a Textract job's raw output from S3 and return it as parsed JSON.

    Lists the objects stored under ``<objectName>/textract-output/<jobId>``
    in ``textractBucketName``, drops the first listed object, and parses
    every remaining object with ``json.loads``.

    Args:
        api: Not used by the current implementation. An earlier version
            (since removed) used it to choose between the text-detection
            and document-analysis result calls and paged through them with
            ``NextToken``; the parameter is kept so callers need not change.
        jobId: Job identifier; forms the tail of the S3 prefix.
        objectName: Name of the processed object; forms the head of the
            S3 prefix.

    Returns:
        A list holding one parsed JSON value per result object, in the
        order the objects were listed.
    """
    s3 = S3Helper()
    outputKeys = s3.listObjectsInS3(
        bucketName=textractBucketName,
        bucketPrefix=objectName + "/textract-output/" + jobId,
    )

    # Per the original author's note, the S3 access file always appears
    # first in the listing; it is not a result page, so it is dropped here.
    resultKeys = outputKeys[1:]

    return [
        json.loads(s3.readFromS3(textractBucketName, key))
        for key in resultKeys
    ]